[bugfix] Use custom bluemonday policy to disallow inline img tags (#2100)
This commit is contained in:
parent
3aedd937c3
commit
dc96562b40
|
@ -20,6 +20,7 @@ package ap
|
|||
import (
|
||||
"github.com/superseriousbusiness/activity/pub"
|
||||
"github.com/superseriousbusiness/activity/streams"
|
||||
"github.com/superseriousbusiness/gotosocial/internal/text"
|
||||
)
|
||||
|
||||
/*
|
||||
|
@ -126,7 +127,8 @@ func NormalizeIncomingActivityObject(activity pub.Activity, rawJSON map[string]i
|
|||
}
|
||||
|
||||
// NormalizeIncomingContent replaces the Content of the given item
|
||||
// with the raw 'content' value from the raw json object map.
|
||||
// with the sanitized version of the raw 'content' value from the
|
||||
// raw json object map.
|
||||
//
|
||||
// noop if there was no content in the json object map or the
|
||||
// content was not a plain string.
|
||||
|
@ -145,6 +147,14 @@ func NormalizeIncomingContent(item WithSetContent, rawJSON map[string]interface{
|
|||
return
|
||||
}
|
||||
|
||||
// Content should be HTML encoded by default:
|
||||
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-content
|
||||
//
|
||||
// TODO: sanitize differently based on mediaType.
|
||||
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-mediatype
|
||||
content = text.SanitizeToHTML(content)
|
||||
content = text.MinifyHTML(content)
|
||||
|
||||
// Set normalized content property from the raw string;
|
||||
// this replaces any existing content property on the item.
|
||||
contentProp := streams.NewActivityStreamsContentProperty()
|
||||
|
@ -154,7 +164,8 @@ func NormalizeIncomingContent(item WithSetContent, rawJSON map[string]interface{
|
|||
|
||||
// NormalizeIncomingAttachments normalizes all attachments (if any) of the given
|
||||
// item, replacing the 'name' (aka media description) field of each attachment
|
||||
// with the raw 'name' value from the raw json object map.
|
||||
// with the raw 'name' value from the raw json object map, and doing sanitization
|
||||
// on the result.
|
||||
//
|
||||
// noop if there are no attachments; noop if attachment is not a format
|
||||
// we can understand.
|
||||
|
@ -212,7 +223,8 @@ func NormalizeIncomingAttachments(item WithAttachment, rawJSON map[string]interf
|
|||
}
|
||||
|
||||
// NormalizeIncomingSummary replaces the Summary of the given item
|
||||
// with the raw 'summary' value from the raw json object map.
|
||||
// with the sanitized version of the raw 'summary' value from the
|
||||
// raw json object map.
|
||||
//
|
||||
// noop if there was no summary in the json object map or the
|
||||
// summary was not a plain string.
|
||||
|
@ -229,6 +241,11 @@ func NormalizeIncomingSummary(item WithSetSummary, rawJSON map[string]interface{
|
|||
return
|
||||
}
|
||||
|
||||
// Summary should be HTML encoded:
|
||||
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-summary
|
||||
summary = text.SanitizeToHTML(summary)
|
||||
summary = text.MinifyHTML(summary)
|
||||
|
||||
// Set normalized summary property from the raw string; this
|
||||
// will replace any existing summary property on the item.
|
||||
summaryProp := streams.NewActivityStreamsSummaryProperty()
|
||||
|
@ -254,6 +271,13 @@ func NormalizeIncomingName(item WithSetName, rawJSON map[string]interface{}) {
|
|||
return
|
||||
}
|
||||
|
||||
// Name *must not* include any HTML markup:
|
||||
// https://www.w3.org/TR/activitystreams-vocabulary/#dfn-name
|
||||
//
|
||||
// TODO: We probably want to update this to allow
|
||||
// *escaped* HTML markup, but for now just nuke it.
|
||||
name = text.SanitizeToPlaintext(name)
|
||||
|
||||
// Set normalized name property from the raw string; this
|
||||
// will replace any existing name property on the item.
|
||||
nameProp := streams.NewActivityStreamsNameProperty()
|
||||
|
|
|
@ -146,7 +146,7 @@ func (suite *NormalizeTestSuite) getStatusableWithMultipleAttachments() (vocab.A
|
|||
"type": "Document",
|
||||
"mediaType": "image/jpeg",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg",
|
||||
"name": "danger: #cute but will claw you :("
|
||||
"name": "image of a cat & there's a note saying: <danger: #cute but will claw you :(>"
|
||||
}
|
||||
]
|
||||
}`)
|
||||
|
@ -192,7 +192,7 @@ func (suite *NormalizeTestSuite) TestNormalizeActivityObject() {
|
|||
)
|
||||
|
||||
ap.NormalizeIncomingActivityObject(create, map[string]interface{}{"object": rawNote})
|
||||
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" data-tag="twittermigration" href="https://example.org/tag/twittermigration" rel="tag ugc">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note))
|
||||
suite.Equal(`UPDATE: As of this morning there are now more than 7 million Mastodon users, most from the <a class="hashtag" href="https://example.org/tag/twittermigration" rel="tag ugc nofollow noreferrer noopener" target="_blank">#TwitterMigration</a>.<br><br>In fact, 100,000 new accounts have been created since last night.<br><br>Since last night's spike 8,000-12,000 new accounts are being created every hour.<br><br>Yesterday, I estimated that Mastodon would have 8 million users by the end of the week. That might happen a lot sooner if this trend continues.`, ap.ExtractContent(note))
|
||||
}
|
||||
|
||||
func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment() {
|
||||
|
@ -224,7 +224,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
|
|||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"attachment": {
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "DESCRIPTION: here's \u003c\u003ca\u003e\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
|
@ -265,7 +265,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsOneAttachment
|
|||
"@context": "https://www.w3.org/ns/activitystreams",
|
||||
"attachment": {
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "DESCRIPTION: here's \u003c\u003ca\u003e\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
|
@ -304,7 +304,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsMultipleAttac
|
|||
},
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "danger: #cute%20but%20will%20claw%20you%20:(",
|
||||
"name": "image of a cat \u0026amp; there's a note saying: \u0026lt;danger: #cute but will claw you :(\u0026gt;",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
}
|
||||
|
@ -326,7 +326,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsMultipleAttac
|
|||
"attachment": [
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "DESCRIPTION: here's \u003c\u003ca\u003e\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"name": "DESCRIPTION: here's \u003c\u003e picture of a #cat, it's cute! here's some special characters: \"\" \\ weeee''''",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
},
|
||||
|
@ -343,7 +343,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableAttachmentsMultipleAttac
|
|||
},
|
||||
{
|
||||
"mediaType": "image/jpeg",
|
||||
"name": "danger: #cute but will claw you :(",
|
||||
"name": "image of a cat \u0026 there's a note saying:",
|
||||
"type": "Document",
|
||||
"url": "https://files.example.org/media_attachments/files/110/258/459/579/509/026/original/b65392ebe0fb04ef.jpeg"
|
||||
}
|
||||
|
@ -380,7 +380,7 @@ func (suite *NormalizeTestSuite) TestNormalizeStatusableSummary() {
|
|||
suite.Equal(`warning: #WEIRD%20%23SUMMARY%20;;;;a;;a;asv%20%20%20%20khop8273987(*%5E&%5E)`, ap.ExtractSummary(statusable))
|
||||
|
||||
ap.NormalizeIncomingSummary(statusable, rawAccount)
|
||||
suite.Equal(`warning: #WEIRD #SUMMARY ;;;;a;;a;asv khop8273987(*^&^)`, ap.ExtractSummary(statusable))
|
||||
suite.Equal(`warning: #WEIRD #SUMMARY ;;;;a;;a;asv khop8273987(*^&^)`, ap.ExtractSummary(statusable))
|
||||
}
|
||||
|
||||
func (suite *NormalizeTestSuite) TestNormalizeStatusableName() {
|
||||
|
|
|
@ -43,7 +43,7 @@ type StatusCreateTestSuite struct {
|
|||
const (
|
||||
statusWithLinksAndTags = "#test alright, should be able to post #links with fragments in them now, let's see........\n\nhttps://docs.gotosocial.org/en/latest/user_guide/posts/#links\n\n#gotosocial\n\n(tobi remember to pull the docker image challenge)"
|
||||
statusMarkdown = "# Title\n\n## Smaller title\n\nThis is a post written in [markdown](https://www.markdownguide.org/)\n\n<img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\"/>"
|
||||
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p><img src=\"https://d33wubrfki0l68.cloudfront.net/f1f475a6fda1c2c4be4cac04033db5c3293032b4/513a4/assets/images/markdown-mark-white.svg\" crossorigin=\"anonymous\">"
|
||||
statusMarkdownExpected = "<h1>Title</h1><h2>Smaller title</h2><p>This is a post written in <a href=\"https://www.markdownguide.org/\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">markdown</a></p>"
|
||||
)
|
||||
|
||||
// Post a new status with some custom visibility settings
|
||||
|
|
|
@ -71,7 +71,7 @@ func (p *Processor) Create(
|
|||
Username: form.Username,
|
||||
Email: form.Email,
|
||||
Password: form.Password,
|
||||
Reason: text.SanitizePlaintext(reason),
|
||||
Reason: text.SanitizeToPlaintext(reason),
|
||||
PreApproved: !config.GetAccountsApprovalRequired(), // Mark as approved if no approval required.
|
||||
SignUpIP: form.IP,
|
||||
Locale: form.Locale,
|
||||
|
|
|
@ -67,7 +67,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
}
|
||||
|
||||
// Parse new display name (always from plaintext).
|
||||
account.DisplayName = text.SanitizePlaintext(displayName)
|
||||
account.DisplayName = text.SanitizeToPlaintext(displayName)
|
||||
|
||||
// If display name has changed, account emojis may have also changed.
|
||||
emojisChanged = true
|
||||
|
@ -110,8 +110,8 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
|
||||
// Sanitize raw field values.
|
||||
fieldRaw := &gtsmodel.Field{
|
||||
Name: text.SanitizePlaintext(name),
|
||||
Value: text.SanitizePlaintext(value),
|
||||
Name: text.SanitizeToPlaintext(name),
|
||||
Value: text.SanitizeToPlaintext(value),
|
||||
}
|
||||
fieldsRaw = append(fieldsRaw, fieldRaw)
|
||||
}
|
||||
|
@ -255,7 +255,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, form
|
|||
if err := validate.CustomCSS(customCSS); err != nil {
|
||||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
account.CustomCSS = text.SanitizePlaintext(customCSS)
|
||||
account.CustomCSS = text.SanitizeToPlaintext(customCSS)
|
||||
}
|
||||
|
||||
if form.EnableRSS != nil {
|
||||
|
|
|
@ -67,8 +67,8 @@ func (p *Processor) DomainBlockCreate(
|
|||
ID: id.NewULID(),
|
||||
Domain: domain,
|
||||
CreatedByAccountID: account.ID,
|
||||
PrivateComment: text.SanitizePlaintext(privateComment),
|
||||
PublicComment: text.SanitizePlaintext(publicComment),
|
||||
PrivateComment: text.SanitizeToPlaintext(privateComment),
|
||||
PublicComment: text.SanitizeToPlaintext(publicComment),
|
||||
Obfuscate: &obfuscate,
|
||||
SubscriptionID: subscriptionID,
|
||||
}
|
||||
|
|
|
@ -159,7 +159,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
|
|||
return nil, gtserror.NewErrorBadRequest(err, fmt.Sprintf("site title invalid: %s", err))
|
||||
}
|
||||
updatingColumns = append(updatingColumns, "title")
|
||||
instance.Title = text.SanitizePlaintext(*form.Title) // don't allow html in site title
|
||||
instance.Title = text.SanitizeToPlaintext(*form.Title) // don't allow html in site title
|
||||
}
|
||||
|
||||
// validate & update site contact account if it's set on the form
|
||||
|
@ -215,7 +215,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
|
|||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
updatingColumns = append(updatingColumns, "short_description")
|
||||
instance.ShortDescription = text.SanitizeHTML(*form.ShortDescription) // html is OK in site description, but we should sanitize it
|
||||
instance.ShortDescription = text.SanitizeToHTML(*form.ShortDescription) // html is OK in site description, but we should sanitize it
|
||||
}
|
||||
|
||||
// validate & update site description if it's set on the form
|
||||
|
@ -224,7 +224,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
|
|||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
updatingColumns = append(updatingColumns, "description")
|
||||
instance.Description = text.SanitizeHTML(*form.Description) // html is OK in site description, but we should sanitize it
|
||||
instance.Description = text.SanitizeToHTML(*form.Description) // html is OK in site description, but we should sanitize it
|
||||
}
|
||||
|
||||
// validate & update site terms if it's set on the form
|
||||
|
@ -233,7 +233,7 @@ func (p *Processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
|
|||
return nil, gtserror.NewErrorBadRequest(err, err.Error())
|
||||
}
|
||||
updatingColumns = append(updatingColumns, "terms")
|
||||
instance.Terms = text.SanitizeHTML(*form.Terms) // html is OK in site terms, but we should sanitize it
|
||||
instance.Terms = text.SanitizeToHTML(*form.Terms) // html is OK in site terms, but we should sanitize it
|
||||
}
|
||||
|
||||
var updateInstanceAccount bool
|
||||
|
|
|
@ -47,7 +47,7 @@ func (p *Processor) Update(ctx context.Context, account *gtsmodel.Account, media
|
|||
var updatingColumns []string
|
||||
|
||||
if form.Description != nil {
|
||||
attachment.Description = text.SanitizePlaintext(*form.Description)
|
||||
attachment.Description = text.SanitizeToPlaintext(*form.Description)
|
||||
updatingColumns = append(updatingColumns, "description")
|
||||
}
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ func (p *Processor) Create(ctx context.Context, account *gtsmodel.Account, appli
|
|||
Local: &local,
|
||||
AccountID: account.ID,
|
||||
AccountURI: account.URI,
|
||||
ContentWarning: text.SanitizePlaintext(form.SpoilerText),
|
||||
ContentWarning: text.SanitizeToPlaintext(form.SpoilerText),
|
||||
ActivityStreamsType: ap.ObjectNote,
|
||||
Sensitive: &sensitive,
|
||||
CreatedWithApplicationID: application.ID,
|
||||
|
|
|
@ -61,13 +61,10 @@ func (f *formatter) FromPlainEmojiOnly(ctx context.Context, pmf gtsmodel.ParseMe
|
|||
result.HTML = htmlContentBytes.String()
|
||||
|
||||
// clean anything dangerous out of the HTML
|
||||
result.HTML = SanitizeHTML(result.HTML)
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
|
||||
// shrink ray
|
||||
result.HTML, err = m.String("text/html", result.HTML)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, "error minifying HTML: %s", err)
|
||||
}
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -57,13 +57,10 @@ func (f *formatter) FromMarkdown(ctx context.Context, pmf gtsmodel.ParseMentionF
|
|||
result.HTML = htmlContentBytes.String()
|
||||
|
||||
// clean anything dangerous out of the HTML
|
||||
result.HTML = SanitizeHTML(result.HTML)
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
|
||||
// shrink ray
|
||||
result.HTML, err = m.String("text/html", result.HTML)
|
||||
if err != nil {
|
||||
log.Errorf(ctx, "error minifying HTML: %s", err)
|
||||
}
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -51,7 +51,7 @@ const (
|
|||
withHashtag = "# Title\n\nhere's a simple status that uses hashtag #Hashtag!"
|
||||
withHashtagExpected = "<h1>Title</h1><p>here's a simple status that uses hashtag <a href=\"http://localhost:8080/tags/hashtag\" class=\"mention hashtag\" rel=\"tag nofollow noreferrer noopener\" target=\"_blank\">#<span>Hashtag</span></a>!</p>"
|
||||
mdWithHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a <a href=\"https://example.org\">link</a>.\n\nHere's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\">"
|
||||
mdWithHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p><p>Here's an image: <img src=\"https://gts.superseriousbusiness.org/assets/logo.png\" alt=\"The GoToSocial sloth logo.\" width=\"500\" height=\"600\" crossorigin=\"anonymous\"></p>"
|
||||
mdWithHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a <a href=\"https://example.org\" rel=\"nofollow noreferrer noopener\" target=\"_blank\">link</a>.</p><p>Here's an image:</p>"
|
||||
mdWithCheekyHTML = "# Title\n\nHere's a simple text in markdown.\n\nHere's a cheeky little script: <script>alert(ahhhh)</script>"
|
||||
mdWithCheekyHTMLExpected = "<h1>Title</h1><p>Here's a simple text in markdown.</p><p>Here's a cheeky little script:</p>"
|
||||
mdWithHashtagInitial = "#welcome #Hashtag"
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
package text
|
||||
|
||||
import (
|
||||
"github.com/superseriousbusiness/gotosocial/internal/log"
|
||||
"github.com/tdewolff/minify/v2"
|
||||
"github.com/tdewolff/minify/v2/html"
|
||||
)
|
||||
|
@ -31,3 +32,23 @@ var m = func() *minify.M {
|
|||
})
|
||||
return m
|
||||
}()
|
||||
|
||||
// MinifyHTML minifies the given string
|
||||
// under the assumption that it's HTML.
|
||||
//
|
||||
// If input is not HTML encoded, this
|
||||
// function will try to do minimization
|
||||
// anyway, but this may produce unexpected
|
||||
// results.
|
||||
//
|
||||
// If an error occurs during minimization,
|
||||
// it will be logged and the original string
|
||||
// returned unmodified.
|
||||
func MinifyHTML(in string) string {
|
||||
out, err := m.String("text/html", in)
|
||||
if err != nil {
|
||||
log.Error(nil, err)
|
||||
}
|
||||
|
||||
return out
|
||||
}
|
||||
|
|
|
@ -65,13 +65,10 @@ func (f *formatter) fromPlain(
|
|||
result.HTML = htmlContentBytes.String()
|
||||
|
||||
// Clean anything dangerous out of resulting HTML.
|
||||
result.HTML = SanitizeHTML(result.HTML)
|
||||
result.HTML = SanitizeToHTML(result.HTML)
|
||||
|
||||
// Shrink ray!
|
||||
var err error
|
||||
if result.HTML, err = m.String("text/html", result.HTML); err != nil {
|
||||
log.Errorf(ctx, "error minifying HTML: %s", err)
|
||||
}
|
||||
result.HTML = MinifyHTML(result.HTML)
|
||||
|
||||
return result
|
||||
}
|
||||
|
|
|
@ -25,44 +25,167 @@ import (
|
|||
"github.com/microcosm-cc/bluemonday"
|
||||
)
|
||||
|
||||
// '[A]llows a broad selection of HTML elements and attributes that are safe for user generated content.
|
||||
// Note that this policy does not allow iframes, object, embed, styles, script, etc.
|
||||
// An example usage scenario would be blog post bodies where a variety of formatting is expected along with the potential for TABLEs and IMGs.'
|
||||
//
|
||||
// Source: https://github.com/microcosm-cc/bluemonday#usage
|
||||
var regular *bluemonday.Policy = bluemonday.UGCPolicy().
|
||||
RequireNoReferrerOnLinks(true).
|
||||
RequireNoFollowOnLinks(false). // remove the global default which adds rel="nofollow" to all links including local relative
|
||||
RequireNoFollowOnFullyQualifiedLinks(true). // add rel="nofollow" on all external links
|
||||
RequireCrossOriginAnonymous(true).
|
||||
AddTargetBlankToFullyQualifiedLinks(true).
|
||||
AllowAttrs("class", "href", "rel").OnElements("a").
|
||||
AllowAttrs("class").OnElements("span").
|
||||
AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code").
|
||||
SkipElementsContent("code", "pre")
|
||||
// Regular HTML policy is an adapted version of the default
|
||||
// bluemonday UGC policy, with some tweaks of our own.
|
||||
// See: https://github.com/microcosm-cc/bluemonday#usage
|
||||
var regular *bluemonday.Policy = func() *bluemonday.Policy {
|
||||
p := bluemonday.NewPolicy()
|
||||
|
||||
// '[C]an be thought of as equivalent to stripping all HTML elements and their attributes as it has nothing on its allowlist.
|
||||
// An example usage scenario would be blog post titles where HTML tags are not expected at all
|
||||
// and if they are then the elements and the content of the elements should be stripped. This is a very strict policy.'
|
||||
// AllowStandardAttributes will enable "id", "title" and
|
||||
// the language specific attributes "dir" and "lang" on
|
||||
// all elements that are allowed
|
||||
p.AllowStandardAttributes()
|
||||
|
||||
/*
|
||||
LAYOUT AND FORMATTING
|
||||
*/
|
||||
|
||||
// "aside" is permitted and takes no attributes.
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/aside
|
||||
p.AllowElements("article", "aside")
|
||||
|
||||
// "details" is permitted, including the "open" attribute
|
||||
// which can either be blank or the value "open".
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/details
|
||||
p.AllowAttrs("open").Matching(regexp.MustCompile(`(?i)^(|open)$`)).OnElements("details")
|
||||
|
||||
// "section" is permitted and takes no attributes.
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/section
|
||||
p.AllowElements("section")
|
||||
|
||||
// "summary" is permitted and takes no attributes.
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/summary
|
||||
p.AllowElements("summary")
|
||||
|
||||
// "h1" through "h6" are permitted and take no attributes.
|
||||
p.AllowElements("h1", "h2", "h3", "h4", "h5", "h6")
|
||||
|
||||
// "hgroup" is permitted and takes no attributes.
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/hgroup
|
||||
p.AllowElements("hgroup")
|
||||
|
||||
// "blockquote" is permitted, including the "cite"
|
||||
// attribute which must be a standard URL.
|
||||
p.AllowAttrs("cite").OnElements("blockquote")
|
||||
|
||||
// "br" "div" "hr" "p" "span" "wbr" are permitted and take no attributes
|
||||
p.AllowElements("br", "div", "hr", "p", "span", "wbr")
|
||||
|
||||
// The following are all inline phrasing elements:
|
||||
p.AllowElements("abbr", "acronym", "cite", "code", "dfn", "em",
|
||||
"figcaption", "mark", "s", "samp", "strong", "sub", "sup", "var")
|
||||
|
||||
// "q" is permitted and "cite" is a URL and handled by URL policies
|
||||
p.AllowAttrs("cite").OnElements("q")
|
||||
|
||||
// "time" is permitted
|
||||
p.AllowAttrs("datetime").Matching(bluemonday.ISO8601).OnElements("time")
|
||||
|
||||
// Block and inline elements that impart no
|
||||
// semantic meaning but style the document.
|
||||
// Underlines, italics, bold, strikethrough etc.
|
||||
p.AllowElements("b", "i", "pre", "small", "strike", "tt", "u")
|
||||
|
||||
// "del" "ins" are permitted
|
||||
p.AllowAttrs("cite").Matching(bluemonday.Paragraph).OnElements("del", "ins")
|
||||
p.AllowAttrs("datetime").Matching(bluemonday.ISO8601).OnElements("del", "ins")
|
||||
|
||||
// Enable ordered, unordered, and definition lists.
|
||||
p.AllowLists()
|
||||
|
||||
// Class needed on span for mentions, which look like this when assembled:
|
||||
// `<span class="h-card"><a href="https://example.org/users/targetAccount" class="u-url mention">@<span>someusername</span></a></span>`
|
||||
p.AllowAttrs("class").OnElements("span")
|
||||
|
||||
/*
|
||||
LANGUAGE FORMATTING
|
||||
*/
|
||||
|
||||
// "bdi" "bdo" are permitted on "dir".
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes/dir
|
||||
p.AllowAttrs("dir").Matching(bluemonday.Direction).OnElements("bdi", "bdo")
|
||||
|
||||
// "rp" "rt" "ruby" are permitted. See:
|
||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/rp
|
||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/rt
|
||||
// https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ruby
|
||||
p.AllowElements("rp", "rt", "ruby")
|
||||
|
||||
/*
|
||||
CODE BLOCKS
|
||||
*/
|
||||
|
||||
// Permit language tags for code elements.
|
||||
p.AllowAttrs("class").Matching(regexp.MustCompile("^language-[a-zA-Z0-9]+$")).OnElements("code")
|
||||
|
||||
// Don't sanitize HTML inside code blocks.
|
||||
p.SkipElementsContent("code", "pre")
|
||||
|
||||
/*
|
||||
LINKS AND LINK SAFETY.
|
||||
*/
|
||||
|
||||
// Permit hyperlinks.
|
||||
p.AllowAttrs("class", "href", "rel").OnElements("a")
|
||||
|
||||
// URLs must be parseable by net/url.Parse().
|
||||
p.RequireParseableURLs(true)
|
||||
|
||||
// Most common URL schemes only.
|
||||
p.AllowURLSchemes("mailto", "http", "https")
|
||||
|
||||
// Force rel="noreferrer".
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel/noreferrer
|
||||
p.RequireNoReferrerOnLinks(true)
|
||||
|
||||
// Add rel="nofollow" on all fully qualified (not relative) links.
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/rel#nofollow
|
||||
p.RequireNoFollowOnFullyQualifiedLinks(true)
|
||||
|
||||
// Force crossorigin="anonymous"
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Attributes/crossorigin#anonymous
|
||||
p.RequireCrossOriginAnonymous(true)
|
||||
|
||||
// Force target="_blank".
|
||||
// See: https://developer.mozilla.org/en-US/docs/Web/HTML/Element/a#target
|
||||
p.AddTargetBlankToFullyQualifiedLinks(true)
|
||||
|
||||
return p
|
||||
}()
|
||||
|
||||
// '[C]an be thought of as equivalent to stripping all HTML
|
||||
// elements and their attributes as it has nothing on its allowlist.
|
||||
// An example usage scenario would be blog post titles where HTML
|
||||
// tags are not expected at all and if they are then the elements
|
||||
// and the content of the elements should be stripped. This is a
|
||||
// very strict policy.'
|
||||
//
|
||||
// Source: https://github.com/microcosm-cc/bluemonday#usage
|
||||
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
|
||||
|
||||
// removeHTML strictly removes *all* recognized HTML elements from the given string.
|
||||
// removeHTML strictly removes *all* recognized
|
||||
// HTML elements from the given string.
|
||||
func removeHTML(in string) string {
|
||||
return strict.Sanitize(in)
|
||||
}
|
||||
|
||||
// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through.
|
||||
func SanitizeHTML(in string) string {
|
||||
// SanitizeToHTML sanitizes only risky html elements
|
||||
// from the given string, allowing safe ones through.
|
||||
func SanitizeToHTML(in string) string {
|
||||
return regular.Sanitize(in)
|
||||
}
|
||||
|
||||
// SanitizePlaintext runs text through basic sanitization. This removes
|
||||
// any html elements that were in the string, and returns clean plaintext.
|
||||
func SanitizePlaintext(in string) string {
|
||||
// SanitizeToPlaintext runs text through basic sanitization.
|
||||
// This removes any html elements that were in the string,
|
||||
// and returns clean plaintext.
|
||||
func SanitizeToPlaintext(in string) string {
|
||||
// Unescape first to catch any tricky critters.
|
||||
content := html.UnescapeString(in)
|
||||
|
||||
// Remove all detected HTML.
|
||||
content = removeHTML(content)
|
||||
|
||||
// Unescape again to return plaintext.
|
||||
content = html.UnescapeString(content)
|
||||
return strings.TrimSpace(content)
|
||||
}
|
||||
|
|
|
@ -36,30 +36,30 @@ type SanitizeTestSuite struct {
|
|||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeOutgoing() {
|
||||
s := text.SanitizeHTML(sanitizeOutgoing)
|
||||
s := text.SanitizeToHTML(sanitizeOutgoing)
|
||||
suite.Equal(sanitizedOutgoing, s)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeHTML() {
|
||||
s := text.SanitizeHTML(sanitizeHTML)
|
||||
s := text.SanitizeToHTML(sanitizeHTML)
|
||||
suite.Equal(sanitizedHTML, s)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption1() {
|
||||
dodgyCaption := "<script>console.log('haha!')</script>this is just a normal caption ;)"
|
||||
sanitized := text.SanitizePlaintext(dodgyCaption)
|
||||
sanitized := text.SanitizeToPlaintext(dodgyCaption)
|
||||
suite.Equal("this is just a normal caption ;)", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption2() {
|
||||
dodgyCaption := "<em>here's a LOUD caption</em>"
|
||||
sanitized := text.SanitizePlaintext(dodgyCaption)
|
||||
sanitized := text.SanitizeToPlaintext(dodgyCaption)
|
||||
suite.Equal("here's a LOUD caption", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption3() {
|
||||
dodgyCaption := ""
|
||||
sanitized := text.SanitizePlaintext(dodgyCaption)
|
||||
sanitized := text.SanitizeToPlaintext(dodgyCaption)
|
||||
suite.Equal("", sanitized)
|
||||
}
|
||||
|
||||
|
@ -75,21 +75,21 @@ with some newlines
|
|||
|
||||
|
||||
`
|
||||
sanitized := text.SanitizePlaintext(dodgyCaption)
|
||||
sanitized := text.SanitizeToPlaintext(dodgyCaption)
|
||||
suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption5() {
|
||||
// html-escaped: "<script>console.log('aha!')</script> hello world"
|
||||
dodgyCaption := `<script>console.log('aha!')</script> hello world`
|
||||
sanitized := text.SanitizePlaintext(dodgyCaption)
|
||||
sanitized := text.SanitizeToPlaintext(dodgyCaption)
|
||||
suite.Equal("hello world", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeCaption6() {
|
||||
// html-encoded: "<script>console.log('aha!')</script> hello world"
|
||||
dodgyCaption := `<script>console.log('aha!')</script> hello world`
|
||||
sanitized := text.SanitizePlaintext(dodgyCaption)
|
||||
sanitized := text.SanitizeToPlaintext(dodgyCaption)
|
||||
suite.Equal("hello world", sanitized)
|
||||
}
|
||||
|
||||
|
@ -104,24 +104,30 @@ func (suite *SanitizeTestSuite) TestSanitizeCustomCSS() {
|
|||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
}`
|
||||
sanitized := text.SanitizePlaintext(customCSS)
|
||||
sanitized := text.SanitizeToPlaintext(customCSS)
|
||||
suite.Equal(customCSS, sanitized) // should be the same as it was before
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeNaughtyCustomCSS1() {
|
||||
// try to break out of <style> into <head> and change the document title
|
||||
customCSS := "</style><title>pee pee poo poo</title><style>"
|
||||
sanitized := text.SanitizePlaintext(customCSS)
|
||||
sanitized := text.SanitizeToPlaintext(customCSS)
|
||||
suite.Empty(sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeNaughtyCustomCSS2() {
|
||||
// try to break out of <style> into <head> and change the document title
|
||||
customCSS := "pee pee poo poo</style><title></title><style>"
|
||||
sanitized := text.SanitizePlaintext(customCSS)
|
||||
sanitized := text.SanitizeToPlaintext(customCSS)
|
||||
suite.Equal("pee pee poo poo", sanitized)
|
||||
}
|
||||
|
||||
func (suite *SanitizeTestSuite) TestSanitizeInlineImg() {
|
||||
withInlineImg := "<p>Here's an inline image: <img class=\"fixed-size-img svelte-uci8eb\" aria-hidden=\"false\" alt=\"A black-and-white photo of an Oblique Strategy card. The card reads: 'Define an area as 'safe' and use it as an anchor'.\" title=\"A black-and-white photo of an Oblique Strategy card. The card reads: 'Define an area as 'safe' and use it as an anchor'.\" width=\"0\" height=\"0\" src=\"https://example.org/fileserver/01H7J83147QMCE17C0RS9P10Y9/attachment/small/01H7J8365XXRTCP6CAMGEM49ZE.jpg\" style=\"object-position: 50% 50%;\"></p>"
|
||||
sanitized := text.SanitizeToHTML(withInlineImg)
|
||||
suite.Equal(`<p>Here's an inline image: </p>`, sanitized)
|
||||
}
|
||||
|
||||
func TestSanitizeTestSuite(t *testing.T) {
|
||||
suite.Run(t, new(SanitizeTestSuite))
|
||||
}
|
||||
|
|
|
@ -67,7 +67,7 @@ func ogBase(instance *apimodel.InstanceV1) *ogMeta {
|
|||
}
|
||||
|
||||
og := &ogMeta{
|
||||
Title: text.SanitizePlaintext(instance.Title) + " - GoToSocial",
|
||||
Title: text.SanitizeToPlaintext(instance.Title) + " - GoToSocial",
|
||||
Type: "website",
|
||||
Locale: locale,
|
||||
URL: instance.URI,
|
||||
|
@ -156,7 +156,7 @@ func parseTitle(account *apimodel.Account, accountDomain string) string {
|
|||
// parseDescription returns a string description which is
|
||||
// safe to use as a template.HTMLAttr inside templates.
|
||||
func parseDescription(in string) string {
|
||||
i := text.SanitizePlaintext(in)
|
||||
i := text.SanitizeToPlaintext(in)
|
||||
i = strings.ReplaceAll(i, "\n", " ")
|
||||
i = strings.Join(strings.Fields(i), " ")
|
||||
i = html.EscapeString(i)
|
||||
|
|
Loading…
Reference in New Issue