From 5668ce1ec701ed12eb099020e8a322de08e6f810 Mon Sep 17 00:00:00 2001
From: tobi <31960611+tsmethurst@users.noreply.github.com>
Date: Thu, 26 May 2022 11:37:13 +0200
Subject: [PATCH] [bugfix] Fix HTML escaping in instance title (#607)
* move caption sanitization -> sanitize.go
* use sanitizeplaintext rather than removehtml
* rename sanitizecaption to sanitizeplaintext
* avoid removing html twice from statuses
* unexport removeHTML
it's no longer used outside the text package so this
makes it less confusing
* test instance PATCH
---
internal/api/client/instance/instance_test.go | 126 +++++++++++++++++
.../api/client/instance/instancepatch_test.go | 130 ++++++++++++++++++
internal/processing/account/create.go | 2 +-
internal/processing/account/update.go | 2 +-
.../processing/admin/createdomainblock.go | 4 +-
internal/processing/instance.go | 2 +-
internal/processing/media/update.go | 2 +-
internal/processing/status/create.go | 2 +-
internal/processing/status/util.go | 8 +-
internal/text/caption.go | 29 ----
internal/text/caption_test.go | 82 -----------
internal/text/plain.go | 2 +-
internal/text/removehtml_test.go | 57 ++++++++
internal/text/sanitize.go | 16 ++-
internal/text/sanitize_test.go | 68 ++++++---
15 files changed, 381 insertions(+), 151 deletions(-)
create mode 100644 internal/api/client/instance/instance_test.go
create mode 100644 internal/api/client/instance/instancepatch_test.go
delete mode 100644 internal/text/caption.go
delete mode 100644 internal/text/caption_test.go
create mode 100644 internal/text/removehtml_test.go
diff --git a/internal/api/client/instance/instance_test.go b/internal/api/client/instance/instance_test.go
new file mode 100644
index 000000000..a1fe1f17c
--- /dev/null
+++ b/internal/api/client/instance/instance_test.go
@@ -0,0 +1,126 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package instance_test
+
+import (
+ "bytes"
+ "fmt"
+ "net/http"
+ "net/http/httptest"
+
+ "codeberg.org/gruf/go-store/kv"
+ "github.com/gin-gonic/gin"
+ "github.com/spf13/viper"
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/api/client/instance"
+ "github.com/superseriousbusiness/gotosocial/internal/concurrency"
+ "github.com/superseriousbusiness/gotosocial/internal/config"
+ "github.com/superseriousbusiness/gotosocial/internal/db"
+ "github.com/superseriousbusiness/gotosocial/internal/email"
+ "github.com/superseriousbusiness/gotosocial/internal/federation"
+ "github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
+ "github.com/superseriousbusiness/gotosocial/internal/media"
+ "github.com/superseriousbusiness/gotosocial/internal/messages"
+ "github.com/superseriousbusiness/gotosocial/internal/oauth"
+ "github.com/superseriousbusiness/gotosocial/internal/processing"
+ "github.com/superseriousbusiness/gotosocial/testrig"
+)
+
+type InstanceStandardTestSuite struct {
+ // embedded testify suite plus the dependencies that SetupTest rebuilds on every call
+ suite.Suite
+ db db.DB
+ storage *kv.KVStore
+ mediaManager media.Manager
+ federator federation.Federator
+ processor processing.Processor
+ emailSender email.Sender
+ sentEmails map[string]string
+
+ // fixture models, loaded from testrig in SetupSuite
+ testTokens map[string]*gtsmodel.Token
+ testClients map[string]*gtsmodel.Client
+ testApplications map[string]*gtsmodel.Application
+ testUsers map[string]*gtsmodel.User
+ testAccounts map[string]*gtsmodel.Account
+ testAttachments map[string]*gtsmodel.MediaAttachment
+ testStatuses map[string]*gtsmodel.Status
+
+ // module being tested; constructed from suite.processor in SetupTest
+ instanceModule *instance.Module
+}
+
+func (suite *InstanceStandardTestSuite) SetupSuite() {
+ suite.testTokens = testrig.NewTestTokens()
+ suite.testClients = testrig.NewTestClients()
+ suite.testApplications = testrig.NewTestApplications()
+ suite.testUsers = testrig.NewTestUsers()
+ suite.testAccounts = testrig.NewTestAccounts()
+ suite.testAttachments = testrig.NewTestAttachments()
+ suite.testStatuses = testrig.NewTestStatuses()
+}
+
+func (suite *InstanceStandardTestSuite) SetupTest() {
+ testrig.InitTestConfig()
+ testrig.InitTestLog()
+ // worker pools handed to the transport controller, federator and processor below
+ fedWorker := concurrency.NewWorkerPool[messages.FromFederator](-1, -1)
+ clientWorker := concurrency.NewWorkerPool[messages.FromClientAPI](-1, -1)
+ // build a new db, storage, federator, processor and module on each call, then load the standard fixtures
+ suite.db = testrig.NewTestDB()
+ suite.storage = testrig.NewTestStorage()
+ suite.mediaManager = testrig.NewTestMediaManager(suite.db, suite.storage)
+ suite.federator = testrig.NewTestFederator(suite.db, testrig.NewTestTransportController(testrig.NewMockHTTPClient(nil), suite.db, fedWorker), suite.storage, suite.mediaManager, fedWorker)
+ suite.sentEmails = make(map[string]string)
+ suite.emailSender = testrig.NewEmailSender("../../../../web/template/", suite.sentEmails)
+ suite.processor = testrig.NewTestProcessor(suite.db, suite.storage, suite.federator, suite.emailSender, suite.mediaManager, clientWorker, fedWorker)
+ suite.instanceModule = instance.New(suite.processor).(*instance.Module)
+ testrig.StandardDBSetup(suite.db, nil)
+ testrig.StandardStorageSetup(suite.storage, "../../../../testrig/media")
+}
+
+func (suite *InstanceStandardTestSuite) TearDownTest() {
+ testrig.StandardDBTeardown(suite.db)
+ testrig.StandardStorageTeardown(suite.storage)
+}
+
+// newContext returns a gin test context that is authorized as the admin test account and
+// carries a requestMethod request for requestPath with the given body and optional Content-Type.
+func (suite *InstanceStandardTestSuite) newContext(recorder *httptest.ResponseRecorder, requestMethod string, requestBody []byte, requestPath string, bodyContentType string) *gin.Context {
+	ctx, _ := gin.CreateTestContext(recorder)
+
+	ctx.Set(oauth.SessionAuthorizedAccount, suite.testAccounts["admin_account"])
+	ctx.Set(oauth.SessionAuthorizedToken, oauth.DBTokenToToken(suite.testTokens["admin_account"]))
+	ctx.Set(oauth.SessionAuthorizedApplication, suite.testApplications["admin_account"])
+	ctx.Set(oauth.SessionAuthorizedUser, suite.testUsers["admin_account"])
+
+	// NOTE(review): a requestPath with a leading "/" produces "//" in the URI — confirm that's harmless
+	baseURI := fmt.Sprintf("%s://%s", viper.GetString(config.Keys.Protocol), viper.GetString(config.Keys.Host))
+	requestURI := fmt.Sprintf("%s/%s", baseURI, requestPath)
+
+	// honor the caller's method; this used to be hard-coded to PATCH, ignoring requestMethod
+	ctx.Request = httptest.NewRequest(requestMethod, requestURI, bytes.NewReader(requestBody))
+	if bodyContentType != "" {
+		ctx.Request.Header.Set("Content-Type", bodyContentType)
+	}
+
+	ctx.Request.Header.Set("accept", "application/json")
+
+	return ctx
+}
diff --git a/internal/api/client/instance/instancepatch_test.go b/internal/api/client/instance/instancepatch_test.go
new file mode 100644
index 000000000..5577cbbcc
--- /dev/null
+++ b/internal/api/client/instance/instancepatch_test.go
@@ -0,0 +1,130 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package instance_test
+
+import (
+ "io"
+ "net/http"
+ "net/http/httptest"
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+ "github.com/superseriousbusiness/gotosocial/internal/api/client/instance"
+ "github.com/superseriousbusiness/gotosocial/testrig"
+)
+
+type InstancePatchTestSuite struct {
+ InstanceStandardTestSuite
+}
+
+// TestInstancePatch1 patches title, contact_username and contact_email together and checks the serialized instance.
+func (suite *InstancePatchTestSuite) TestInstancePatch1() {
+	requestBody, w, err := testrig.CreateMultipartFormData(
+		"", "",
+		map[string]string{
+			"title":            "Example Instance",
+			"contact_username": "admin",
+			"contact_email":    "someone@example.org",
+		})
+	if err != nil {
+		suite.FailNow(err.Error())
+	}
+
+	// set up the request
+	recorder := httptest.NewRecorder()
+	ctx := suite.newContext(recorder, http.MethodPatch, requestBody.Bytes(), instance.InstanceInformationPath, w.FormDataContentType())
+
+	// call the handler
+	suite.instanceModule.InstanceUpdatePATCHHandler(ctx)
+
+	// we should have OK because our request was valid
+	suite.Equal(http.StatusOK, recorder.Code)
+
+	result := recorder.Result()
+	defer result.Body.Close()
+
+	b, err := io.ReadAll(result.Body)
+	suite.NoError(err)
+
+	suite.Equal(`{"uri":"http://localhost:8080","title":"Example Instance","description":"","short_description":"","email":"someone@example.org","version":"","registrations":true,"approval_required":true,"invites_enabled":false,"urls":{"streaming_api":"wss://localhost:8080"},"stats":{"domain_count":0,"status_count":16,"user_count":4},"thumbnail":"","contact_account":{"id":"01F8MH17FWEB39HZJ76B6VXSKF","username":"admin","acct":"admin","display_name":"","locked":false,"bot":false,"created_at":"2022-05-17T13:10:59Z","note":"","url":"http://localhost:8080/@admin","avatar":"","avatar_static":"","header":"","header_static":"","followers_count":1,"following_count":1,"statuses_count":4,"last_status_at":"2021-10-20T10:41:37Z","emojis":[],"fields":[]},"max_toot_chars":5000}`, string(b))
+}
+
+func (suite *InstancePatchTestSuite) TestInstancePatch2() {
+ requestBody, w, err := testrig.CreateMultipartFormData(
+ "", "",
+ map[string]string{
+ "title": "
Geoff's Instance
",
+ })
+ if err != nil {
+ panic(err)
+ }
+ bodyBytes := requestBody.Bytes()
+
+ // set up the request
+ recorder := httptest.NewRecorder()
+ ctx := suite.newContext(recorder, http.MethodPatch, bodyBytes, instance.InstanceInformationPath, w.FormDataContentType())
+
+ // call the handler
+ suite.instanceModule.InstanceUpdatePATCHHandler(ctx)
+
+ // we should have OK because our request was valid
+ suite.Equal(http.StatusOK, recorder.Code)
+
+ result := recorder.Result()
+ defer result.Body.Close()
+
+ b, err := io.ReadAll(result.Body)
+ suite.NoError(err)
+
+ suite.Equal(`{"uri":"http://localhost:8080","title":"Geoff's Instance","description":"","short_description":"","email":"","version":"","registrations":true,"approval_required":true,"invites_enabled":false,"urls":{"streaming_api":"wss://localhost:8080"},"stats":{"domain_count":0,"status_count":16,"user_count":4},"thumbnail":"","max_toot_chars":5000}`, string(b))
+}
+
+func (suite *InstancePatchTestSuite) TestInstancePatch3() {
+ requestBody, w, err := testrig.CreateMultipartFormData(
+ "", "",
+ map[string]string{
+ "short_description": "This is some html, which is allowed in short descriptions.
",
+ })
+ if err != nil {
+ panic(err)
+ }
+ bodyBytes := requestBody.Bytes()
+
+ // set up the request
+ recorder := httptest.NewRecorder()
+ ctx := suite.newContext(recorder, http.MethodPatch, bodyBytes, instance.InstanceInformationPath, w.FormDataContentType())
+
+ // call the handler
+ suite.instanceModule.InstanceUpdatePATCHHandler(ctx)
+
+ // we should have OK because our request was valid
+ suite.Equal(http.StatusOK, recorder.Code)
+
+ result := recorder.Result()
+ defer result.Body.Close()
+
+ b, err := io.ReadAll(result.Body)
+ suite.NoError(err)
+
+ suite.Equal(`{"uri":"http://localhost:8080","title":"localhost:8080","description":"","short_description":"\u003cp\u003eThis is some html, which is \u003cem\u003eallowed\u003c/em\u003e in short descriptions.\u003c/p\u003e","email":"","version":"","registrations":true,"approval_required":true,"invites_enabled":false,"urls":{"streaming_api":"wss://localhost:8080"},"stats":{"domain_count":0,"status_count":16,"user_count":4},"thumbnail":"","max_toot_chars":5000}`, string(b))
+}
+
+func TestInstancePatchTestSuite(t *testing.T) {
+ suite.Run(t, &InstancePatchTestSuite{})
+}
diff --git a/internal/processing/account/create.go b/internal/processing/account/create.go
index bbca11fae..61c4f95ef 100644
--- a/internal/processing/account/create.go
+++ b/internal/processing/account/create.go
@@ -64,7 +64,7 @@ func (p *processor) Create(ctx context.Context, applicationToken oauth2.TokenInf
}
l.Trace("creating new username and account")
- user, err := p.db.NewSignup(ctx, form.Username, text.RemoveHTML(reason), approvalRequired, form.Email, form.Password, form.IP, form.Locale, application.ID, false, false)
+ user, err := p.db.NewSignup(ctx, form.Username, text.SanitizePlaintext(reason), approvalRequired, form.Email, form.Password, form.IP, form.Locale, application.ID, false, false)
if err != nil {
return nil, fmt.Errorf("error creating new signup in the database: %s", err)
}
diff --git a/internal/processing/account/update.go b/internal/processing/account/update.go
index 3d6bbae2a..5fae6e73b 100644
--- a/internal/processing/account/update.go
+++ b/internal/processing/account/update.go
@@ -53,7 +53,7 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, form
if err := validate.DisplayName(*form.DisplayName); err != nil {
return nil, err
}
- account.DisplayName = text.RemoveHTML(*form.DisplayName)
+ account.DisplayName = text.SanitizePlaintext(*form.DisplayName)
}
if form.Note != nil {
diff --git a/internal/processing/admin/createdomainblock.go b/internal/processing/admin/createdomainblock.go
index 3cfaabce0..1c641950c 100644
--- a/internal/processing/admin/createdomainblock.go
+++ b/internal/processing/admin/createdomainblock.go
@@ -59,8 +59,8 @@ func (p *processor) DomainBlockCreate(ctx context.Context, account *gtsmodel.Acc
ID: blockID,
Domain: domain,
CreatedByAccountID: account.ID,
- PrivateComment: text.RemoveHTML(privateComment),
- PublicComment: text.RemoveHTML(publicComment),
+ PrivateComment: text.SanitizePlaintext(privateComment),
+ PublicComment: text.SanitizePlaintext(publicComment),
Obfuscate: obfuscate,
SubscriptionID: subscriptionID,
}
diff --git a/internal/processing/instance.go b/internal/processing/instance.go
index 11f966adb..f4fe2ca79 100644
--- a/internal/processing/instance.go
+++ b/internal/processing/instance.go
@@ -65,7 +65,7 @@ func (p *processor) InstancePatch(ctx context.Context, form *apimodel.InstanceSe
if err := validate.SiteTitle(*form.Title); err != nil {
return nil, gtserror.NewErrorBadRequest(err, fmt.Sprintf("site title invalid: %s", err))
}
- i.Title = text.RemoveHTML(*form.Title) // don't allow html in site title
+ i.Title = text.SanitizePlaintext(*form.Title) // don't allow html in site title
}
// validate & update site contact account if it's set on the form
diff --git a/internal/processing/media/update.go b/internal/processing/media/update.go
index b275b9ffe..116588a48 100644
--- a/internal/processing/media/update.go
+++ b/internal/processing/media/update.go
@@ -45,7 +45,7 @@ func (p *processor) Update(ctx context.Context, account *gtsmodel.Account, media
}
if form.Description != nil {
- attachment.Description = text.SanitizeCaption(*form.Description)
+ attachment.Description = text.SanitizePlaintext(*form.Description)
if err := p.db.UpdateByPrimaryKey(ctx, attachment); err != nil {
return nil, gtserror.NewErrorInternalError(fmt.Errorf("database error updating description: %s", err))
}
diff --git a/internal/processing/status/create.go b/internal/processing/status/create.go
index add8a5bc6..e5f6e9647 100644
--- a/internal/processing/status/create.go
+++ b/internal/processing/status/create.go
@@ -49,7 +49,7 @@ func (p *processor) Create(ctx context.Context, account *gtsmodel.Account, appli
Local: true,
AccountID: account.ID,
AccountURI: account.URI,
- ContentWarning: text.SanitizeCaption(form.SpoilerText),
+ ContentWarning: text.SanitizePlaintext(form.SpoilerText),
ActivityStreamsType: ap.ObjectNote,
Sensitive: form.Sensitive,
Language: form.Language,
diff --git a/internal/processing/status/util.go b/internal/processing/status/util.go
index 190d88f1b..df645189e 100644
--- a/internal/processing/status/util.go
+++ b/internal/processing/status/util.go
@@ -27,7 +27,6 @@ import (
apimodel "github.com/superseriousbusiness/gotosocial/internal/api/model"
"github.com/superseriousbusiness/gotosocial/internal/db"
"github.com/superseriousbusiness/gotosocial/internal/gtsmodel"
- "github.com/superseriousbusiness/gotosocial/internal/text"
"github.com/superseriousbusiness/gotosocial/internal/util"
)
@@ -269,16 +268,13 @@ func (p *processor) ProcessContent(ctx context.Context, form *apimodel.AdvancedS
form.Format = apimodel.StatusFormatDefault
}
- // remove any existing html from the status
- content := text.RemoveHTML(form.Status)
-
// parse content out of the status depending on what format has been submitted
var formatted string
switch form.Format {
case apimodel.StatusFormatPlain:
- formatted = p.formatter.FromPlain(ctx, content, status.Mentions, status.Tags)
+ formatted = p.formatter.FromPlain(ctx, form.Status, status.Mentions, status.Tags)
case apimodel.StatusFormatMarkdown:
- formatted = p.formatter.FromMarkdown(ctx, content, status.Mentions, status.Tags)
+ formatted = p.formatter.FromMarkdown(ctx, form.Status, status.Mentions, status.Tags)
default:
return fmt.Errorf("format %s not recognised as a valid status format", form.Format)
}
diff --git a/internal/text/caption.go b/internal/text/caption.go
deleted file mode 100644
index c3c86b0b1..000000000
--- a/internal/text/caption.go
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package text
-
-// SanitizeCaption runs image captions (or indeed any plain text) through basic sanitization.
-// It returns plain text rather than HTML, in contrast to other functions in this package.
-func SanitizeCaption(in string) string {
- content := preformat(in)
-
- content = RemoveHTML(content)
-
- return postformat(content)
-}
diff --git a/internal/text/caption_test.go b/internal/text/caption_test.go
deleted file mode 100644
index f1337df09..000000000
--- a/internal/text/caption_test.go
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- GoToSocial
- Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
-
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU Affero General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU Affero General Public License for more details.
-
- You should have received a copy of the GNU Affero General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
-*/
-
-package text_test
-
-import (
- "testing"
-
- "github.com/stretchr/testify/suite"
- "github.com/superseriousbusiness/gotosocial/internal/text"
-)
-
-type CaptionTestSuite struct {
- suite.Suite
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption1() {
- dodgyCaption := "this is just a normal caption ;)"
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("this is just a normal caption ;)", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption2() {
- dodgyCaption := "here's a LOUD caption"
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("here's a LOUD caption", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption3() {
- dodgyCaption := ""
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption4() {
- dodgyCaption := `
-
-
-here is
-a multi line
-caption
-with some newlines
-
-
-
-`
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption5() {
- // html-escaped: " hello world"
- dodgyCaption := `<script>console.log('aha!')</script> hello world`
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("hello world", sanitized)
-}
-
-func (suite *CaptionTestSuite) TestSanitizeCaption6() {
- // html-encoded: " hello world"
- dodgyCaption := `<script>console.log('aha!')</script> hello world`
- sanitized := text.SanitizeCaption(dodgyCaption)
- suite.Equal("hello world", sanitized)
-}
-
-func TestCaptionTestSuite(t *testing.T) {
- suite.Run(t, new(CaptionTestSuite))
-}
diff --git a/internal/text/plain.go b/internal/text/plain.go
index 4ef3b3715..bc10d1b67 100644
--- a/internal/text/plain.go
+++ b/internal/text/plain.go
@@ -35,7 +35,7 @@ func (f *formatter) FromPlain(ctx context.Context, plain string, mentions []*gts
content := preformat(plain)
// sanitize any html elements
- content = RemoveHTML(content)
+ content = removeHTML(content)
// format links nicely
content = f.ReplaceLinks(ctx, content)
diff --git a/internal/text/removehtml_test.go b/internal/text/removehtml_test.go
new file mode 100644
index 000000000..0029b45a5
--- /dev/null
+++ b/internal/text/removehtml_test.go
@@ -0,0 +1,57 @@
+/*
+ GoToSocial
+ Copyright (C) 2021-2022 GoToSocial Authors admin@gotosocial.org
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+package text
+
+import (
+ "testing"
+
+ "github.com/stretchr/testify/suite"
+)
+
+const (
+	test_removeHTML                 = `<p>Another test <span class="h-card"><a href="http://fossbros-anonymous.io/@foss_satan" class="u-url mention">@<span>foss_satan</span></a></span><br/><br/><a href="http://localhost:8080/tags/Hashtag" class="mention hashtag" rel="tag">#<span>Hashtag</span></a><br/><br/>Text</p>`
+	test_removedHTML                = `Another test @foss_satan#HashtagText` // every tag dropped, text nodes kept
+	test_withEscapedLiteral         = `it\u0026amp;#39;s its it is`          // raw string: the backslash sequence stays literal
+	test_withEscapedLiteralExpected = `it\u0026amp;#39;s its it is`
+	test_withEscaped                = "it\u0026amp;#39;s its it is" // interpreted string: \u0026 is a real ampersand
+	test_withEscapedExpected        = "it&amp;#39;s its it is"      // the sanitizer decodes the text once, then re-escapes "&"
+)
+
+type RemoveHTMLTestSuite struct {
+ suite.Suite
+}
+
+func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscapedLiteral() {
+ s := removeHTML(test_withEscapedLiteral)
+ suite.Equal(test_withEscapedLiteralExpected, s)
+}
+
+func (suite *RemoveHTMLTestSuite) TestSanitizeWithEscaped() {
+ s := removeHTML(test_withEscaped)
+ suite.Equal(test_withEscapedExpected, s)
+}
+
+func (suite *RemoveHTMLTestSuite) TestRemoveHTML() {
+ s := removeHTML(test_removeHTML)
+ suite.Equal(test_removedHTML, s)
+}
+
+func TestRemoveHTMLTestSuite(t *testing.T) {
+ suite.Run(t, &RemoveHTMLTestSuite{})
+}
diff --git a/internal/text/sanitize.go b/internal/text/sanitize.go
index 897dea34d..d4faabbb1 100644
--- a/internal/text/sanitize.go
+++ b/internal/text/sanitize.go
@@ -46,12 +46,20 @@ var regular *bluemonday.Policy = bluemonday.UGCPolicy().
// Source: https://github.com/microcosm-cc/bluemonday#usage
var strict *bluemonday.Policy = bluemonday.StrictPolicy()
-// SanitizeHTML cleans up HTML in the given string, allowing through only safe HTML elements.
+// removeHTML strictly removes *all* recognized HTML elements from the given string.
+func removeHTML(in string) string {
+ return strict.Sanitize(in)
+}
+
+// SanitizeHTML sanitizes risky html elements from the given string, allowing only safe ones through.
func SanitizeHTML(in string) string {
return regular.Sanitize(in)
}
-// RemoveHTML removes all HTML from the given string.
-func RemoveHTML(in string) string {
- return strict.Sanitize(in)
+// SanitizePlaintext runs text through basic sanitization. This removes
+// any html elements that were in the string, and returns clean plaintext.
+func SanitizePlaintext(in string) string {
+ content := preformat(in)
+ content = removeHTML(content)
+ return postformat(content)
}
diff --git a/internal/text/sanitize_test.go b/internal/text/sanitize_test.go
index 4270e2602..eea5daadb 100644
--- a/internal/text/sanitize_test.go
+++ b/internal/text/sanitize_test.go
@@ -26,17 +26,8 @@ import (
)
const (
- removeHTML = `Another test @foss_satan
#Hashtag
Text
`
- removedHTML = `Another test @foss_satan#HashtagText`
-
- sanitizeHTML = `here's some naughty html: !!!`
- sanitizedHTML = `here's some naughty html: !!!`
-
- withEscapedLiteral = `it\u0026amp;#39;s its it is`
- withEscapedLiteralExpected = `it\u0026amp;#39;s its it is`
- withEscaped = "it\u0026amp;#39;s its it is"
- withEscapedExpected = "it's its it is"
-
+ sanitizeHTML = `here's some naughty html: !!!`
+ sanitizedHTML = `here's some naughty html: !!!`
sanitizeOutgoing = `gotta test some fucking ''''''''' marks
`
sanitizedOutgoing = `gotta test some fucking ''''''''' marks
`
)
@@ -45,11 +36,6 @@ type SanitizeTestSuite struct {
suite.Suite
}
-func (suite *SanitizeTestSuite) TestRemoveHTML() {
- s := text.RemoveHTML(removeHTML)
- suite.Equal(removedHTML, s)
-}
-
func (suite *SanitizeTestSuite) TestSanitizeOutgoing() {
s := text.SanitizeHTML(sanitizeOutgoing)
suite.Equal(sanitizedOutgoing, s)
@@ -60,14 +46,52 @@ func (suite *SanitizeTestSuite) TestSanitizeHTML() {
suite.Equal(sanitizedHTML, s)
}
-func (suite *SanitizeTestSuite) TestSanitizeWithEscapedLiteral() {
- s := text.RemoveHTML(withEscapedLiteral)
- suite.Equal(withEscapedLiteralExpected, s)
+func (suite *SanitizeTestSuite) TestSanitizeCaption1() {
+ dodgyCaption := "this is just a normal caption ;)"
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("this is just a normal caption ;)", sanitized)
}
-func (suite *SanitizeTestSuite) TestSanitizeWithEscaped() {
- s := text.RemoveHTML(withEscaped)
- suite.Equal(withEscapedExpected, s)
+func (suite *SanitizeTestSuite) TestSanitizeCaption2() {
+ dodgyCaption := "here's a LOUD caption"
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("here's a LOUD caption", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption3() {
+ dodgyCaption := ""
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption4() {
+ dodgyCaption := `
+
+
+here is
+a multi line
+caption
+with some newlines
+
+
+
+`
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("here is\na multi line\ncaption\nwith some newlines", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption5() {
+ // html-escaped: " hello world"
+ dodgyCaption := `<script>console.log('aha!')</script> hello world`
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("hello world", sanitized)
+}
+
+func (suite *SanitizeTestSuite) TestSanitizeCaption6() {
+ // html-encoded: " hello world"
+ dodgyCaption := `<script>console.log('aha!')</script> hello world`
+ sanitized := text.SanitizePlaintext(dodgyCaption)
+ suite.Equal("hello world", sanitized)
}
func TestSanitizeTestSuite(t *testing.T) {