From 7798a3d746f560860306a331a17e2f242f318fba Mon Sep 17 00:00:00 2001 From: Gusted Date: Wed, 13 Sep 2023 12:04:10 +0200 Subject: [PATCH] [GITEA] Use restricted sanitizer for repository description - Currently the repository description uses the same sanitizer as a normal markdown document. This means that element such as heading and images are allowed and can be abused. - Create a minimal restricted sanitizer for the repository description, which only allows what the postprocessor currently allows, which are links and emojis. - Added unit testing. - Resolves https://codeberg.org/forgejo/forgejo/issues/1202 - Resolves https://codeberg.org/Codeberg/Community/issues/1122 (cherry picked from commit a8afa4cd181d7c31f73d6a8fae4c6a4b9622a425) (cherry picked from commit 0238587c51e2c749413ca5a63e47590399fe5a2b) (cherry picked from commit a8c7bbf728326b992e000a3d19c8833610f960c9) (cherry picked from commit 80e05a8245092b4158c6c970ca0563181b40f2eb) (cherry picked from commit f5af5050b34891ff16a4ef1f8e3d805fe135238d) (cherry picked from commit 608f981e551db5f38550b622646cc307fe0566b9) (cherry picked from commit f40cff9263c628b634d846511d1274b4257ac90b) (cherry picked from commit 5f113bb61115074bb48f314f71075e228e207bf9) --- models/repo/repo.go | 4 ++-- modules/markup/sanitizer.go | 35 +++++++++++++++++++++++++++++--- modules/markup/sanitizer_test.go | 22 ++++++++++++++++++++ 3 files changed, 56 insertions(+), 5 deletions(-) diff --git a/models/repo/repo.go b/models/repo/repo.go index c4b215e074..db3709f1e8 100644 --- a/models/repo/repo.go +++ b/models/repo/repo.go @@ -584,9 +584,9 @@ func (repo *Repository) DescriptionHTML(ctx context.Context) template.HTML { }, repo.Description) if err != nil { log.Error("Failed to render description for %s (ID: %d): %v", repo.Name, repo.ID, err) - return template.HTML(markup.Sanitize(repo.Description)) + return template.HTML(markup.SanitizeDescription(repo.Description)) } - return template.HTML(markup.Sanitize(desc)) + return template.HTML(markup.SanitizeDescription(desc)) } // CloneLink represents different types of clone URLs of repository. diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 48c08831f1..992e85b989 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -18,9 +18,10 @@ import ( // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow // any modification to the underlying policies once it's been created. type Sanitizer struct { - defaultPolicy *bluemonday.Policy - rendererPolicies map[string]*bluemonday.Policy - init sync.Once + defaultPolicy *bluemonday.Policy + descriptionPolicy *bluemonday.Policy + rendererPolicies map[string]*bluemonday.Policy + init sync.Once } var ( @@ -41,6 +42,7 @@ func NewSanitizer() { func InitializeSanitizer() { sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} sanitizer.defaultPolicy = createDefaultPolicy() + sanitizer.descriptionPolicy = createRepoDescriptionPolicy() for name, renderer := range renderers { sanitizerRules := renderer.SanitizerRules() @@ -161,6 +163,27 @@ func createDefaultPolicy() *bluemonday.Policy { return policy } +// createRepoDescriptionPolicy returns a minimal more strict policy that is used for +// repository descriptions. +func createRepoDescriptionPolicy() *bluemonday.Policy { + policy := bluemonday.NewPolicy() + + // Allow italics and bold. + policy.AllowElements("i", "b", "em", "strong") + + // Allow code. + policy.AllowElements("code") + + // Allow links + policy.AllowAttrs("href", "target", "rel").OnElements("a") + + // Allow classes for emojis + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^emoji$`)).OnElements("img", "span") + policy.AllowAttrs("aria-label").OnElements("span") + + return policy +} + func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { for _, rule := range rules { if rule.AllowDataURIImages { @@ -176,6 +199,12 @@ func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitize } } +// SanitizeDescription sanitizes the HTML generated for a repository description. +func SanitizeDescription(s string) string { + NewSanitizer() + return sanitizer.descriptionPolicy.Sanitize(s) +} + // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { NewSanitizer() diff --git a/modules/markup/sanitizer_test.go b/modules/markup/sanitizer_test.go index 0bc63ff0a7..b7b8792bd7 100644 --- a/modules/markup/sanitizer_test.go +++ b/modules/markup/sanitizer_test.go @@ -73,6 +73,28 @@ func Test_Sanitizer(t *testing.T) { } } +func TestDescriptionSanitizer(t *testing.T) { + NewSanitizer() + + testCases := []string{ + `

Title

`, `Title`, + `image`, ``, + `THUMBS UP`, `THUMBS UP`, + `Hello World`, `Hello World`, + `
`, ``, + `https://example.com`, `https://example.com`, + `Important!`, `Important!`, + `
Click me! Nothing to see here.
`, `Click me! Nothing to see here.`, + ``, ``, + `I have a strong opinion about this.`, `I have a strong opinion about this.`, + `Provides alternative wg(8) tool`, `Provides alternative wg(8) tool`, + } + + for i := 0; i < len(testCases); i += 2 { + assert.Equal(t, testCases[i+1], SanitizeDescription(testCases[i])) + } +} + func TestSanitizeNonEscape(t *testing.T) { descStr := "<script>alert(document.domain)</script>"