Don't treat BOM escape sequence as hidden character. (#18909)
* Don't treat BOM escape sequence as hidden character. - The BOM sequence is a common, harmless escape sequence; it shouldn't be shown as a hidden character. - Follows GitHub's behavior. - Resolves #18837 Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
parent
329b959160
commit
bf2867dec2
|
@ -63,6 +63,7 @@ func EscapeControlBytes(text []byte) (EscapeStatus, []byte) {
|
||||||
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
|
func EscapeControlReader(text io.Reader, output io.Writer) (escaped EscapeStatus, err error) {
|
||||||
buf := make([]byte, 4096)
|
buf := make([]byte, 4096)
|
||||||
readStart := 0
|
readStart := 0
|
||||||
|
runeCount := 0
|
||||||
var n int
|
var n int
|
||||||
var writePos int
|
var writePos int
|
||||||
|
|
||||||
|
@ -79,6 +80,8 @@ readingloop:
|
||||||
|
|
||||||
for i < len(bs) {
|
for i < len(bs) {
|
||||||
r, size := utf8.DecodeRune(bs[i:])
|
r, size := utf8.DecodeRune(bs[i:])
|
||||||
|
runeCount++
|
||||||
|
|
||||||
// Now handle the codepoints
|
// Now handle the codepoints
|
||||||
switch {
|
switch {
|
||||||
case r == utf8.RuneError:
|
case r == utf8.RuneError:
|
||||||
|
@ -113,6 +116,8 @@ readingloop:
|
||||||
lineHasRTLScript = false
|
lineHasRTLScript = false
|
||||||
lineHasLTRScript = false
|
lineHasLTRScript = false
|
||||||
|
|
||||||
|
case runeCount == 1 && r == 0xFEFF: // UTF BOM
|
||||||
|
// the first BOM is safe
|
||||||
case r == '\r' || r == '\t' || r == ' ':
|
case r == '\r' || r == '\t' || r == ' ':
|
||||||
// These are acceptable control characters and space characters
|
// These are acceptable control characters and space characters
|
||||||
case unicode.IsSpace(r):
|
case unicode.IsSpace(r):
|
||||||
|
|
|
@ -129,6 +129,14 @@ then resh (ר), and finally heh (ה) (which should appear leftmost).`,
|
||||||
"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
|
"\n" + `if access_level != "user<span class="escaped-code-point" data-escaped="[U+202E]"><span class="char">` + "\u202e" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>// Check if admin<span class="escaped-code-point" data-escaped="[U+2069]"><span class="char">` + "\u2069" + `</span></span> <span class="escaped-code-point" data-escaped="[U+2066]"><span class="char">` + "\u2066" + `</span></span>" {` + "\n",
|
||||||
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
|
status: EscapeStatus{Escaped: true, HasBIDI: true, BadBIDI: true, HasLTRScript: true, HasRTLScript: true},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
// UTF-8/16/32 all use the same codepoint for BOM
|
||||||
|
// Gitea could read UTF-16/32 content and convert into UTF-8 internally then render it, so we only process UTF-8 internally
|
||||||
|
name: "UTF BOM",
|
||||||
|
text: "\xef\xbb\xbftest",
|
||||||
|
result: "\xef\xbb\xbftest",
|
||||||
|
status: EscapeStatus{HasLTRScript: true},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestEscapeControlString(t *testing.T) {
|
func TestEscapeControlString(t *testing.T) {
|
||||||
|
@ -163,10 +171,18 @@ func TestEscapeControlReader(t *testing.T) {
|
||||||
// lets add some control characters to the tests
|
// lets add some control characters to the tests
|
||||||
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
|
tests := make([]escapeControlTest, 0, len(escapeControlTests)*3)
|
||||||
copy(tests, escapeControlTests)
|
copy(tests, escapeControlTests)
|
||||||
|
|
||||||
|
// if there is a BOM, we should keep the BOM
|
||||||
|
addPrefix := func(prefix, s string) string {
|
||||||
|
if strings.HasPrefix(s, "\xef\xbb\xbf") {
|
||||||
|
return s[:3] + prefix + s[3:]
|
||||||
|
}
|
||||||
|
return prefix + s
|
||||||
|
}
|
||||||
for _, test := range escapeControlTests {
|
for _, test := range escapeControlTests {
|
||||||
test.name += " (+Control)"
|
test.name += " (+Control)"
|
||||||
test.text = "\u001E" + test.text
|
test.text = addPrefix("\u001E", test.text)
|
||||||
test.result = `<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">` + "\u001e" + `</span></span>` + test.result
|
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+001E]"><span class="char">`+"\u001e"+`</span></span>`, test.result)
|
||||||
test.status.Escaped = true
|
test.status.Escaped = true
|
||||||
test.status.HasControls = true
|
test.status.HasControls = true
|
||||||
tests = append(tests, test)
|
tests = append(tests, test)
|
||||||
|
@ -174,8 +190,8 @@ func TestEscapeControlReader(t *testing.T) {
|
||||||
|
|
||||||
for _, test := range escapeControlTests {
|
for _, test := range escapeControlTests {
|
||||||
test.name += " (+Mark)"
|
test.name += " (+Mark)"
|
||||||
test.text = "\u0300" + test.text
|
test.text = addPrefix("\u0300", test.text)
|
||||||
test.result = `<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">` + "\u0300" + `</span></span>` + test.result
|
test.result = addPrefix(`<span class="escaped-code-point" data-escaped="[U+0300]"><span class="char">`+"\u0300"+`</span></span>`, test.result)
|
||||||
test.status.Escaped = true
|
test.status.Escaped = true
|
||||||
test.status.HasMarks = true
|
test.status.HasMarks = true
|
||||||
tests = append(tests, test)
|
tests = append(tests, test)
|
||||||
|
|
Loading…
Reference in New Issue