Bump golang.org/x/text from 0.3.7 to 0.4.0 (#981)
Bumps [golang.org/x/text](https://github.com/golang/text) from 0.3.7 to 0.4.0. - [Release notes](https://github.com/golang/text/releases) - [Commits](https://github.com/golang/text/compare/v0.3.7...v0.4.0) --- updated-dependencies: - dependency-name: golang.org/x/text dependency-type: direct:production update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] <support@github.com> Signed-off-by: dependabot[bot] <support@github.com> Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
This commit is contained in:
parent
1a23fb0dc9
commit
1638470388
2
go.mod
2
go.mod
|
@ -51,7 +51,7 @@ require (
|
|||
golang.org/x/exp v0.0.0-20220613132600-b0d781184e0d
|
||||
golang.org/x/net v0.0.0-20220909164309-bea034e7d591
|
||||
golang.org/x/oauth2 v0.0.0-20220822191816-0ebed06d0094
|
||||
golang.org/x/text v0.3.7
|
||||
golang.org/x/text v0.4.0
|
||||
gopkg.in/mcuadros/go-syslog.v2 v2.3.0
|
||||
modernc.org/sqlite v1.18.2
|
||||
mvdan.cc/xurls/v2 v2.4.0
|
||||
|
|
3
go.sum
3
go.sum
|
@ -888,8 +888,9 @@ golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
|||
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.3.7 h1:olpwvP2KacW1ZWvsR7uQhoyTYvKAupfQrRGBFM352Gk=
|
||||
golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ=
|
||||
golang.org/x/text v0.4.0 h1:BrVqGRd7+k1DiOgtnFvAkoQEWQvBc25ouMJM6429SFg=
|
||||
golang.org/x/text v0.4.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
|
||||
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
|
||||
|
|
|
@ -1,3 +0,0 @@
|
|||
# This source code refers to The Go Authors for copyright purposes.
|
||||
# The master list of authors is in the main Go distribution,
|
||||
# visible at http://tip.golang.org/AUTHORS.
|
|
@ -1,3 +0,0 @@
|
|||
# This source code was written by the Go contributors.
|
||||
# The master list of contributors is in the main Go distribution,
|
||||
# visible at http://tip.golang.org/CONTRIBUTORS.
|
|
@ -14,19 +14,19 @@ package cases
|
|||
//
|
||||
// The per-rune values have the following format:
|
||||
//
|
||||
// if (exception) {
|
||||
// 15..4 unsigned exception index
|
||||
// } else {
|
||||
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||
// Only 13..8 are used for XOR patterns.
|
||||
// 7 inverseFold (fold to upper, not to lower)
|
||||
// 6 index: interpret the XOR pattern as an index
|
||||
// or isMid if case mode is cIgnorableUncased.
|
||||
// 5..4 CCC: zero (normal or break), above or other
|
||||
// }
|
||||
// 3 exception: interpret this value as an exception index
|
||||
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||
// 2..0 case mode
|
||||
// if (exception) {
|
||||
// 15..4 unsigned exception index
|
||||
// } else {
|
||||
// 15..8 XOR pattern or index to XOR pattern for case mapping
|
||||
// Only 13..8 are used for XOR patterns.
|
||||
// 7 inverseFold (fold to upper, not to lower)
|
||||
// 6 index: interpret the XOR pattern as an index
|
||||
// or isMid if case mode is cIgnorableUncased.
|
||||
// 5..4 CCC: zero (normal or break), above or other
|
||||
// }
|
||||
// 3 exception: interpret this value as an exception index
|
||||
// (TODO: is this bit necessary? Probably implied from case mode.)
|
||||
// 2..0 case mode
|
||||
//
|
||||
// For the non-exceptional cases, a rune must be either uncased, lowercase or
|
||||
// uppercase. If the rune is cased, the XOR pattern maps either a lowercase
|
||||
|
@ -128,37 +128,40 @@ const (
|
|||
// The entry is pointed to by the exception index in an entry. It has the
|
||||
// following format:
|
||||
//
|
||||
// Header
|
||||
// byte 0:
|
||||
// 7..6 unused
|
||||
// 5..4 CCC type (same bits as entry)
|
||||
// 3 unused
|
||||
// 2..0 length of fold
|
||||
// Header:
|
||||
//
|
||||
// byte 1:
|
||||
// 7..6 unused
|
||||
// 5..3 length of 1st mapping of case type
|
||||
// 2..0 length of 2nd mapping of case type
|
||||
// byte 0:
|
||||
// 7..6 unused
|
||||
// 5..4 CCC type (same bits as entry)
|
||||
// 3 unused
|
||||
// 2..0 length of fold
|
||||
//
|
||||
// case 1st 2nd
|
||||
// lower -> upper, title
|
||||
// upper -> lower, title
|
||||
// title -> lower, upper
|
||||
// byte 1:
|
||||
// 7..6 unused
|
||||
// 5..3 length of 1st mapping of case type
|
||||
// 2..0 length of 2nd mapping of case type
|
||||
//
|
||||
// case 1st 2nd
|
||||
// lower -> upper, title
|
||||
// upper -> lower, title
|
||||
// title -> lower, upper
|
||||
//
|
||||
// Lengths with the value 0x7 indicate no value and implies no change.
|
||||
// A length of 0 indicates a mapping to zero-length string.
|
||||
//
|
||||
// Body bytes:
|
||||
// case folding bytes
|
||||
// lowercase mapping bytes
|
||||
// uppercase mapping bytes
|
||||
// titlecase mapping bytes
|
||||
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||
//
|
||||
// case folding bytes
|
||||
// lowercase mapping bytes
|
||||
// uppercase mapping bytes
|
||||
// titlecase mapping bytes
|
||||
// closure mapping bytes (for NFKC_Casefold). (TODO)
|
||||
//
|
||||
// Fallbacks:
|
||||
// missing fold -> lower
|
||||
// missing title -> upper
|
||||
// all missing -> original rune
|
||||
//
|
||||
// missing fold -> lower
|
||||
// missing title -> upper
|
||||
// all missing -> original rune
|
||||
//
|
||||
// exceptions starts with a dummy byte to enforce that there is no zero index
|
||||
// value.
|
||||
|
|
|
@ -966,7 +966,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
|
|||
0x3fd00000, 0x3fd00072, 0x3fd000da, 0x3fd0010c,
|
||||
0x3ff00000, 0x3ff000d1, 0x40100000, 0x401000c3,
|
||||
0x40200000, 0x4020004c, 0x40700000, 0x40800000,
|
||||
0x4085a000, 0x4085a0ba, 0x408e3000, 0x408e30ba,
|
||||
0x4085a000, 0x4085a0ba, 0x408e8000, 0x408e80ba,
|
||||
0x40c00000, 0x40c000b3, 0x41200000, 0x41200111,
|
||||
0x41600000, 0x4160010f, 0x41c00000, 0x41d00000,
|
||||
// Entry 280 - 29F
|
||||
|
@ -994,7 +994,7 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
|
|||
0x4ae00130, 0x4b400000, 0x4b400099, 0x4b4000e8,
|
||||
0x4bc00000, 0x4bc05000, 0x4bc05024, 0x4bc20000,
|
||||
0x4bc20137, 0x4bc5a000, 0x4bc5a137, 0x4be00000,
|
||||
0x4be5a000, 0x4be5a0b4, 0x4beeb000, 0x4beeb0b4,
|
||||
0x4be5a000, 0x4be5a0b4, 0x4bef1000, 0x4bef10b4,
|
||||
0x4c000000, 0x4c300000, 0x4c30013e, 0x4c900000,
|
||||
// Entry 2E0 - 2FF
|
||||
0x4c900001, 0x4cc00000, 0x4cc0012f, 0x4ce00000,
|
||||
|
@ -1012,4 +1012,4 @@ var coreTags = []language.CompactCoreInfo{ // 773 elements
|
|||
|
||||
const specialTagsStr string = "ca-ES-valencia en-US-u-va-posix"
|
||||
|
||||
// Total table size 3147 bytes (3KiB); checksum: BE816D44
|
||||
// Total table size 3147 bytes (3KiB); checksum: 6772C83C
|
||||
|
|
|
@ -50,7 +50,7 @@ func (id Language) Canonicalize() (Language, AliasType) {
|
|||
return normLang(id)
|
||||
}
|
||||
|
||||
// mapLang returns the mapped langID of id according to mapping m.
|
||||
// normLang returns the mapped langID of id according to mapping m.
|
||||
func normLang(id Language) (Language, AliasType) {
|
||||
k := sort.Search(len(AliasMap), func(i int) bool {
|
||||
return AliasMap[i].From >= uint16(id)
|
||||
|
@ -328,7 +328,7 @@ func (r Region) IsPrivateUse() bool {
|
|||
return r.typ()&iso3166UserAssigned != 0
|
||||
}
|
||||
|
||||
type Script uint8
|
||||
type Script uint16
|
||||
|
||||
// getScriptID returns the script id for string s. It assumes that s
|
||||
// is of the format [A-Z][a-z]{3}.
|
||||
|
|
|
@ -270,7 +270,7 @@ func parse(scan *scanner, s string) (t Tag, err error) {
|
|||
} else if n >= 4 {
|
||||
return Und, ErrSyntax
|
||||
} else { // the usual case
|
||||
t, end = parseTag(scan)
|
||||
t, end = parseTag(scan, true)
|
||||
if n := len(scan.token); n == 1 {
|
||||
t.pExt = uint16(end)
|
||||
end = parseExtensions(scan)
|
||||
|
@ -296,7 +296,8 @@ func parse(scan *scanner, s string) (t Tag, err error) {
|
|||
|
||||
// parseTag parses language, script, region and variants.
|
||||
// It returns a Tag and the end position in the input that was parsed.
|
||||
func parseTag(scan *scanner) (t Tag, end int) {
|
||||
// If doNorm is true, then <lang>-<extlang> will be normalized to <extlang>.
|
||||
func parseTag(scan *scanner, doNorm bool) (t Tag, end int) {
|
||||
var e error
|
||||
// TODO: set an error if an unknown lang, script or region is encountered.
|
||||
t.LangID, e = getLangID(scan.token)
|
||||
|
@ -307,14 +308,17 @@ func parseTag(scan *scanner) (t Tag, end int) {
|
|||
for len(scan.token) == 3 && isAlpha(scan.token[0]) {
|
||||
// From http://tools.ietf.org/html/bcp47, <lang>-<extlang> tags are equivalent
|
||||
// to a tag of the form <extlang>.
|
||||
lang, e := getLangID(scan.token)
|
||||
if lang != 0 {
|
||||
t.LangID = lang
|
||||
copy(scan.b[langStart:], lang.String())
|
||||
scan.b[langStart+3] = '-'
|
||||
scan.start = langStart + 4
|
||||
if doNorm {
|
||||
lang, e := getLangID(scan.token)
|
||||
if lang != 0 {
|
||||
t.LangID = lang
|
||||
langStr := lang.String()
|
||||
copy(scan.b[langStart:], langStr)
|
||||
scan.b[langStart+len(langStr)] = '-'
|
||||
scan.start = langStart + len(langStr) + 1
|
||||
}
|
||||
scan.gobble(e)
|
||||
}
|
||||
scan.gobble(e)
|
||||
end = scan.scan()
|
||||
}
|
||||
if len(scan.token) == 4 && isAlpha(scan.token[0]) {
|
||||
|
@ -559,7 +563,7 @@ func parseExtension(scan *scanner) int {
|
|||
case 't': // https://www.ietf.org/rfc/rfc6497.txt
|
||||
scan.scan()
|
||||
if n := len(scan.token); n >= 2 && n <= 3 && isAlpha(scan.token[1]) {
|
||||
_, end = parseTag(scan)
|
||||
_, end = parseTag(scan, false)
|
||||
scan.toLower(start, end)
|
||||
}
|
||||
for len(scan.token) == 2 && !isAlpha(scan.token[1]) {
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -10,18 +10,17 @@
|
|||
// and provides the user with the best experience
|
||||
// (see https://blog.golang.org/matchlang).
|
||||
//
|
||||
//
|
||||
// Matching preferred against supported languages
|
||||
// # Matching preferred against supported languages
|
||||
//
|
||||
// A Matcher for an application that supports English, Australian English,
|
||||
// Danish, and standard Mandarin can be created as follows:
|
||||
//
|
||||
// var matcher = language.NewMatcher([]language.Tag{
|
||||
// language.English, // The first language is used as fallback.
|
||||
// language.MustParse("en-AU"),
|
||||
// language.Danish,
|
||||
// language.Chinese,
|
||||
// })
|
||||
// var matcher = language.NewMatcher([]language.Tag{
|
||||
// language.English, // The first language is used as fallback.
|
||||
// language.MustParse("en-AU"),
|
||||
// language.Danish,
|
||||
// language.Chinese,
|
||||
// })
|
||||
//
|
||||
// This list of supported languages is typically implied by the languages for
|
||||
// which there exists translations of the user interface.
|
||||
|
@ -30,14 +29,14 @@
|
|||
// language tags.
|
||||
// The MatchString finds best matches for such strings:
|
||||
//
|
||||
// handler(w http.ResponseWriter, r *http.Request) {
|
||||
// lang, _ := r.Cookie("lang")
|
||||
// accept := r.Header.Get("Accept-Language")
|
||||
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
|
||||
// handler(w http.ResponseWriter, r *http.Request) {
|
||||
// lang, _ := r.Cookie("lang")
|
||||
// accept := r.Header.Get("Accept-Language")
|
||||
// tag, _ := language.MatchStrings(matcher, lang.String(), accept)
|
||||
//
|
||||
// // tag should now be used for the initialization of any
|
||||
// // locale-specific service.
|
||||
// }
|
||||
// // tag should now be used for the initialization of any
|
||||
// // locale-specific service.
|
||||
// }
|
||||
//
|
||||
// The Matcher's Match method can be used to match Tags directly.
|
||||
//
|
||||
|
@ -48,8 +47,7 @@
|
|||
// For instance, it will know that a reader of Bokmål Danish can read Norwegian
|
||||
// and will know that Cantonese ("yue") is a good match for "zh-HK".
|
||||
//
|
||||
//
|
||||
// Using match results
|
||||
// # Using match results
|
||||
//
|
||||
// To guarantee a consistent user experience to the user it is important to
|
||||
// use the same language tag for the selection of any locale-specific services.
|
||||
|
@ -58,9 +56,9 @@
|
|||
// More subtly confusing is using the wrong sorting order or casing
|
||||
// algorithm for a certain language.
|
||||
//
|
||||
// All the packages in x/text that provide locale-specific services
|
||||
// (e.g. collate, cases) should be initialized with the tag that was
|
||||
// obtained at the start of an interaction with the user.
|
||||
// All the packages in x/text that provide locale-specific services
|
||||
// (e.g. collate, cases) should be initialized with the tag that was
|
||||
// obtained at the start of an interaction with the user.
|
||||
//
|
||||
// Note that Tag that is returned by Match and MatchString may differ from any
|
||||
// of the supported languages, as it may contain carried over settings from
|
||||
|
@ -70,8 +68,7 @@
|
|||
// Match and MatchString both return the index of the matched supported tag
|
||||
// to simplify associating such data with the matched tag.
|
||||
//
|
||||
//
|
||||
// Canonicalization
|
||||
// # Canonicalization
|
||||
//
|
||||
// If one uses the Matcher to compare languages one does not need to
|
||||
// worry about canonicalization.
|
||||
|
@ -92,10 +89,9 @@
|
|||
// equivalence relations. The CanonType type can be used to alter the
|
||||
// canonicalization form.
|
||||
//
|
||||
// References
|
||||
// # References
|
||||
//
|
||||
// BCP 47 - Tags for Identifying Languages http://tools.ietf.org/html/bcp47
|
||||
//
|
||||
package language // import "golang.org/x/text/language"
|
||||
|
||||
// TODO: explanation on how to match languages for your own locale-specific
|
||||
|
|
|
@ -1,39 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !go1.2
|
||||
// +build !go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
func sortStable(s sort.Interface) {
|
||||
ss := stableSort{
|
||||
s: s,
|
||||
pos: make([]int, s.Len()),
|
||||
}
|
||||
for i := range ss.pos {
|
||||
ss.pos[i] = i
|
||||
}
|
||||
sort.Sort(&ss)
|
||||
}
|
||||
|
||||
type stableSort struct {
|
||||
s sort.Interface
|
||||
pos []int
|
||||
}
|
||||
|
||||
func (s *stableSort) Len() int {
|
||||
return len(s.pos)
|
||||
}
|
||||
|
||||
func (s *stableSort) Less(i, j int) bool {
|
||||
return s.s.Less(i, j) || !s.s.Less(j, i) && s.pos[i] < s.pos[j]
|
||||
}
|
||||
|
||||
func (s *stableSort) Swap(i, j int) {
|
||||
s.s.Swap(i, j)
|
||||
s.pos[i], s.pos[j] = s.pos[j], s.pos[i]
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build go1.2
|
||||
// +build go1.2
|
||||
|
||||
package language
|
||||
|
||||
import "sort"
|
||||
|
||||
var sortStable = sort.Stable
|
|
@ -545,7 +545,7 @@ type bestMatch struct {
|
|||
// match as the preferred match.
|
||||
//
|
||||
// If pin is true and have and tag are a strong match, it will henceforth only
|
||||
// consider matches for this language. This corresponds to the nothing that most
|
||||
// consider matches for this language. This corresponds to the idea that most
|
||||
// users have a strong preference for the first defined language. A user can
|
||||
// still prefer a second language over a dialect of the preferred language by
|
||||
// explicitly specifying dialects, e.g. "en, nl, en-GB". In this case pin should
|
||||
|
|
|
@ -6,6 +6,7 @@ package language
|
|||
|
||||
import (
|
||||
"errors"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
|
@ -147,6 +148,7 @@ func update(b *language.Builder, part ...interface{}) (err error) {
|
|||
}
|
||||
|
||||
var errInvalidWeight = errors.New("ParseAcceptLanguage: invalid weight")
|
||||
var errTagListTooLarge = errors.New("tag list exceeds max length")
|
||||
|
||||
// ParseAcceptLanguage parses the contents of an Accept-Language header as
|
||||
// defined in http://www.ietf.org/rfc/rfc2616.txt and returns a list of Tags and
|
||||
|
@ -164,6 +166,10 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
|||
}
|
||||
}()
|
||||
|
||||
if strings.Count(s, "-") > 1000 {
|
||||
return nil, nil, errTagListTooLarge
|
||||
}
|
||||
|
||||
var entry string
|
||||
for s != "" {
|
||||
if entry, s = split(s, ','); entry == "" {
|
||||
|
@ -201,7 +207,7 @@ func ParseAcceptLanguage(s string) (tag []Tag, q []float32, err error) {
|
|||
tag = append(tag, t)
|
||||
q = append(q, float32(w))
|
||||
}
|
||||
sortStable(&tagSort{tag, q})
|
||||
sort.Stable(&tagSort{tag, q})
|
||||
return tag, q, nil
|
||||
}
|
||||
|
||||
|
|
|
@ -39,12 +39,12 @@ const (
|
|||
_Hani = 57
|
||||
_Hans = 59
|
||||
_Hant = 60
|
||||
_Qaaa = 143
|
||||
_Qaai = 151
|
||||
_Qabx = 192
|
||||
_Zinh = 245
|
||||
_Zyyy = 250
|
||||
_Zzzz = 251
|
||||
_Qaaa = 147
|
||||
_Qaai = 155
|
||||
_Qabx = 196
|
||||
_Zinh = 252
|
||||
_Zyyy = 257
|
||||
_Zzzz = 258
|
||||
)
|
||||
|
||||
var regionToGroups = []uint8{ // 358 elements
|
||||
|
@ -265,9 +265,9 @@ var matchScript = []scriptIntelligibility{ // 26 elements
|
|||
13: {wantLang: 0x39d, haveLang: 0x139, wantScript: 0x36, haveScript: 0x5a, distance: 0xa},
|
||||
14: {wantLang: 0x3be, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
|
||||
15: {wantLang: 0x3fa, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
|
||||
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xcf, haveScript: 0x5a, distance: 0xa},
|
||||
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xde, haveScript: 0x5a, distance: 0xa},
|
||||
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe1, haveScript: 0x5a, distance: 0xa},
|
||||
16: {wantLang: 0x40c, haveLang: 0x139, wantScript: 0xd4, haveScript: 0x5a, distance: 0xa},
|
||||
17: {wantLang: 0x450, haveLang: 0x139, wantScript: 0xe3, haveScript: 0x5a, distance: 0xa},
|
||||
18: {wantLang: 0x461, haveLang: 0x139, wantScript: 0xe6, haveScript: 0x5a, distance: 0xa},
|
||||
19: {wantLang: 0x46f, haveLang: 0x139, wantScript: 0x2c, haveScript: 0x5a, distance: 0xa},
|
||||
20: {wantLang: 0x476, haveLang: 0x3e2, wantScript: 0x5a, haveScript: 0x20, distance: 0xa},
|
||||
21: {wantLang: 0x4b4, haveLang: 0x139, wantScript: 0x5, haveScript: 0x5a, distance: 0xa},
|
||||
|
|
|
@ -33,7 +33,7 @@ func In(rt *unicode.RangeTable) Set {
|
|||
return setFunc(func(r rune) bool { return unicode.Is(rt, r) })
|
||||
}
|
||||
|
||||
// In creates a Set with a Contains method that returns true for all runes not
|
||||
// NotIn creates a Set with a Contains method that returns true for all runes not
|
||||
// in the given RangeTable.
|
||||
func NotIn(rt *unicode.RangeTable) Set {
|
||||
return setFunc(func(r rune) bool { return !unicode.Is(rt, r) })
|
||||
|
|
|
@ -316,7 +316,7 @@ func (p *Profile) Compare(a, b string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
return bytes.Compare(akey, bkey) == 0
|
||||
return bytes.Equal(akey, bkey)
|
||||
}
|
||||
|
||||
// Allowed returns a runes.Set containing every rune that is a member of the
|
||||
|
|
|
@ -193,14 +193,14 @@ func (p *paragraph) run() {
|
|||
//
|
||||
// At the end of this function:
|
||||
//
|
||||
// - The member variable matchingPDI is set to point to the index of the
|
||||
// matching PDI character for each isolate initiator character. If there is
|
||||
// no matching PDI, it is set to the length of the input text. For other
|
||||
// characters, it is set to -1.
|
||||
// - The member variable matchingIsolateInitiator is set to point to the
|
||||
// index of the matching isolate initiator character for each PDI character.
|
||||
// If there is no matching isolate initiator, or the character is not a PDI,
|
||||
// it is set to -1.
|
||||
// - The member variable matchingPDI is set to point to the index of the
|
||||
// matching PDI character for each isolate initiator character. If there is
|
||||
// no matching PDI, it is set to the length of the input text. For other
|
||||
// characters, it is set to -1.
|
||||
// - The member variable matchingIsolateInitiator is set to point to the
|
||||
// index of the matching isolate initiator character for each PDI character.
|
||||
// If there is no matching isolate initiator, or the character is not a PDI,
|
||||
// it is set to -1.
|
||||
func (p *paragraph) determineMatchingIsolates() {
|
||||
p.matchingPDI = make([]int, p.Len())
|
||||
p.matchingIsolateInitiator = make([]int, p.Len())
|
||||
|
@ -435,7 +435,7 @@ func maxLevel(a, b level) level {
|
|||
}
|
||||
|
||||
// Rule X10, second bullet: Determine the start-of-sequence (sos) and end-of-sequence (eos) types,
|
||||
// either L or R, for each isolating run sequence.
|
||||
// either L or R, for each isolating run sequence.
|
||||
func (p *paragraph) isolatingRunSequence(indexes []int) *isolatingRunSequence {
|
||||
length := len(indexes)
|
||||
types := make([]Class, length)
|
||||
|
@ -495,9 +495,9 @@ func (s *isolatingRunSequence) resolveWeakTypes() {
|
|||
if t == NSM {
|
||||
s.types[i] = precedingCharacterType
|
||||
} else {
|
||||
if t.in(LRI, RLI, FSI, PDI) {
|
||||
precedingCharacterType = ON
|
||||
}
|
||||
// if t.in(LRI, RLI, FSI, PDI) {
|
||||
// precedingCharacterType = ON
|
||||
// }
|
||||
precedingCharacterType = t
|
||||
}
|
||||
}
|
||||
|
@ -905,7 +905,7 @@ func (p *paragraph) getLevels(linebreaks []int) []level {
|
|||
// Lines are concatenated from left to right. So for example, the fifth
|
||||
// character from the left on the third line is
|
||||
//
|
||||
// getReordering(linebreaks)[linebreaks[1] + 4]
|
||||
// getReordering(linebreaks)[linebreaks[1] + 4]
|
||||
//
|
||||
// (linebreaks[1] is the position after the last character of the second
|
||||
// line, which is also the index of the first character on the third line,
|
||||
|
|
|
@ -110,10 +110,11 @@ func (p Properties) BoundaryAfter() bool {
|
|||
}
|
||||
|
||||
// We pack quick check data in 4 bits:
|
||||
// 5: Combines forward (0 == false, 1 == true)
|
||||
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
|
||||
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
|
||||
// 1..0: Number of trailing non-starters.
|
||||
//
|
||||
// 5: Combines forward (0 == false, 1 == true)
|
||||
// 4..3: NFC_QC Yes(00), No (10), or Maybe (11)
|
||||
// 2: NFD_QC Yes (0) or No (1). No also means there is a decomposition.
|
||||
// 1..0: Number of trailing non-starters.
|
||||
//
|
||||
// When all 4 bits are zero, the character is inert, meaning it is never
|
||||
// influenced by normalization.
|
||||
|
|
|
@ -18,16 +18,17 @@ import (
|
|||
// A Form denotes a canonical representation of Unicode code points.
|
||||
// The Unicode-defined normalization and equivalence forms are:
|
||||
//
|
||||
// NFC Unicode Normalization Form C
|
||||
// NFD Unicode Normalization Form D
|
||||
// NFKC Unicode Normalization Form KC
|
||||
// NFKD Unicode Normalization Form KD
|
||||
// NFC Unicode Normalization Form C
|
||||
// NFD Unicode Normalization Form D
|
||||
// NFKC Unicode Normalization Form KC
|
||||
// NFKD Unicode Normalization Form KD
|
||||
//
|
||||
// For a Form f, this documentation uses the notation f(x) to mean
|
||||
// the bytes or string x converted to the given form.
|
||||
// A position n in x is called a boundary if conversion to the form can
|
||||
// proceed independently on both sides:
|
||||
// f(x) == append(f(x[0:n]), f(x[n:])...)
|
||||
//
|
||||
// f(x) == append(f(x[0:n]), f(x[n:])...)
|
||||
//
|
||||
// References: https://unicode.org/reports/tr15/ and
|
||||
// https://unicode.org/notes/tn5/.
|
||||
|
|
|
@ -7315,7 +7315,7 @@ const recompMapPacked = "" +
|
|||
"\x00V\x03\x03\x00\x00\x1e|" + // 0x00560303: 0x00001E7C
|
||||
"\x00v\x03\x03\x00\x00\x1e}" + // 0x00760303: 0x00001E7D
|
||||
"\x00V\x03#\x00\x00\x1e~" + // 0x00560323: 0x00001E7E
|
||||
"\x00v\x03#\x00\x00\x1e\u007f" + // 0x00760323: 0x00001E7F
|
||||
"\x00v\x03#\x00\x00\x1e\x7f" + // 0x00760323: 0x00001E7F
|
||||
"\x00W\x03\x00\x00\x00\x1e\x80" + // 0x00570300: 0x00001E80
|
||||
"\x00w\x03\x00\x00\x00\x1e\x81" + // 0x00770300: 0x00001E81
|
||||
"\x00W\x03\x01\x00\x00\x1e\x82" + // 0x00570301: 0x00001E82
|
||||
|
@ -7342,7 +7342,7 @@ const recompMapPacked = "" +
|
|||
"\x00t\x03\b\x00\x00\x1e\x97" + // 0x00740308: 0x00001E97
|
||||
"\x00w\x03\n\x00\x00\x1e\x98" + // 0x0077030A: 0x00001E98
|
||||
"\x00y\x03\n\x00\x00\x1e\x99" + // 0x0079030A: 0x00001E99
|
||||
"\x01\u007f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
|
||||
"\x01\x7f\x03\a\x00\x00\x1e\x9b" + // 0x017F0307: 0x00001E9B
|
||||
"\x00A\x03#\x00\x00\x1e\xa0" + // 0x00410323: 0x00001EA0
|
||||
"\x00a\x03#\x00\x00\x1e\xa1" + // 0x00610323: 0x00001EA1
|
||||
"\x00A\x03\t\x00\x00\x1e\xa2" + // 0x00410309: 0x00001EA2
|
||||
|
|
|
@ -1146,21 +1146,31 @@ var widthIndex = [1408]uint8{
|
|||
}
|
||||
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.
|
||||
var inverseData = [150][4]byte{
|
||||
|
|
|
@ -1158,21 +1158,31 @@ var widthIndex = [1408]uint8{
|
|||
}
|
||||
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.
|
||||
var inverseData = [150][4]byte{
|
||||
|
|
|
@ -1178,21 +1178,31 @@ var widthIndex = [1408]uint8{
|
|||
}
|
||||
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.
|
||||
var inverseData = [150][4]byte{
|
||||
|
|
|
@ -1179,21 +1179,31 @@ var widthIndex = [1408]uint8{
|
|||
}
|
||||
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.
|
||||
var inverseData = [150][4]byte{
|
||||
|
|
|
@ -1114,21 +1114,31 @@ var widthIndex = [1408]uint8{
|
|||
}
|
||||
|
||||
// inverseData contains 4-byte entries of the following format:
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// <length> <modified UTF-8-encoded rune> <0 padding>
|
||||
//
|
||||
// The last byte of the UTF-8-encoded rune is xor-ed with the last byte of the
|
||||
// UTF-8 encoding of the original rune. Mappings often have the following
|
||||
// pattern:
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// A -> A (U+FF21 -> U+0041)
|
||||
// B -> B (U+FF22 -> U+0042)
|
||||
// ...
|
||||
//
|
||||
// By xor-ing the last byte the same entry can be shared by many mappings. This
|
||||
// reduces the total number of distinct entries by about two thirds.
|
||||
// The resulting entry for the aforementioned mappings is
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// { 0x01, 0xE0, 0x00, 0x00 }
|
||||
//
|
||||
// Using this entry to map U+FF21 (UTF-8 [EF BC A1]), we get
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// E0 ^ A1 = 41.
|
||||
//
|
||||
// Similarly, for U+FF22 (UTF-8 [EF BC A2]), we get
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// E0 ^ A2 = 42.
|
||||
//
|
||||
// Note that because of the xor-ing, the byte sequence stored in the entry is
|
||||
// not valid UTF-8.
|
||||
var inverseData = [150][4]byte{
|
||||
|
|
|
@ -683,7 +683,7 @@ golang.org/x/sys/execabs
|
|||
golang.org/x/sys/internal/unsafeheader
|
||||
golang.org/x/sys/unix
|
||||
golang.org/x/sys/windows
|
||||
# golang.org/x/text v0.3.7
|
||||
# golang.org/x/text v0.4.0
|
||||
## explicit; go 1.17
|
||||
golang.org/x/text/cases
|
||||
golang.org/x/text/internal
|
||||
|
|
Loading…
Reference in New Issue