mirror of
1
Fork 0
forgejo/vendor/github.com/yuin/goldmark/util/util.go

898 lines
25 KiB
Go

// Package util provides utility functions for the goldmark.
package util
import (
"bytes"
"io"
"net/url"
"regexp"
"sort"
"strconv"
"strings"
"unicode/utf8"
)
// A CopyOnWriteBuffer is a byte buffer that copies buffer when
// it need to be changed.
type CopyOnWriteBuffer struct {
buffer []byte
copied bool
}
// NewCopyOnWriteBuffer returns a new CopyOnWriteBuffer.
func NewCopyOnWriteBuffer(buffer []byte) CopyOnWriteBuffer {
return CopyOnWriteBuffer{
buffer: buffer,
copied: false,
}
}
// Write writes given bytes to the buffer.
func (b *CopyOnWriteBuffer) Write(value []byte) {
if !b.copied {
b.buffer = make([]byte, 0, len(b.buffer)+20)
b.copied = true
}
b.buffer = append(b.buffer, value...)
}
// WriteByte writes the given byte to the buffer.
func (b *CopyOnWriteBuffer) WriteByte(c byte) {
if !b.copied {
b.buffer = make([]byte, 0, len(b.buffer)+20)
b.copied = true
}
b.buffer = append(b.buffer, c)
}
// Bytes returns bytes of this buffer.
func (b *CopyOnWriteBuffer) Bytes() []byte {
return b.buffer
}
// IsCopied returns true if buffer has been copied, otherwise false.
func (b *CopyOnWriteBuffer) IsCopied() bool {
return b.copied
}
// IsEscapedPunctuation returns true if caracter at a given index i
// is an escaped punctuation, otherwise false.
func IsEscapedPunctuation(source []byte, i int) bool {
return source[i] == '\\' && i < len(source)-1 && IsPunct(source[i+1])
}
// ReadWhile read the given source while pred is true.
func ReadWhile(source []byte, index [2]int, pred func(byte) bool) (int, bool) {
j := index[0]
ok := false
for ; j < index[1]; j++ {
c1 := source[j]
if pred(c1) {
ok = true
continue
}
break
}
return j, ok
}
// IsBlank returns true if the given string is all space characters.
func IsBlank(bs []byte) bool {
for _, b := range bs {
if !IsSpace(b) {
return false
}
}
return true
}
// VisualizeSpaces visualize invisible space characters.
func VisualizeSpaces(bs []byte) []byte {
bs = bytes.Replace(bs, []byte(" "), []byte("[SPACE]"), -1)
bs = bytes.Replace(bs, []byte("\t"), []byte("[TAB]"), -1)
bs = bytes.Replace(bs, []byte("\n"), []byte("[NEWLINE]\n"), -1)
bs = bytes.Replace(bs, []byte("\r"), []byte("[CR]"), -1)
return bs
}
// TabWidth calculates actual width of a tab at the given position.
func TabWidth(currentPos int) int {
return 4 - currentPos%4
}
// IndentPosition searches an indent position with the given width for the given line.
// If the line contains tab characters, paddings may be not zero.
// currentPos==0 and width==2:
//
// position: 0 1
// [TAB]aaaa
// width: 1234 5678
//
// width=2 is in the tab character. In this case, IndentPosition returns
// (pos=1, padding=2)
func IndentPosition(bs []byte, currentPos, width int) (pos, padding int) {
if width == 0 {
return 0, 0
}
w := 0
l := len(bs)
i := 0
hasTab := false
for ; i < l; i++ {
if bs[i] == '\t' {
w += TabWidth(currentPos + w)
hasTab = true
} else if bs[i] == ' ' {
w++
} else {
break
}
}
if w >= width {
if !hasTab {
return width, 0
}
return i, w - width
}
return -1, -1
}
// IndentPositionPadding searches an indent position with the given width for the given line.
// This function is mostly same as IndentPosition except this function
// takes account into additional paddings.
func IndentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
if width == 0 {
return 0, paddingv
}
w := 0
i := 0
l := len(bs)
for ; i < l; i++ {
if bs[i] == '\t' {
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' {
w++
} else {
break
}
}
if w >= width {
return i - paddingv, w - width
}
return -1, -1
}
// DedentPosition dedents lines by the given width.
func DedentPosition(bs []byte, currentPos, width int) (pos, padding int) {
if width == 0 {
return 0, 0
}
w := 0
l := len(bs)
i := 0
for ; i < l; i++ {
if bs[i] == '\t' {
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' {
w++
} else {
break
}
}
if w >= width {
return i, w - width
}
return i, 0
}
// DedentPositionPadding dedents lines by the given width.
// This function is mostly same as DedentPosition except this function
// takes account into additional paddings.
func DedentPositionPadding(bs []byte, currentPos, paddingv, width int) (pos, padding int) {
if width == 0 {
return 0, paddingv
}
w := 0
i := 0
l := len(bs)
for ; i < l; i++ {
if bs[i] == '\t' {
w += TabWidth(currentPos + w)
} else if bs[i] == ' ' {
w++
} else {
break
}
}
if w >= width {
return i - paddingv, w - width
}
return i - paddingv, 0
}
// IndentWidth calculate an indent width for the given line.
func IndentWidth(bs []byte, currentPos int) (width, pos int) {
l := len(bs)
for i := 0; i < l; i++ {
b := bs[i]
if b == ' ' {
width++
pos++
} else if b == '\t' {
width += TabWidth(currentPos + width)
pos++
} else {
break
}
}
return
}
// FirstNonSpacePosition returns a potisoin line that is a first nonspace
// character.
func FirstNonSpacePosition(bs []byte) int {
i := 0
for ; i < len(bs); i++ {
c := bs[i]
if c == ' ' || c == '\t' {
continue
}
if c == '\n' {
return -1
}
return i
}
return -1
}
// FindClosure returns a position that closes the given opener.
// If codeSpan is set true, it ignores characters in code spans.
// If allowNesting is set true, closures correspond to nested opener will be
// ignored.
func FindClosure(bs []byte, opener, closure byte, codeSpan, allowNesting bool) int {
i := 0
opened := 1
codeSpanOpener := 0
for i < len(bs) {
c := bs[i]
if codeSpan && codeSpanOpener != 0 && c == '`' {
codeSpanCloser := 0
for ; i < len(bs); i++ {
if bs[i] == '`' {
codeSpanCloser++
} else {
i--
break
}
}
if codeSpanCloser == codeSpanOpener {
codeSpanOpener = 0
}
} else if c == '\\' && i < len(bs)-1 && IsPunct(bs[i+1]) {
i += 2
continue
} else if codeSpan && codeSpanOpener == 0 && c == '`' {
for ; i < len(bs); i++ {
if bs[i] == '`' {
codeSpanOpener++
} else {
i--
break
}
}
} else if (codeSpan && codeSpanOpener == 0) || !codeSpan {
if c == closure {
opened--
if opened == 0 {
return i
}
} else if c == opener {
if !allowNesting {
return -1
}
opened++
}
}
i++
}
return -1
}
// TrimLeft trims characters in the given s from head of the source.
// bytes.TrimLeft offers same functionalities, but bytes.TrimLeft
// allocates new buffer for the result.
func TrimLeft(source, b []byte) []byte {
i := 0
for ; i < len(source); i++ {
c := source[i]
found := false
for j := 0; j < len(b); j++ {
if c == b[j] {
found = true
break
}
}
if !found {
break
}
}
return source[i:]
}
// TrimRight trims characters in the given s from tail of the source.
func TrimRight(source, b []byte) []byte {
i := len(source) - 1
for ; i >= 0; i-- {
c := source[i]
found := false
for j := 0; j < len(b); j++ {
if c == b[j] {
found = true
break
}
}
if !found {
break
}
}
return source[:i+1]
}
// TrimLeftLength returns a length of leading specified characters.
func TrimLeftLength(source, s []byte) int {
return len(source) - len(TrimLeft(source, s))
}
// TrimRightLength returns a length of trailing specified characters.
func TrimRightLength(source, s []byte) int {
return len(source) - len(TrimRight(source, s))
}
// TrimLeftSpaceLength returns a length of leading space characters.
func TrimLeftSpaceLength(source []byte) int {
i := 0
for ; i < len(source); i++ {
if !IsSpace(source[i]) {
break
}
}
return i
}
// TrimRightSpaceLength returns a length of trailing space characters.
func TrimRightSpaceLength(source []byte) int {
l := len(source)
i := l - 1
for ; i >= 0; i-- {
if !IsSpace(source[i]) {
break
}
}
if i < 0 {
return l
}
return l - 1 - i
}
// TrimLeftSpace returns a subslice of the given string by slicing off all leading
// space characters.
func TrimLeftSpace(source []byte) []byte {
return TrimLeft(source, spaces)
}
// TrimRightSpace returns a subslice of the given string by slicing off all trailing
// space characters.
func TrimRightSpace(source []byte) []byte {
return TrimRight(source, spaces)
}
// ReplaceSpaces replaces sequence of spaces with the given repl.
func ReplaceSpaces(source []byte, repl byte) []byte {
var ret []byte
start := -1
for i, c := range source {
iss := IsSpace(c)
if start < 0 && iss {
start = i
continue
} else if start >= 0 && iss {
continue
} else if start >= 0 {
if ret == nil {
ret = make([]byte, 0, len(source))
ret = append(ret, source[:start]...)
}
ret = append(ret, repl)
start = -1
}
if ret != nil {
ret = append(ret, c)
}
}
if start >= 0 && ret != nil {
ret = append(ret, repl)
}
if ret == nil {
return source
}
return ret
}
// ToRune decode given bytes start at pos and returns a rune.
func ToRune(source []byte, pos int) rune {
i := pos
for ; i >= 0; i-- {
if utf8.RuneStart(source[i]) {
break
}
}
r, _ := utf8.DecodeRune(source[i:])
return r
}
// ToValidRune returns 0xFFFD if the given rune is invalid, otherwise v.
func ToValidRune(v rune) rune {
if v == 0 || !utf8.ValidRune(v) {
return rune(0xFFFD)
}
return v
}
// ToLinkReference convert given bytes into a valid link reference string.
// ToLinkReference trims leading and trailing spaces and convert into lower
// case and replace spaces with a single space character.
func ToLinkReference(v []byte) string {
v = TrimLeftSpace(v)
v = TrimRightSpace(v)
return strings.ToLower(string(ReplaceSpaces(v, ' ')))
}
var htmlEscapeTable = [256][]byte{nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&quot;"), nil, nil, nil, []byte("&amp;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, []byte("&lt;"), nil, []byte("&gt;"), nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil}
// EscapeHTMLByte returns HTML escaped bytes if the given byte should be escaped,
// otherwise nil.
func EscapeHTMLByte(b byte) []byte {
return htmlEscapeTable[b]
}
// EscapeHTML escapes characters that should be escaped in HTML text.
func EscapeHTML(v []byte) []byte {
cob := NewCopyOnWriteBuffer(v)
n := 0
for i := 0; i < len(v); i++ {
c := v[i]
escaped := htmlEscapeTable[c]
if escaped != nil {
cob.Write(v[n:i])
cob.Write(escaped)
n = i + 1
}
}
if cob.IsCopied() {
cob.Write(v[n:])
}
return cob.Bytes()
}
// UnescapePunctuations unescapes blackslash escaped punctuations.
func UnescapePunctuations(source []byte) []byte {
cob := NewCopyOnWriteBuffer(source)
limit := len(source)
n := 0
for i := 0; i < limit; {
c := source[i]
if i < limit-1 && c == '\\' && IsPunct(source[i+1]) {
cob.Write(source[n:i])
cob.WriteByte(source[i+1])
i += 2
n = i
continue
}
i++
}
if cob.IsCopied() {
cob.Write(source[n:])
}
return cob.Bytes()
}
// ResolveNumericReferences resolve numeric references like '&#1234;" .
func ResolveNumericReferences(source []byte) []byte {
cob := NewCopyOnWriteBuffer(source)
buf := make([]byte, 6, 6)
limit := len(source)
ok := false
n := 0
for i := 0; i < limit; i++ {
if source[i] == '&' {
pos := i
next := i + 1
if next < limit && source[next] == '#' {
nnext := next + 1
if nnext < limit {
nc := source[nnext]
// code point like #x22;
if nnext < limit && nc == 'x' || nc == 'X' {
start := nnext + 1
i, ok = ReadWhile(source, [2]int{start, limit}, IsHexDecimal)
if ok && i < limit && source[i] == ';' {
v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 16, 32)
cob.Write(source[n:pos])
n = i + 1
runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
cob.Write(buf[:runeSize])
continue
}
// code point like #1234;
} else if nc >= '0' && nc <= '9' {
start := nnext
i, ok = ReadWhile(source, [2]int{start, limit}, IsNumeric)
if ok && i < limit && i-start < 8 && source[i] == ';' {
v, _ := strconv.ParseUint(BytesToReadOnlyString(source[start:i]), 0, 32)
cob.Write(source[n:pos])
n = i + 1
runeSize := utf8.EncodeRune(buf, ToValidRune(rune(v)))
cob.Write(buf[:runeSize])
continue
}
}
}
}
i = next - 1
}
}
if cob.IsCopied() {
cob.Write(source[n:])
}
return cob.Bytes()
}
// ResolveEntityNames resolve entity references like '&ouml;" .
func ResolveEntityNames(source []byte) []byte {
cob := NewCopyOnWriteBuffer(source)
limit := len(source)
ok := false
n := 0
for i := 0; i < limit; i++ {
if source[i] == '&' {
pos := i
next := i + 1
if !(next < limit && source[next] == '#') {
start := next
i, ok = ReadWhile(source, [2]int{start, limit}, IsAlphaNumeric)
if ok && i < limit && source[i] == ';' {
name := BytesToReadOnlyString(source[start:i])
entity, ok := LookUpHTML5EntityByName(name)
if ok {
cob.Write(source[n:pos])
n = i + 1
cob.Write(entity.Characters)
continue
}
}
}
i = next - 1
}
}
if cob.IsCopied() {
cob.Write(source[n:])
}
return cob.Bytes()
}
var htmlSpace = []byte("%20")
// URLEscape escape the given URL.
// If resolveReference is set true:
// 1. unescape punctuations
// 2. resolve numeric references
// 3. resolve entity references
//
// URL encoded values (%xx) are keeped as is.
func URLEscape(v []byte, resolveReference bool) []byte {
if resolveReference {
v = UnescapePunctuations(v)
v = ResolveNumericReferences(v)
v = ResolveEntityNames(v)
}
cob := NewCopyOnWriteBuffer(v)
limit := len(v)
n := 0
for i := 0; i < limit; {
c := v[i]
if urlEscapeTable[c] == 1 {
i++
continue
}
if c == '%' && i+2 < limit && IsHexDecimal(v[i+1]) && IsHexDecimal(v[i+1]) {
i += 3
continue
}
u8len := utf8lenTable[c]
if u8len == 99 { // invalid utf8 leading byte, skip it
i++
continue
}
if c == ' ' {
cob.Write(v[n:i])
cob.Write(htmlSpace)
i++
n = i
continue
}
if int(u8len) >= len(v) {
u8len = int8(len(v) - 1)
}
if u8len == 0 {
i++
n = i
continue
}
cob.Write(v[n:i])
stop := i + int(u8len)
if stop > len(v) {
i++
n = i
continue
}
cob.Write(StringToReadOnlyBytes(url.QueryEscape(string(v[i:stop]))))
i += int(u8len)
n = i
}
if cob.IsCopied() && n < limit {
cob.Write(v[n:])
}
return cob.Bytes()
}
// FindURLIndex returns a stop index value if the given bytes seem an URL.
// This function is equivalent to [A-Za-z][A-Za-z0-9.+-]{1,31}:[^<>\x00-\x20]* .
func FindURLIndex(b []byte) int {
i := 0
if !(len(b) > 0 && urlTable[b[i]]&7 == 7) {
return -1
}
i++
for ; i < len(b); i++ {
c := b[i]
if urlTable[c]&4 != 4 {
break
}
}
if i == 1 || i > 33 || i >= len(b) {
return -1
}
if b[i] != ':' {
return -1
}
i++
for ; i < len(b); i++ {
c := b[i]
if urlTable[c]&1 != 1 {
break
}
}
return i
}
var emailDomainRegexp = regexp.MustCompile(`^[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*`)
// FindEmailIndex returns a stop index value if the given bytes seem an email address.
func FindEmailIndex(b []byte) int {
// TODO: eliminate regexps
i := 0
for ; i < len(b); i++ {
c := b[i]
if emailTable[c]&1 != 1 {
break
}
}
if i == 0 {
return -1
}
if i >= len(b) || b[i] != '@' {
return -1
}
i++
if i >= len(b) {
return -1
}
match := emailDomainRegexp.FindSubmatchIndex(b[i:])
if match == nil {
return -1
}
return i + match[1]
}
var spaces = []byte(" \t\n\x0b\x0c\x0d")
var spaceTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
var punctTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
// a-zA-Z0-9, ;/?:@&=+$,-_.!~*'()#
var urlEscapeTable = [256]int8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
var utf8lenTable = [256]int8{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 99, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 99, 99, 99, 99, 99, 99, 99, 99}
var urlTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 5, 1, 5, 5, 1, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 0, 1, 0, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
var emailTable = [256]uint8{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
// UTF8Len returns a byte length of the utf-8 character.
func UTF8Len(b byte) int8 {
return utf8lenTable[b]
}
// IsPunct returns true if the given character is a punctuation, otherwise false.
func IsPunct(c byte) bool {
return punctTable[c] == 1
}
// IsSpace returns true if the given character is a space, otherwise false.
func IsSpace(c byte) bool {
return spaceTable[c] == 1
}
// IsNumeric returns true if the given character is a numeric, otherwise false.
func IsNumeric(c byte) bool {
return c >= '0' && c <= '9'
}
// IsHexDecimal returns true if the given character is a hexdecimal, otherwise false.
func IsHexDecimal(c byte) bool {
return c >= '0' && c <= '9' || c >= 'a' && c <= 'f' || c >= 'A' && c <= 'F'
}
// IsAlphaNumeric returns true if the given character is a alphabet or a numeric, otherwise false.
func IsAlphaNumeric(c byte) bool {
return c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9'
}
// A BufWriter is a subset of the bufio.Writer .
type BufWriter interface {
io.Writer
Available() int
Buffered() int
Flush() error
WriteByte(c byte) error
WriteRune(r rune) (size int, err error)
WriteString(s string) (int, error)
}
// A PrioritizedValue struct holds pair of an arbitrary value and a priority.
type PrioritizedValue struct {
// Value is an arbitrary value that you want to prioritize.
Value interface{}
// Priority is a priority of the value.
Priority int
}
// PrioritizedSlice is a slice of the PrioritizedValues
type PrioritizedSlice []PrioritizedValue
// Sort sorts the PrioritizedSlice in ascending order.
func (s PrioritizedSlice) Sort() {
sort.Slice(s, func(i, j int) bool {
return s[i].Priority < s[j].Priority
})
}
// Remove removes the given value from this slice.
func (s PrioritizedSlice) Remove(v interface{}) PrioritizedSlice {
i := 0
found := false
for ; i < len(s); i++ {
if s[i].Value == v {
found = true
break
}
}
if !found {
return s
}
return append(s[:i], s[i+1:]...)
}
// Prioritized returns a new PrioritizedValue.
func Prioritized(v interface{}, priority int) PrioritizedValue {
return PrioritizedValue{v, priority}
}
func bytesHash(b []byte) uint64 {
var hash uint64 = 5381
for _, c := range b {
hash = ((hash << 5) + hash) + uint64(c)
}
return hash
}
// BytesFilter is a efficient data structure for checking whether bytes exist or not.
// BytesFilter is thread-safe.
type BytesFilter interface {
// Add adds given bytes to this set.
Add([]byte)
// Contains return true if this set contains given bytes, otherwise false.
Contains([]byte) bool
// Extend copies this filter and adds given bytes to new filter.
Extend(...[]byte) BytesFilter
}
type bytesFilter struct {
chars [256]uint8
threshold int
slots [][][]byte
}
// NewBytesFilter returns a new BytesFilter.
func NewBytesFilter(elements ...[]byte) BytesFilter {
s := &bytesFilter{
threshold: 3,
slots: make([][][]byte, 64),
}
for _, element := range elements {
s.Add(element)
}
return s
}
func (s *bytesFilter) Add(b []byte) {
l := len(b)
m := s.threshold
if l < s.threshold {
m = l
}
for i := 0; i < m; i++ {
s.chars[b[i]] |= 1 << uint8(i)
}
h := bytesHash(b) % uint64(len(s.slots))
slot := s.slots[h]
if slot == nil {
slot = [][]byte{}
}
s.slots[h] = append(slot, b)
}
func (s *bytesFilter) Extend(bs ...[]byte) BytesFilter {
newFilter := NewBytesFilter().(*bytesFilter)
newFilter.chars = s.chars
newFilter.threshold = s.threshold
for k, v := range s.slots {
newSlot := make([][]byte, len(v))
copy(newSlot, v)
newFilter.slots[k] = v
}
for _, b := range bs {
newFilter.Add(b)
}
return newFilter
}
func (s *bytesFilter) Contains(b []byte) bool {
l := len(b)
m := s.threshold
if l < s.threshold {
m = l
}
for i := 0; i < m; i++ {
if (s.chars[b[i]] & (1 << uint8(i))) == 0 {
return false
}
}
h := bytesHash(b) % uint64(len(s.slots))
slot := s.slots[h]
if slot == nil || len(slot) == 0 {
return false
}
for _, element := range slot {
if bytes.Equal(element, b) {
return true
}
}
return false
}