diff --git a/go.mod b/go.mod index 71509ee6a..75956c7d6 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,7 @@ go 1.17 require ( codeberg.org/gruf/go-runners v1.2.0 - codeberg.org/gruf/go-store v1.1.5 + codeberg.org/gruf/go-store v1.2.2 github.com/ReneKroon/ttlcache v1.7.0 github.com/buckket/go-blurhash v1.1.0 github.com/coreos/go-oidc/v3 v3.1.0 @@ -46,8 +46,9 @@ require ( require ( codeberg.org/gruf/go-bytes v1.0.2 // indirect - codeberg.org/gruf/go-errors v1.0.4 // indirect + codeberg.org/gruf/go-errors v1.0.5 // indirect codeberg.org/gruf/go-fastpath v1.0.2 // indirect + codeberg.org/gruf/go-format v1.0.3 // indirect codeberg.org/gruf/go-hashenc v1.0.1 // indirect codeberg.org/gruf/go-logger v1.3.2 // indirect codeberg.org/gruf/go-mutexes v1.0.1 // indirect @@ -107,6 +108,7 @@ require ( github.com/ugorji/go/codec v1.2.6 // indirect github.com/vmihailenco/msgpack/v5 v5.3.5 // indirect github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect + github.com/zeebo/blake3 v0.2.1 // indirect golang.org/x/mod v0.5.1 // indirect golang.org/x/net v0.0.0-20211209124913-491a49abca63 // indirect golang.org/x/sys v0.0.0-20211210111614-af8b64212486 // indirect diff --git a/go.sum b/go.sum index 05d7597e1..082158f63 100644 --- a/go.sum +++ b/go.sum @@ -53,9 +53,13 @@ codeberg.org/gruf/go-bytes v1.0.2/go.mod h1:1v/ibfaosfXSZtRdW2rWaVrDXMc9E3bsi/M9 codeberg.org/gruf/go-cache v1.1.2/go.mod h1:/Dbc+xU72Op3hMn6x2PXF3NE9uIDFeS+sXPF00hN/7o= codeberg.org/gruf/go-errors v1.0.4 h1:jOJCn/GMb6ELLRVlnmpimGRC2CbTreH5/CBZNWh9GZA= codeberg.org/gruf/go-errors v1.0.4/go.mod h1:rJ08LdIE79Jg8vZ2TGylz/I+tZ1UuMJkGK5mNambIfQ= +codeberg.org/gruf/go-errors v1.0.5 h1:rxV70oQkfasUdggLHxOX2QAoJOMFM7XWxHQR45Zx/Fg= +codeberg.org/gruf/go-errors v1.0.5/go.mod h1:n03EpmvcmfzU3/xJKC0XXtleXXJUNFpT2fgISODvZ1Y= codeberg.org/gruf/go-fastpath v1.0.1/go.mod h1:edveE/Kp3Eqi0JJm0lXYdkVrB28cNUkcb/bRGFTPqeI= codeberg.org/gruf/go-fastpath v1.0.2 h1:O3nuYPMXnN89dsgAwVFU5iCGINtPJdITWmbRe2an/iQ= codeberg.org/gruf/go-fastpath v1.0.2/go.mod h1:edveE/Kp3Eqi0JJm0lXYdkVrB28cNUkcb/bRGFTPqeI= +codeberg.org/gruf/go-format v1.0.3 h1:WoUGzTwZe6SIhILNvtr0qNIA7BOOCgdBlk5bUrfeiio= +codeberg.org/gruf/go-format v1.0.3/go.mod h1:k3TLXp1dqAXdDqxlon0yEM+3FFHdNn0D6BVJTwTy5As= codeberg.org/gruf/go-hashenc v1.0.1 h1:EBvNe2wW8IPMUqT1XihB6/IM6KMJDLMFBxIUvmsy1f8= codeberg.org/gruf/go-hashenc v1.0.1/go.mod h1:IfHhPCVScOiYmJLqdCQT9bYVS1nxNTV4ewMUvFWDPtc= codeberg.org/gruf/go-logger v1.3.1/go.mod h1:tBduUc+Yb9vqGRxY9/FB0ZlYznSteLy/KmIANo7zFjA= @@ -74,6 +78,8 @@ codeberg.org/gruf/go-runners v1.2.0 h1:tkoPrwYMkVg1o/C4PGTR1YbC11XX4r06uLPOYajBs codeberg.org/gruf/go-runners v1.2.0/go.mod h1:9gTrmMnO3d+50C+hVzcmGBf+zTuswReS278E2EMvnmw= codeberg.org/gruf/go-store v1.1.5 h1:fp28vzGD15OsAF51CCwi7woH+Y3vb0aMl4OFh9JSjA0= codeberg.org/gruf/go-store v1.1.5/go.mod h1:Q6ev500ddKghDQ8KS4IstL/W9fptDKa2T9oeHP+tXsI= +codeberg.org/gruf/go-store v1.2.2 h1:YJPzJpZv/D3t9hQC00/u76eQDScQw4++OWjfobnjHAA= +codeberg.org/gruf/go-store v1.2.2/go.mod h1:Xjw1U098th0yXF2CCx6jThQ+9FIPWAX9OGjYslO+UtE= dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= @@ -734,6 +740,10 @@ github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= github.com/yuin/goldmark v1.4.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/zeebo/assert v1.1.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= +github.com/zeebo/blake3 v0.2.1 h1:O+N0Y8Re2XAYjp0adlZDA2juyRguhMfPCgh8YIf7vyE= +github.com/zeebo/blake3 v0.2.1/go.mod h1:TSQ0KjMH+pht+bRyvVooJ1rBpvvngSGaPISafq9MxJk= +github.com/zeebo/pcg v1.0.1/go.mod h1:09F0S9iiKrwn9rlI5yjLkmrug154/YRW6KnnXVDM/l4= github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q= go.etcd.io/etcd/api/v3 v3.5.0/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= go.etcd.io/etcd/api/v3 v3.5.1/go.mod h1:cbVKeC6lCfl7j/8jBhAK6aIYO9XOjdptoxU/nLQcPvs= @@ -949,6 +959,7 @@ golang.org/x/sys v0.0.0-20200523222454-059865788121/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20200803210538-64077c9b5642/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200905004654-be1d3432aa8f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201014080544-cc95f250f6bc/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201126233918-771906719818/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201201145000-ef89a241ccb3/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= diff --git a/vendor/codeberg.org/gruf/go-errors/data.go b/vendor/codeberg.org/gruf/go-errors/data.go index 3b242f03c..b5226172c 100644 --- a/vendor/codeberg.org/gruf/go-errors/data.go +++ b/vendor/codeberg.org/gruf/go-errors/data.go @@ -4,17 +4,9 @@ import ( "fmt" "sync" - "codeberg.org/gruf/go-bytes" - "codeberg.org/gruf/go-logger" + "codeberg.org/gruf/go-format" ) -// global logfmt data formatter. -var logfmt = logger.TextFormat{ - Strict: false, - Verbose: true, - MaxDepth: 5, -} - // KV is a structure for setting key-value pairs in ErrorData. type KV struct { Key string @@ -31,7 +23,7 @@ type ErrorData interface { Append(...KV) // Implement byte slice representation formatter. - logger.Formattable + format.Formattable // Implement string representation formatter. fmt.Stringer @@ -89,13 +81,22 @@ func (d *errorData) Append(kvs ...KV) { } func (d *errorData) AppendFormat(b []byte) []byte { - buf := bytes.Buffer{B: b} + buf := format.Buffer{B: b} d.mu.Lock() buf.B = append(buf.B, '{') + + // Append data as kv pairs for i := range d.data { - logfmt.AppendKey(&buf, d.data[i].Key) - logfmt.AppendValue(&buf, d.data[i].Value) + key := d.data[i].Key + val := d.data[i].Value + format.Appendf(&buf, "{:k}={:v} ", key, val) } + + // Drop trailing space + if len(d.data) > 0 { + buf.Truncate(1) + } + buf.B = append(buf.B, '}') d.mu.Unlock() return buf.B diff --git a/vendor/codeberg.org/gruf/go-logger/LICENSE b/vendor/codeberg.org/gruf/go-format/LICENSE similarity index 100% rename from vendor/codeberg.org/gruf/go-logger/LICENSE rename to vendor/codeberg.org/gruf/go-format/LICENSE diff --git a/vendor/codeberg.org/gruf/go-format/README.md b/vendor/codeberg.org/gruf/go-format/README.md new file mode 100644 index 000000000..7126e215e --- /dev/null +++ b/vendor/codeberg.org/gruf/go-format/README.md @@ -0,0 +1,16 @@ +# go-format + +String formatting package using Rust-style formatting directives. + +Output is generally more visually-friendly than `"fmt"`, while performance is neck-and-neck. + +README is WIP. + +## todos + +- improved verbose printing of number types + +- more test cases + +- improved verbose printing of string ptr types + diff --git a/vendor/codeberg.org/gruf/go-format/buffer.go b/vendor/codeberg.org/gruf/go-format/buffer.go new file mode 100644 index 000000000..393f2fcd3 --- /dev/null +++ b/vendor/codeberg.org/gruf/go-format/buffer.go @@ -0,0 +1,81 @@ +package format + +import ( + "io" + "unicode/utf8" + "unsafe" +) + +// ensure we conform to io.Writer. +var _ io.Writer = (*Buffer)(nil) + +// Buffer is a simple wrapper around a byte slice. +type Buffer struct { + B []byte +} + +// Write will append given byte slice to buffer, fulfilling io.Writer. +func (buf *Buffer) Write(b []byte) (int, error) { + buf.B = append(buf.B, b...) + return len(b), nil +} + +// AppendByte appends given byte to the buffer. +func (buf *Buffer) AppendByte(b byte) { + buf.B = append(buf.B, b) +} + +// AppendRune appends given rune to the buffer. +func (buf *Buffer) AppendRune(r rune) { + if r < utf8.RuneSelf { + buf.B = append(buf.B, byte(r)) + return + } + + l := buf.Len() + for i := 0; i < utf8.UTFMax; i++ { + buf.B = append(buf.B, 0) + } + n := utf8.EncodeRune(buf.B[l:buf.Len()], r) + buf.B = buf.B[:l+n] +} + +// Append will append given byte slice to the buffer. +func (buf *Buffer) Append(b []byte) { + buf.B = append(buf.B, b...) +} + +// AppendString appends given string to the buffer. +func (buf *Buffer) AppendString(s string) { + buf.B = append(buf.B, s...) +} + +// Len returns the length of the buffer's underlying byte slice. +func (buf *Buffer) Len() int { + return len(buf.B) +} + +// Cap returns the capacity of the buffer's underlying byte slice. +func (buf *Buffer) Cap() int { + return cap(buf.B) +} + +// Truncate will reduce the length of the buffer by 'n'. +func (buf *Buffer) Truncate(n int) { + if n > len(buf.B) { + n = len(buf.B) + } + buf.B = buf.B[:buf.Len()-n] +} + +// Reset will reset the buffer length to 0 (retains capacity). +func (buf *Buffer) Reset() { + buf.B = buf.B[:0] +} + +// String returns the underlying byte slice as a string. Please note +// this value is tied directly to the underlying byte slice, if you +// write to the buffer then returned string values will also change. +func (buf *Buffer) String() string { + return *(*string)(unsafe.Pointer(&buf.B)) +} diff --git a/vendor/codeberg.org/gruf/go-format/format.go b/vendor/codeberg.org/gruf/go-format/format.go new file mode 100644 index 000000000..856fe890e --- /dev/null +++ b/vendor/codeberg.org/gruf/go-format/format.go @@ -0,0 +1,565 @@ +package format + +import ( + "reflect" + "strconv" + "unsafe" +) + +// Formattable defines a type capable of being formatted and appended to a byte buffer. +type Formattable interface { + AppendFormat([]byte) []byte +} + +// format is the object passed among the append___ formatting functions. +type format struct { + flags uint8 // 'isKey' and 'verbose' flags + drefs uint8 // current value deref count + curd uint8 // current depth + maxd uint8 // maximum depth + buf *Buffer // out buffer +} + +const ( + // flag bit constants. + isKeyBit = uint8(1) << 0 + isValBit = uint8(1) << 1 + vboseBit = uint8(1) << 2 + panicBit = uint8(1) << 3 +) + +// AtMaxDepth returns whether format is currently at max depth. +func (f format) AtMaxDepth() bool { + return f.curd > f.maxd +} + +// Derefs returns no. times current value has been dereferenced. +func (f format) Derefs() uint8 { + return f.drefs +} + +// IsKey returns whether the isKey flag is set. +func (f format) IsKey() bool { + return (f.flags & isKeyBit) != 0 +} + +// IsValue returns whether the isVal flag is set. +func (f format) IsValue() bool { + return (f.flags & isValBit) != 0 +} + +// Verbose returns whether the verbose flag is set. +func (f format) Verbose() bool { + return (f.flags & vboseBit) != 0 +} + +// Panic returns whether the panic flag is set. +func (f format) Panic() bool { + return (f.flags & panicBit) != 0 +} + +// SetIsKey returns format instance with the isKey bit set to value. +func (f format) SetIsKey() format { + return format{ + flags: f.flags & ^isValBit | isKeyBit, + curd: f.curd, + maxd: f.maxd, + buf: f.buf, + } +} + +// SetIsValue returns format instance with the isVal bit set to value. +func (f format) SetIsValue() format { + return format{ + flags: f.flags & ^isKeyBit | isValBit, + curd: f.curd, + maxd: f.maxd, + buf: f.buf, + } +} + +// SetPanic returns format instance with the panic bit set to value. +func (f format) SetPanic() format { + return format{ + flags: f.flags | panicBit /* handle panic as value */ | isValBit & ^isKeyBit, + curd: f.curd, + maxd: f.maxd, + buf: f.buf, + } +} + +// IncrDepth returns format instance with depth incremented. +func (f format) IncrDepth() format { + return format{ + flags: f.flags, + curd: f.curd + 1, + maxd: f.maxd, + buf: f.buf, + } +} + +// IncrDerefs returns format instance with dereference count incremented. +func (f format) IncrDerefs() format { + return format{ + flags: f.flags, + drefs: f.drefs + 1, + curd: f.curd, + maxd: f.maxd, + buf: f.buf, + } +} + +// appendType appends a type using supplied type str. +func appendType(fmt format, t string) { + for i := uint8(0); i < fmt.Derefs(); i++ { + fmt.buf.AppendByte('*') + } + fmt.buf.AppendString(t) +} + +// appendNilType Appends nil to buf, type included if verbose. +func appendNilType(fmt format, t string) { + if fmt.Verbose() { + fmt.buf.AppendByte('(') + appendType(fmt, t) + fmt.buf.AppendString(`)(nil)`) + } else { + fmt.buf.AppendString(`nil`) + } +} + +// appendByte Appends a single byte to buf. +func appendByte(fmt format, b byte) { + if fmt.IsValue() || fmt.Verbose() { + fmt.buf.AppendString(`'` + string(b) + `'`) + } else { + fmt.buf.AppendByte(b) + } +} + +// appendBytes Appends a quoted byte slice to buf. +func appendBytes(fmt format, b []byte) { + if b == nil { + // Bytes CAN be nil formatted + appendNilType(fmt, `[]byte`) + } else { + // Append bytes as slice + fmt.buf.AppendByte('[') + for _, b := range b { + fmt.buf.AppendByte(b) + fmt.buf.AppendByte(',') + } + if len(b) > 0 { + fmt.buf.Truncate(1) + } + fmt.buf.AppendByte(']') + } +} + +// appendString Appends an escaped, double-quoted string to buf. +func appendString(fmt format, s string) { + switch { + // Key in a key-value pair + case fmt.IsKey(): + if !strconv.CanBackquote(s) { + // Requires quoting AND escaping + fmt.buf.B = strconv.AppendQuote(fmt.buf.B, s) + } else if containsSpaceOrTab(s) { + // Contains space, needs quotes + fmt.buf.AppendString(`"` + s + `"`) + } else { + // All else write as-is + fmt.buf.AppendString(s) + } + + // Value in a key-value pair (always escape+quote) + case fmt.IsValue(): + fmt.buf.B = strconv.AppendQuote(fmt.buf.B, s) + + // Verbose but neither key nor value (always quote) + case fmt.Verbose(): + fmt.buf.AppendString(`"` + s + `"`) + + // All else + default: + fmt.buf.AppendString(s) + } +} + +// appendBool Appends a formatted bool to buf. +func appendBool(fmt format, b bool) { + fmt.buf.B = strconv.AppendBool(fmt.buf.B, b) +} + +// appendInt Appends a formatted int to buf. +func appendInt(fmt format, i int64) { + fmt.buf.B = strconv.AppendInt(fmt.buf.B, i, 10) +} + +// appendUint Appends a formatted uint to buf. +func appendUint(fmt format, u uint64) { + fmt.buf.B = strconv.AppendUint(fmt.buf.B, u, 10) +} + +// appendFloat Appends a formatted float to buf. +func appendFloat(fmt format, f float64) { + fmt.buf.B = strconv.AppendFloat(fmt.buf.B, f, 'G', -1, 64) +} + +// appendComplex Appends a formatted complex128 to buf. +func appendComplex(fmt format, c complex128) { + appendFloat(fmt, real(c)) + fmt.buf.AppendByte('+') + appendFloat(fmt, imag(c)) + fmt.buf.AppendByte('i') +} + +// isNil will safely check if 'v' is nil without dealing with weird Go interface nil bullshit. +func isNil(i interface{}) bool { + e := *(*struct { + _ unsafe.Pointer // type + v unsafe.Pointer // value + })(unsafe.Pointer(&i)) + return (e.v == nil) +} + +// appendIfaceOrReflectValue will attempt to append as interface, falling back to reflection. +func appendIfaceOrRValue(fmt format, i interface{}) { + if !appendIface(fmt, i) { + appendRValue(fmt, reflect.ValueOf(i)) + } +} + +// appendValueNext checks for interface methods before performing appendRValue, checking + incr depth. +func appendRValueOrIfaceNext(fmt format, v reflect.Value) { + // Check we haven't hit max + if fmt.AtMaxDepth() { + fmt.buf.AppendString("...") + return + } + + // Incr the depth + fmt = fmt.IncrDepth() + + // Make actual call + if !v.CanInterface() || !appendIface(fmt, v.Interface()) { + appendRValue(fmt, v) + } +} + +// appendIface parses and Appends a formatted interface value to buf. +func appendIface(fmt format, i interface{}) (ok bool) { + ok = true // default + catchPanic := func() { + if r := recover(); r != nil { + // DON'T recurse catchPanic() + if fmt.Panic() { + panic(r) + } + + // Attempt to decode panic into buf + fmt.buf.AppendString(`!{PANIC=`) + appendIfaceOrRValue(fmt.SetPanic(), r) + fmt.buf.AppendByte('}') + + // Ensure return + ok = true + } + } + + switch i := i.(type) { + // Nil type + case nil: + fmt.buf.AppendString(`nil`) + + // Reflect types + case reflect.Type: + if isNil(i) /* safer nil check */ { + appendNilType(fmt, `reflect.Type`) + } else { + appendType(fmt, `reflect.Type`) + fmt.buf.AppendString(`(` + i.String() + `)`) + } + case reflect.Value: + appendType(fmt, `reflect.Value`) + fmt.buf.AppendByte('(') + fmt.flags |= vboseBit + appendRValue(fmt, i) + fmt.buf.AppendByte(')') + + // Bytes and string types + case byte: + appendByte(fmt, i) + case []byte: + appendBytes(fmt, i) + case string: + appendString(fmt, i) + + // Int types + case int: + appendInt(fmt, int64(i)) + case int8: + appendInt(fmt, int64(i)) + case int16: + appendInt(fmt, int64(i)) + case int32: + appendInt(fmt, int64(i)) + case int64: + appendInt(fmt, i) + + // Uint types + case uint: + appendUint(fmt, uint64(i)) + // case uint8 :: this is 'byte' + case uint16: + appendUint(fmt, uint64(i)) + case uint32: + appendUint(fmt, uint64(i)) + case uint64: + appendUint(fmt, i) + + // Float types + case float32: + appendFloat(fmt, float64(i)) + case float64: + appendFloat(fmt, i) + + // Bool type + case bool: + appendBool(fmt, i) + + // Complex types + case complex64: + appendComplex(fmt, complex128(i)) + case complex128: + appendComplex(fmt, i) + + // Method types + case error: + switch { + case fmt.Verbose(): + ok = false + case isNil(i) /* use safer nil check */ : + appendNilType(fmt, reflect.TypeOf(i).String()) + default: + defer catchPanic() + appendString(fmt, i.Error()) + } + case Formattable: + switch { + case fmt.Verbose(): + ok = false + case isNil(i) /* use safer nil check */ : + appendNilType(fmt, reflect.TypeOf(i).String()) + default: + defer catchPanic() + fmt.buf.B = i.AppendFormat(fmt.buf.B) + } + case interface{ String() string }: + switch { + case fmt.Verbose(): + ok = false + case isNil(i) /* use safer nil check */ : + appendNilType(fmt, reflect.TypeOf(i).String()) + default: + defer catchPanic() + appendString(fmt, i.String()) + } + + // No quick handler + default: + ok = false + } + + return ok +} + +// appendReflectValue will safely append a reflected value. +func appendRValue(fmt format, v reflect.Value) { + switch v.Kind() { + // String and byte types + case reflect.Uint8: + appendByte(fmt, byte(v.Uint())) + case reflect.String: + appendString(fmt, v.String()) + + // Float tpyes + case reflect.Float32, reflect.Float64: + appendFloat(fmt, v.Float()) + + // Int types + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + appendInt(fmt, v.Int()) + + // Uint types + case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: + appendUint(fmt, v.Uint()) + + // Complex types + case reflect.Complex64, reflect.Complex128: + appendComplex(fmt, v.Complex()) + + // Bool type + case reflect.Bool: + appendBool(fmt, v.Bool()) + + // Slice and array types + case reflect.Array: + appendArrayType(fmt, v) + case reflect.Slice: + if v.IsNil() { + appendNilType(fmt, v.Type().String()) + } else { + appendArrayType(fmt, v) + } + + // Map types + case reflect.Map: + if v.IsNil() { + appendNilType(fmt, v.Type().String()) + } else { + appendMapType(fmt, v) + } + + // Struct types + case reflect.Struct: + appendStructType(fmt, v) + + // Deref'able ptr types + case reflect.Ptr, reflect.Interface: + if v.IsNil() { + appendNilType(fmt, v.Type().String()) + } else { + appendRValue(fmt.IncrDerefs(), v.Elem()) + } + + // 'raw' pointer types + case reflect.UnsafePointer: + appendType(fmt, `unsafe.Pointer`) + fmt.buf.AppendByte('(') + if u := v.Pointer(); u != 0 { + fmt.buf.AppendString("0x") + fmt.buf.B = strconv.AppendUint(fmt.buf.B, uint64(u), 16) + } else { + fmt.buf.AppendString(`nil`) + } + fmt.buf.AppendByte(')') + case reflect.Uintptr: + appendType(fmt, `uintptr`) + fmt.buf.AppendByte('(') + if u := v.Uint(); u != 0 { + fmt.buf.AppendString("0x") + fmt.buf.B = strconv.AppendUint(fmt.buf.B, u, 16) + } else { + fmt.buf.AppendString(`nil`) + } + fmt.buf.AppendByte(')') + + // Generic types we don't *exactly* handle + case reflect.Func, reflect.Chan: + if v.IsNil() { + appendNilType(fmt, v.Type().String()) + } else { + fmt.buf.AppendString(v.String()) + } + + // Unhandled kind + default: + fmt.buf.AppendString(v.String()) + } +} + +// appendArrayType Appends an array of unknown type (parsed by reflection) to buf, unlike appendSliceType does NOT catch nil slice. +func appendArrayType(fmt format, v reflect.Value) { + // get no. elements + n := v.Len() + + fmt.buf.AppendByte('[') + + // Append values + for i := 0; i < n; i++ { + appendRValueOrIfaceNext(fmt.SetIsValue(), v.Index(i)) + fmt.buf.AppendByte(',') + } + + // Drop last comma + if n > 0 { + fmt.buf.Truncate(1) + } + + fmt.buf.AppendByte(']') +} + +// appendMapType Appends a map of unknown types (parsed by reflection) to buf. +func appendMapType(fmt format, v reflect.Value) { + // Prepend type if verbose + if fmt.Verbose() { + appendType(fmt, v.Type().String()) + } + + // Get a map iterator + r := v.MapRange() + n := v.Len() + + fmt.buf.AppendByte('{') + + // Iterate pairs + for r.Next() { + appendRValueOrIfaceNext(fmt.SetIsKey(), r.Key()) + fmt.buf.AppendByte('=') + appendRValueOrIfaceNext(fmt.SetIsValue(), r.Value()) + fmt.buf.AppendByte(' ') + } + + // Drop last space + if n > 0 { + fmt.buf.Truncate(1) + } + + fmt.buf.AppendByte('}') +} + +// appendStructType Appends a struct (as a set of key-value fields) to buf. +func appendStructType(fmt format, v reflect.Value) { + // Get value type & no. fields + t := v.Type() + n := v.NumField() + + // Prepend type if verbose + if fmt.Verbose() { + appendType(fmt, v.Type().String()) + } + + fmt.buf.AppendByte('{') + + // Iterate fields + for i := 0; i < n; i++ { + vfield := v.Field(i) + tfield := t.Field(i) + + // Append field name + fmt.buf.AppendString(tfield.Name) + fmt.buf.AppendByte('=') + appendRValueOrIfaceNext(fmt.SetIsValue(), vfield) + + // Iter written count + fmt.buf.AppendByte(' ') + } + + // Drop last space + if n > 0 { + fmt.buf.Truncate(1) + } + + fmt.buf.AppendByte('}') +} + +// containsSpaceOrTab checks if "s" contains space or tabs. +func containsSpaceOrTab(s string) bool { + for _, r := range s { + if r == ' ' || r == '\t' { + return true + } + } + return false +} diff --git a/vendor/codeberg.org/gruf/go-format/formatter.go b/vendor/codeberg.org/gruf/go-format/formatter.go new file mode 100644 index 000000000..640fa3f04 --- /dev/null +++ b/vendor/codeberg.org/gruf/go-format/formatter.go @@ -0,0 +1,352 @@ +package format + +import ( + "strings" +) + +// Formatter allows configuring value and string formatting. +type Formatter struct { + // MaxDepth specifies the max depth of fields the formatter will iterate. + // Once max depth is reached, value will simply be formatted as "...". + // e.g. + // + // MaxDepth=1 + // type A struct{ + // Nested B + // } + // type B struct{ + // Nested C + // } + // type C struct{ + // Field string + // } + // + // Append(&buf, A{}) => {Nested={Nested={Field=...}}} + MaxDepth uint8 +} + +// Append will append formatted form of supplied values into 'buf'. +func (f Formatter) Append(buf *Buffer, v ...interface{}) { + for _, v := range v { + appendIfaceOrRValue(format{maxd: f.MaxDepth, buf: buf}, v) + buf.AppendByte(' ') + } + if len(v) > 0 { + buf.Truncate(1) + } +} + +// Appendf will append the formatted string with supplied values into 'buf'. +// Supported format directives: +// - '{}' => format supplied arg, in place +// - '{0}' => format arg at index 0 of supplied, in place +// - '{:?}' => format supplied arg verbosely, in place +// - '{:k}' => format supplied arg as key, in place +// - '{:v}' => format supplied arg as value, in place +// +// To escape either of '{}' simply append an additional brace e.g. +// - '{{' => '{' +// - '}}' => '}' +// - '{{}}' => '{}' +// - '{{:?}}' => '{:?}' +// +// More formatting directives might be included in the future. +func (f Formatter) Appendf(buf *Buffer, s string, a ...interface{}) { + const ( + // ground state + modeNone = uint8(0) + + // prev reached '{' + modeOpen = uint8(1) + + // prev reached '}' + modeClose = uint8(2) + + // parsing directive index + modeIdx = uint8(3) + + // parsing directive operands + modeOp = uint8(4) + ) + + var ( + // mode is current parsing mode + mode uint8 + + // arg is the current arg index + arg int + + // carg is current directive-set arg index + carg int + + // last is the trailing cursor to see slice windows + last int + + // idx is the current index in 's' + idx int + + // fmt is the base argument formatter + fmt = format{ + maxd: f.MaxDepth, + buf: buf, + } + + // NOTE: these functions are defined here as function + // locals as it turned out to be better for performance + // doing it this way, than encapsulating their logic in + // some kind of parsing structure. Maybe if the parser + // was pooled along with the buffers it might work out + // better, but then it makes more internal functions i.e. + // .Append() .Appendf() less accessible outside package. + // + // Currently, passing '-gcflags "-l=4"' causes a not + // insignificant decrease in ns/op, which is likely due + // to more aggressive function inlining, which this + // function can obviously stand to benefit from :) + + // Str returns current string window slice, and updates + // the trailing cursor 'last' to current 'idx' + Str = func() string { + str := s[last:idx] + last = idx + return str + } + + // MoveUp moves the trailing cursor 'last' just past 'idx' + MoveUp = func() { + last = idx + 1 + } + + // MoveUpTo moves the trailing cursor 'last' either up to + // closest '}', or current 'idx', whichever is furthest + MoveUpTo = func() { + if i := strings.IndexByte(s[idx:], '}'); i >= 0 { + idx += i + } + MoveUp() + } + + // ParseIndex parses an integer from the current string + // window, updating 'last' to 'idx'. The string window + // is ASSUMED to contain only valid ASCII numbers. This + // only returns false if number exceeds platform int size + ParseIndex = func() bool { + // Get current window + str := Str() + if len(str) < 1 { + return true + } + + // Index HAS to fit within platform int + if !can32bitInt(str) && !can64bitInt(str) { + return false + } + + // Build integer from string + carg = 0 + for _, c := range []byte(str) { + carg = carg*10 + int(c-'0') + } + + return true + } + + // ParseOp parses operands from the current string + // window, updating 'last' to 'idx'. The string window + // is ASSUMED to contain only valid operand ASCII. This + // returns success on parsing of operand logic + ParseOp = func() bool { + // Get current window + str := Str() + if len(str) < 1 { + return true + } + + // (for now) only + // accept length = 1 + if len(str) > 1 { + return false + } + + switch str[0] { + case 'k': + fmt.flags |= isKeyBit + case 'v': + fmt.flags |= isValBit + case '?': + fmt.flags |= vboseBit + } + + return true + } + + // AppendArg will take either the directive-set, or + // iterated arg index, check within bounds of 'a' and + // append the that argument formatted to the buffer. + // On failure, it will append an error string + AppendArg = func() { + // Look for idx + if carg < 0 { + carg = arg + } + + // Incr idx + arg++ + + if carg < len(a) { + // Append formatted argument value + appendIfaceOrRValue(fmt, a[carg]) + } else { + // No argument found for index + buf.AppendString(`!{MISSING_ARG}`) + } + } + + // Reset will reset the mode to ground, the flags + // to empty and parsed 'carg' to empty + Reset = func() { + mode = modeNone + fmt.flags = 0 + carg = -1 + } + ) + + for idx = 0; idx < len(s); idx++ { + // Get next char + c := s[idx] + + switch mode { + // Ground mode + case modeNone: + switch c { + case '{': + // Enter open mode + buf.AppendString(Str()) + mode = modeOpen + MoveUp() + case '}': + // Enter close mode + buf.AppendString(Str()) + mode = modeClose + MoveUp() + } + + // Encountered open '{' + case modeOpen: + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + // Starting index + mode = modeIdx + MoveUp() + case '{': + // Escaped bracket + buf.AppendByte('{') + mode = modeNone + MoveUp() + case '}': + // Format arg + AppendArg() + Reset() + MoveUp() + case ':': + // Starting operands + mode = modeOp + MoveUp() + default: + // Bad char, missing a close + buf.AppendString(`!{MISSING_CLOSE}`) + mode = modeNone + MoveUpTo() + } + + // Encountered close '}' + case modeClose: + switch c { + case '}': + // Escaped close bracket + buf.AppendByte('}') + mode = modeNone + MoveUp() + default: + // Missing an open bracket + buf.AppendString(`!{MISSING_OPEN}`) + mode = modeNone + MoveUp() + } + + // Preparing index + case modeIdx: + switch c { + case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9': + case ':': + if !ParseIndex() { + // Unable to parse an integer + buf.AppendString(`!{BAD_INDEX}`) + mode = modeNone + MoveUpTo() + } else { + // Starting operands + mode = modeOp + MoveUp() + } + case '}': + if !ParseIndex() { + // Unable to parse an integer + buf.AppendString(`!{BAD_INDEX}`) + } else { + // Format arg + AppendArg() + } + Reset() + MoveUp() + default: + // Not a valid index character + buf.AppendString(`!{BAD_INDEX}`) + mode = modeNone + MoveUpTo() + } + + // Preparing operands + case modeOp: + switch c { + case 'k', 'v', '?': + // TODO: set flags as received + case '}': + if !ParseOp() { + // Unable to parse operands + buf.AppendString(`!{BAD_OPERAND}`) + } else { + // Format arg + AppendArg() + } + Reset() + MoveUp() + default: + // Not a valid operand char + buf.AppendString(`!{BAD_OPERAND}`) + Reset() + MoveUpTo() + } + } + } + + // Append any remaining + buf.AppendString(s[last:]) +} + +// formatter is the default formatter instance. +var formatter = Formatter{ + MaxDepth: 10, +} + +// Append will append formatted form of supplied values into 'buf' using default formatter. +// See Formatter.Append() for more documentation. +func Append(buf *Buffer, v ...interface{}) { + formatter.Append(buf, v...) +} + +// Appendf will append the formatted string with supplied values into 'buf' using default formatter. +// See Formatter.Appendf() for more documentation. +func Appendf(buf *Buffer, s string, a ...interface{}) { + formatter.Appendf(buf, s, a...) +} diff --git a/vendor/codeberg.org/gruf/go-format/print.go b/vendor/codeberg.org/gruf/go-format/print.go new file mode 100644 index 000000000..288e6af10 --- /dev/null +++ b/vendor/codeberg.org/gruf/go-format/print.go @@ -0,0 +1,88 @@ +package format + +import ( + "io" + "os" + "sync" +) + +// pool is the global printer buffer pool. +var pool = sync.Pool{ + New: func() interface{} { + return &Buffer{} + }, +} + +// getBuf fetches a buffer from pool. +func getBuf() *Buffer { + return pool.Get().(*Buffer) +} + +// putBuf places a Buffer back in pool. +func putBuf(buf *Buffer) { + if buf.Cap() > 64<<10 { + return // drop large + } + buf.Reset() + pool.Put(buf) +} + +// Sprint will format supplied values, returning this string. +func Sprint(v ...interface{}) string { + buf := Buffer{} + Append(&buf, v...) + return buf.String() +} + +// Sprintf will format supplied format string and args, returning this string. +// See Formatter.Appendf() for more documentation. +func Sprintf(s string, a ...interface{}) string { + buf := Buffer{} + Appendf(&buf, s, a...) + return buf.String() +} + +// Print will format supplied values, print this to os.Stdout. +func Print(v ...interface{}) { + Fprint(os.Stdout, v...) //nolint +} + +// Printf will format supplied format string and args, printing this to os.Stdout. +// See Formatter.Appendf() for more documentation. +func Printf(s string, a ...interface{}) { + Fprintf(os.Stdout, s, a...) //nolint +} + +// Println will format supplied values, append a trailing newline and print this to os.Stdout. +func Println(v ...interface{}) { + Fprintln(os.Stdout, v...) //nolint +} + +// Fprint will format supplied values, writing this to an io.Writer. +func Fprint(w io.Writer, v ...interface{}) (int, error) { + buf := getBuf() + Append(buf, v...) + n, err := w.Write(buf.B) + putBuf(buf) + return n, err +} + +// Fprintf will format supplied format string and args, writing this to an io.Writer. +// See Formatter.Appendf() for more documentation. +func Fprintf(w io.Writer, s string, a ...interface{}) (int, error) { + buf := getBuf() + Appendf(buf, s, a...) + n, err := w.Write(buf.B) + putBuf(buf) + return n, err +} + +// Println will format supplied values, append a trailing newline and writer this to an io.Writer. +func Fprintln(w io.Writer, v ...interface{}) (int, error) { + buf := getBuf() + Append(buf, v...) + buf.AppendByte('\n') + n, err := w.Write(buf.B) + putBuf(buf) + return n, err +} diff --git a/vendor/codeberg.org/gruf/go-format/util.go b/vendor/codeberg.org/gruf/go-format/util.go new file mode 100644 index 000000000..68a9e2de3 --- /dev/null +++ b/vendor/codeberg.org/gruf/go-format/util.go @@ -0,0 +1,13 @@ +package format + +import "strconv" + +// can32bitInt returns whether it's possible for 's' to contain an int on 32bit platforms. +func can32bitInt(s string) bool { + return strconv.IntSize == 32 && (0 < len(s) && len(s) < 10) +} + +// can64bitInt returns whether it's possible for 's' to contain an int on 64bit platforms. +func can64bitInt(s string) bool { + return strconv.IntSize == 64 && (0 < len(s) && len(s) < 19) +} diff --git a/vendor/codeberg.org/gruf/go-logger/README.md b/vendor/codeberg.org/gruf/go-logger/README.md deleted file mode 100644 index 57410ea87..000000000 --- a/vendor/codeberg.org/gruf/go-logger/README.md +++ /dev/null @@ -1,13 +0,0 @@ -Fast levelled logging package with customizable formatting. - -Supports logging in 2 modes: -- no locks, fastest possible logging, no guarantees for io.Writer thread safety -- mutex locks during writes, still far faster than standard library logger - -Running without locks isn't likely to cause you any issues*, but if it does, you can wrap your `io.Writer` using `AddSafety()` when instantiating your new Logger. Even when running the benchmarks, this library has no printing issues without locks, so in most cases you'll be fine, but the safety is there if you need it. - -*most logging libraries advertising high speeds are likely not performing mutex locks, which is why with this library you have the option to opt-in/out of them. - -Note there are 2 uses of the unsafe package: -- safer interface nil value checks, uses similar logic to reflect package to check if the value in the internal fat pointer is nil -- casting a byte slice to string to allow sharing of similar byte and string methods, performs same logic as `strings.Builder{}.String()` \ No newline at end of file diff --git a/vendor/codeberg.org/gruf/go-logger/clock.go b/vendor/codeberg.org/gruf/go-logger/clock.go deleted file mode 100644 index cc7d7ed0c..000000000 --- a/vendor/codeberg.org/gruf/go-logger/clock.go +++ /dev/null @@ -1,21 +0,0 @@ -package logger - -import ( - "sync" - "time" - - "codeberg.org/gruf/go-nowish" -) - -var ( - clock = nowish.Clock{} - clockOnce = sync.Once{} -) - -// startClock starts the global nowish clock. -func startClock() { - clockOnce.Do(func() { - clock.Start(time.Millisecond * 100) - clock.SetFormat("2006-01-02 15:04:05") - }) -} diff --git a/vendor/codeberg.org/gruf/go-logger/default.go b/vendor/codeberg.org/gruf/go-logger/default.go deleted file mode 100644 index 3fd65c6b1..000000000 --- a/vendor/codeberg.org/gruf/go-logger/default.go +++ /dev/null @@ -1,107 +0,0 @@ -package logger - -import ( - "os" - "sync" -) - -var ( - instance *Logger - instanceOnce = sync.Once{} -) - -// Default returns the default Logger instance. -func Default() *Logger { - instanceOnce.Do(func() { instance = New(os.Stdout) }) - return instance -} - -// Debug prints the provided arguments with the debug prefix to the global Logger instance. -func Debug(a ...interface{}) { - Default().Debug(a...) -} - -// Debugf prints the provided format string and arguments with the debug prefix to the global Logger instance. -func Debugf(s string, a ...interface{}) { - Default().Debugf(s, a...) -} - -// Info prints the provided arguments with the info prefix to the global Logger instance. -func Info(a ...interface{}) { - Default().Info(a...) -} - -// Infof prints the provided format string and arguments with the info prefix to the global Logger instance. -func Infof(s string, a ...interface{}) { - Default().Infof(s, a...) -} - -// Warn prints the provided arguments with the warn prefix to the global Logger instance. -func Warn(a ...interface{}) { - Default().Warn(a...) -} - -// Warnf prints the provided format string and arguments with the warn prefix to the global Logger instance. -func Warnf(s string, a ...interface{}) { - Default().Warnf(s, a...) -} - -// Error prints the provided arguments with the error prefix to the global Logger instance. -func Error(a ...interface{}) { - Default().Error(a...) -} - -// Errorf prints the provided format string and arguments with the error prefix to the global Logger instance. -func Errorf(s string, a ...interface{}) { - Default().Errorf(s, a...) -} - -// Fatal prints the provided arguments with the fatal prefix to the global Logger instance before exiting the program with os.Exit(1). -func Fatal(a ...interface{}) { - Default().Fatal(a...) -} - -// Fatalf prints the provided format string and arguments with the fatal prefix to the global Logger instance before exiting the program with os.Exit(1). -func Fatalf(s string, a ...interface{}) { - Default().Fatalf(s, a...) -} - -// Log prints the provided arguments with the supplied log level to the global Logger instance. -func Log(lvl LEVEL, a ...interface{}) { - Default().Log(lvl, a...) -} - -// Logf prints the provided format string and arguments with the supplied log level to the global Logger instance. -func Logf(lvl LEVEL, s string, a ...interface{}) { - Default().Logf(lvl, s, a...) -} - -// LogFields prints the provided fields formatted as key-value pairs at the supplied log level to the global Logger instance. -func LogFields(lvl LEVEL, fields map[string]interface{}) { - Default().LogFields(lvl, fields) -} - -// LogValues prints the provided values formatted as-so at the supplied log level to the global Logger instance. -func LogValues(lvl LEVEL, a ...interface{}) { - Default().LogValues(lvl, a...) -} - -// Print simply prints provided arguments to the global Logger instance. -func Print(a ...interface{}) { - Default().Print(a...) -} - -// Printf simply prints provided the provided format string and arguments to the global Logger instance. -func Printf(s string, a ...interface{}) { - Default().Printf(s, a...) -} - -// PrintFields prints the provided fields formatted as key-value pairs to the global Logger instance. -func PrintFields(fields map[string]interface{}) { - Default().PrintFields(fields) -} - -// PrintValues prints the provided values formatted as-so to the global Logger instance. -func PrintValues(a ...interface{}) { - Default().PrintValues(a...) -} diff --git a/vendor/codeberg.org/gruf/go-logger/entry.go b/vendor/codeberg.org/gruf/go-logger/entry.go deleted file mode 100644 index 11e383086..000000000 --- a/vendor/codeberg.org/gruf/go-logger/entry.go +++ /dev/null @@ -1,385 +0,0 @@ -package logger - -import ( - "context" - "fmt" - "time" - - "codeberg.org/gruf/go-bytes" -) - -// Entry defines an entry in the log, it is NOT safe for concurrent use -type Entry struct { - ctx context.Context - lvl LEVEL - buf *bytes.Buffer - log *Logger -} - -// Context returns the current set Entry context.Context -func (e *Entry) Context() context.Context { - return e.ctx -} - -// WithContext updates Entry context value to the supplied -func (e *Entry) WithContext(ctx context.Context) *Entry { - e.ctx = ctx - return e -} - -// Level appends the supplied level to the log entry, and sets the entry level. -// Please note this CAN be called and append log levels multiple times -func (e *Entry) Level(lvl LEVEL) *Entry { - e.log.Format.AppendLevel(e.buf, lvl) - e.buf.WriteByte(' ') - e.lvl = lvl - return e -} - -// Timestamp appends the current timestamp to the log entry. Please note this -// CAN be called and append the timestamp multiple times -func (e *Entry) Timestamp() *Entry { - e.log.Format.AppendTimestamp(e.buf, clock.NowFormat()) - e.buf.WriteByte(' ') - return e -} - -// TimestampIf performs Entry.Timestamp() only IF timestamping is enabled for the Logger. -// Please note this CAN be called multiple times -func (e *Entry) TimestampIf() *Entry { - if e.log.Timestamp { - e.Timestamp() - } - return e -} - -// Hooks applies currently set Hooks to the Entry. Please note this CAN be -// called and perform the Hooks multiple times -func (e *Entry) Hooks() *Entry { - for _, hook := range e.log.Hooks { - hook.Do(e) - } - return e -} - -// Byte appends a byte value to the log entry -func (e *Entry) Byte(value byte) *Entry { - e.log.Format.AppendByte(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// ByteField appends a byte value as key-value pair to the log entry -func (e *Entry) ByteField(key string, value byte) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendByte(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Bytes appends a byte slice value as to the log entry -func (e *Entry) Bytes(value []byte) *Entry { - e.log.Format.AppendBytes(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// BytesField appends a byte slice value as key-value pair to the log entry -func (e *Entry) BytesField(key string, value []byte) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendBytes(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Str appends a string value to the log entry -func (e *Entry) Str(value string) *Entry { - e.log.Format.AppendString(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// StrField appends a string value as key-value pair to the log entry -func (e *Entry) StrField(key string, value string) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendString(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Strs appends a string slice value to the log entry -func (e *Entry) Strs(value []string) *Entry { - e.log.Format.AppendStrings(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// StrsField appends a string slice value as key-value pair to the log entry -func (e *Entry) StrsField(key string, value []string) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendStrings(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Int appends an int value to the log entry -func (e *Entry) Int(value int) *Entry { - e.log.Format.AppendInt(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// IntField appends an int value as key-value pair to the log entry -func (e *Entry) IntField(key string, value int) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendInt(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Ints appends an int slice value to the log entry -func (e *Entry) Ints(value []int) *Entry { - e.log.Format.AppendInts(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// IntsField appends an int slice value as key-value pair to the log entry -func (e *Entry) IntsField(key string, value []int) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendInts(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Uint appends a uint value to the log entry -func (e *Entry) Uint(value uint) *Entry { - e.log.Format.AppendUint(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// UintField appends a uint value as key-value pair to the log entry -func (e *Entry) UintField(key string, value uint) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendUint(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Uints appends a uint slice value to the log entry -func (e *Entry) Uints(value []uint) *Entry { - e.log.Format.AppendUints(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// UintsField appends a uint slice value as key-value pair to the log entry -func (e *Entry) UintsField(key string, value []uint) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendUints(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Float appends a float value to the log entry -func (e *Entry) Float(value float64) *Entry { - e.log.Format.AppendFloat(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// FloatField appends a float value as key-value pair to the log entry -func (e *Entry) FloatField(key string, value float64) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendFloat(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Floats appends a float slice value to the log entry -func (e *Entry) Floats(value []float64) *Entry { - e.log.Format.AppendFloats(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// FloatsField appends a float slice value as key-value pair to the log entry -func (e *Entry) FloatsField(key string, value []float64) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendFloats(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Bool appends a bool value to the log entry -func (e *Entry) Bool(value bool) *Entry { - e.log.Format.AppendBool(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// BoolField appends a bool value as key-value pair to the log entry -func (e *Entry) BoolField(key string, value bool) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendBool(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Bools appends a bool slice value to the log entry -func (e *Entry) Bools(value []bool) *Entry { - e.log.Format.AppendBools(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// BoolsField appends a bool slice value as key-value pair to the log entry -func (e *Entry) BoolsField(key string, value []bool) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendBools(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Time appends a time.Time value to the log entry -func (e *Entry) Time(value time.Time) *Entry { - e.log.Format.AppendTime(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// TimeField appends a time.Time value as key-value pair to the log entry -func (e *Entry) TimeField(key string, value time.Time) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendTime(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Times appends a time.Time slice value to the log entry -func (e *Entry) Times(value []time.Time) *Entry { - e.log.Format.AppendTimes(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// TimesField appends a time.Time slice value as key-value pair to the log entry -func (e *Entry) TimesField(key string, value []time.Time) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendTimes(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// DurationField appends a time.Duration value to the log entry -func (e *Entry) Duration(value time.Duration) *Entry { - e.log.Format.AppendDuration(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// DurationField appends a time.Duration value as key-value pair to the log entry -func (e *Entry) DurationField(key string, value time.Duration) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendDuration(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Durations appends a time.Duration slice value to the log entry -func (e *Entry) Durations(value []time.Duration) *Entry { - e.log.Format.AppendDurations(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// DurationsField appends a time.Duration slice value as key-value pair to the log entry -func (e *Entry) DurationsField(key string, value []time.Duration) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendDurations(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Field appends an interface value as key-value pair to the log entry -func (e *Entry) Field(key string, value interface{}) *Entry { - e.log.Format.AppendKey(e.buf, key) - e.log.Format.AppendValue(e.buf, value) - e.buf.WriteByte(' ') - return e -} - -// Fields appends a map of key-value pairs to the log entry -func (e *Entry) Fields(fields map[string]interface{}) *Entry { - for key, value := range fields { - e.Field(key, value) - } - return e -} - -// Values appends the given values to the log entry formatted as values, without a key. -func (e *Entry) Values(values ...interface{}) *Entry { - for _, value := range values { - e.log.Format.AppendValue(e.buf, value) - e.buf.WriteByte(' ') - } - return e -} - -// Append will append the given args formatted using fmt.Sprint(a...) to the Entry. -func (e *Entry) Append(a ...interface{}) *Entry { - fmt.Fprint(e.buf, a...) - e.buf.WriteByte(' ') - return e -} - -// Appendf will append the given format string and args using fmt.Sprintf(s, a...) to the Entry. -func (e *Entry) Appendf(s string, a ...interface{}) *Entry { - fmt.Fprintf(e.buf, s, a...) - e.buf.WriteByte(' ') - return e -} - -// Msg appends the fmt.Sprint() formatted final message to the log and calls .Send() -func (e *Entry) Msg(a ...interface{}) { - e.log.Format.AppendMsg(e.buf, a...) - e.Send() -} - -// Msgf appends the fmt.Sprintf() formatted final message to the log and calls .Send() -func (e *Entry) Msgf(s string, a ...interface{}) { - e.log.Format.AppendMsgf(e.buf, s, a...) - e.Send() -} - -// Send triggers write of the log entry, skipping if the entry's log LEVEL -// is below the currently set Logger level, and releases the Entry back to -// the Logger's Entry pool. So it is NOT safe to continue using this Entry -// object after calling .Send(), .Msg() or .Msgf() -func (e *Entry) Send() { - // If nothing to do, return - if e.lvl < e.log.Level || e.buf.Len() < 1 { - e.reset() - return - } - - // Ensure a final new line - if e.buf.B[e.buf.Len()-1] != '\n' { - e.buf.WriteByte('\n') - } - - // Write, reset and release - e.log.Output.Write(e.buf.B) - e.reset() -} - -func (e *Entry) reset() { - // Reset all - e.ctx = nil - e.buf.Reset() - e.lvl = unset - - // Release to pool - e.log.pool.Put(e) -} diff --git a/vendor/codeberg.org/gruf/go-logger/format.go b/vendor/codeberg.org/gruf/go-logger/format.go deleted file mode 100644 index 3901ea37f..000000000 --- a/vendor/codeberg.org/gruf/go-logger/format.go +++ /dev/null @@ -1,87 +0,0 @@ -package logger - -import ( - "time" - - "codeberg.org/gruf/go-bytes" -) - -// Check our types impl LogFormat -var _ LogFormat = &TextFormat{} - -// Formattable defines a type capable of writing a string formatted form -// of itself to a supplied byte buffer, and returning the resulting byte -// buffer. Implementing this will greatly speed up formatting of custom -// types passed to LogFormat (assuming they implement checking for this). -type Formattable interface { - AppendFormat([]byte) []byte -} - -// LogFormat defines a method of formatting log entries -type LogFormat interface { - // AppendKey appends given key to the log buffer - AppendKey(buf *bytes.Buffer, key string) - - // AppendLevel appends given log level as key-value pair to the log buffer - AppendLevel(buf *bytes.Buffer, lvl LEVEL) - - // AppendTimestamp appends given timestamp string as key-value pair to the log buffer - AppendTimestamp(buf *bytes.Buffer, fmtNow string) - - // AppendValue appends given interface formatted as value to the log buffer - AppendValue(buf *bytes.Buffer, value interface{}) - - // AppendByte appends given byte value to the log buffer - AppendByte(buf *bytes.Buffer, value byte) - - // AppendBytes appends given byte slice value to the log buffer - AppendBytes(buf *bytes.Buffer, value []byte) - - // AppendString appends given string value to the log buffer - AppendString(buf *bytes.Buffer, value string) - - // AppendStrings appends given string slice value to the log buffer - AppendStrings(buf *bytes.Buffer, value []string) - - // AppendBool appends given bool value to the log buffer - AppendBool(buf *bytes.Buffer, value bool) - - // AppendBools appends given bool slice value to the log buffer - AppendBools(buf *bytes.Buffer, value []bool) - - // AppendInt appends given int value to the log buffer - AppendInt(buf *bytes.Buffer, value int) - - // AppendInts appends given int slice value to the log buffer - AppendInts(buf *bytes.Buffer, value []int) - - // AppendUint appends given uint value to the log buffer - AppendUint(buf *bytes.Buffer, value uint) - - // AppendUints appends given uint slice value to the log buffer - AppendUints(buf *bytes.Buffer, value []uint) - - // AppendFloat appends given float value to the log buffer - AppendFloat(buf *bytes.Buffer, value float64) - - // AppendFloats appends given float slice value to the log buffer - AppendFloats(buf *bytes.Buffer, value []float64) - - // AppendTime appends given time value to the log buffer - AppendTime(buf *bytes.Buffer, value time.Time) - - // AppendTimes appends given time slice value to the log buffer - AppendTimes(buf *bytes.Buffer, value []time.Time) - - // AppendDuration appends given duration value to the log buffer - AppendDuration(buf *bytes.Buffer, value time.Duration) - - // AppendDurations appends given duration slice value to the log buffer - AppendDurations(buf *bytes.Buffer, value []time.Duration) - - // AppendMsg appends given msg as key-value pair to the log buffer using fmt.Sprint(...) formatting - AppendMsg(buf *bytes.Buffer, a ...interface{}) - - // AppendMsgf appends given msg format string as key-value pair to the log buffer using fmt.Sprintf(...) formatting - AppendMsgf(buf *bytes.Buffer, s string, a ...interface{}) -} diff --git a/vendor/codeberg.org/gruf/go-logger/format_text.go b/vendor/codeberg.org/gruf/go-logger/format_text.go deleted file mode 100644 index f9c90f887..000000000 --- a/vendor/codeberg.org/gruf/go-logger/format_text.go +++ /dev/null @@ -1,914 +0,0 @@ -package logger - -import ( - stdfmt "fmt" - "reflect" - "strconv" - "time" - "unsafe" - - "codeberg.org/gruf/go-bytes" -) - -// DefaultTextFormat is the default TextFormat instance -var DefaultTextFormat = TextFormat{ - Strict: false, - Verbose: false, - MaxDepth: 10, - Levels: DefaultLevels(), -} - -// TextFormat is the default LogFormat implementation, with very similar formatting to the -// standard "fmt" package's '%#v' operator. The main difference being that pointers are -// dereferenced as far as possible in order to reach a printable value. It is also *mildly* faster. -type TextFormat struct { - // Strict defines whether to use strict key-value pair formatting, i.e. should the level - // timestamp and msg be formatted as key-value pairs (with forced quoting for msg) - Strict bool - - // Verbose defines whether to increase output verbosity, i.e. include types with nil values - // and force values implementing .String() / .AppendFormat() to be printed as a struct etc. - Verbose bool - - // MaxDepth specifies the max depth of fields the formatter will iterate - MaxDepth uint8 - - // Levels defines the map of log LEVELs to level strings - Levels Levels -} - -// fmt returns a new format instance based on receiver TextFormat and given buffer -func (f TextFormat) fmt(buf *bytes.Buffer) format { - var flags uint8 - if f.Verbose { - flags |= vboseBit - } - return format{ - flags: flags, - curd: 0, - maxd: f.MaxDepth, - buf: buf, - } -} - -func (f TextFormat) AppendKey(buf *bytes.Buffer, key string) { - if len(key) > 0 { - // only append if key is non-zero length - appendString(f.fmt(buf).SetIsKey(true), key) - buf.WriteByte('=') - } -} - -func (f TextFormat) AppendLevel(buf *bytes.Buffer, lvl LEVEL) { - if f.Strict { - // Strict format, append level key - buf.WriteString(`level=`) - buf.WriteString(f.Levels.Get(lvl)) - return - } - - // Write level string - buf.WriteByte('[') - buf.WriteString(f.Levels.Get(lvl)) - buf.WriteByte(']') -} - -func (f TextFormat) AppendTimestamp(buf *bytes.Buffer, now string) { - if f.Strict { - // Strict format, use key and quote - buf.WriteString(`time=`) - appendString(f.fmt(buf), now) - return - } - - // Write time as-is - buf.WriteString(now) -} - -func (f TextFormat) AppendValue(buf *bytes.Buffer, value interface{}) { - appendIfaceOrRValue(f.fmt(buf).SetIsKey(false), value) -} - -func (f TextFormat) AppendByte(buf *bytes.Buffer, value byte) { - appendByte(f.fmt(buf), value) -} - -func (f TextFormat) AppendBytes(buf *bytes.Buffer, value []byte) { - appendBytes(f.fmt(buf), value) -} - -func (f TextFormat) AppendString(buf *bytes.Buffer, value string) { - appendString(f.fmt(buf), value) -} - -func (f TextFormat) AppendStrings(buf *bytes.Buffer, value []string) { - appendStringSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendBool(buf *bytes.Buffer, value bool) { - appendBool(f.fmt(buf), value) -} - -func (f TextFormat) AppendBools(buf *bytes.Buffer, value []bool) { - appendBoolSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendInt(buf *bytes.Buffer, value int) { - appendInt(f.fmt(buf), int64(value)) -} - -func (f TextFormat) AppendInts(buf *bytes.Buffer, value []int) { - appendIntSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendUint(buf *bytes.Buffer, value uint) { - appendUint(f.fmt(buf), uint64(value)) -} - -func (f TextFormat) AppendUints(buf *bytes.Buffer, value []uint) { - appendUintSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendFloat(buf *bytes.Buffer, value float64) { - appendFloat(f.fmt(buf), value) -} - -func (f TextFormat) AppendFloats(buf *bytes.Buffer, value []float64) { - appendFloatSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendTime(buf *bytes.Buffer, value time.Time) { - appendTime(f.fmt(buf), value) -} - -func (f TextFormat) AppendTimes(buf *bytes.Buffer, value []time.Time) { - appendTimeSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendDuration(buf *bytes.Buffer, value time.Duration) { - appendDuration(f.fmt(buf), value) -} - -func (f TextFormat) AppendDurations(buf *bytes.Buffer, value []time.Duration) { - appendDurationSlice(f.fmt(buf), value) -} - -func (f TextFormat) AppendMsg(buf *bytes.Buffer, a ...interface{}) { - if f.Strict { - // Strict format, use key and quote - buf.WriteString(`msg=`) - buf.B = strconv.AppendQuote(buf.B, stdfmt.Sprint(a...)) - return - } - - // Write message as-is - stdfmt.Fprint(buf, a...) -} - -func (f TextFormat) AppendMsgf(buf *bytes.Buffer, s string, a ...interface{}) { - if f.Strict { - // Strict format, use key and quote - buf.WriteString(`msg=`) - buf.B = strconv.AppendQuote(buf.B, stdfmt.Sprintf(s, a...)) - return - } - - // Write message as-is - stdfmt.Fprintf(buf, s, a...) -} - -// format is the object passed among the append___ formatting functions -type format struct { - flags uint8 // 'isKey' and 'verbose' flags - drefs uint8 // current value deref count - curd uint8 // current depth - maxd uint8 // maximum depth - buf *bytes.Buffer // out buffer -} - -const ( - // flag bit constants - isKeyBit = uint8(1) << 0 - vboseBit = uint8(1) << 1 -) - -// AtMaxDepth returns whether format is currently at max depth. -func (f format) AtMaxDepth() bool { - return f.curd >= f.maxd -} - -// Derefs returns no. times current value has been dereferenced. -func (f format) Derefs() uint8 { - return f.drefs -} - -// IsKey returns whether the isKey flag is set. -func (f format) IsKey() bool { - return (f.flags & isKeyBit) != 0 -} - -// Verbose returns whether the verbose flag is set. -func (f format) Verbose() bool { - return (f.flags & vboseBit) != 0 -} - -// SetIsKey returns format instance with the isKey bit set to value. -func (f format) SetIsKey(is bool) format { - flags := f.flags - if is { - flags |= isKeyBit - } else { - flags &= ^isKeyBit - } - return format{ - flags: flags, - drefs: f.drefs, - curd: f.curd, - maxd: f.maxd, - buf: f.buf, - } -} - -// IncrDepth returns format instance with depth incremented. -func (f format) IncrDepth() format { - return format{ - flags: f.flags, - drefs: f.drefs, - curd: f.curd + 1, - maxd: f.maxd, - buf: f.buf, - } -} - -// IncrDerefs returns format instance with dereference count incremented. -func (f format) IncrDerefs() format { - return format{ - flags: f.flags, - drefs: f.drefs + 1, - curd: f.curd, - maxd: f.maxd, - buf: f.buf, - } -} - -// appendType appends a type using supplied type str. -func appendType(fmt format, t string) { - for i := uint8(0); i < fmt.Derefs(); i++ { - fmt.buf.WriteByte('*') - } - fmt.buf.WriteString(t) -} - -// appendNilType writes nil to buf, type included if verbose. -func appendNilType(fmt format, t string) { - if fmt.Verbose() { - fmt.buf.WriteByte('(') - appendType(fmt, t) - fmt.buf.WriteString(`)(nil)`) - } else { - fmt.buf.WriteString(`nil`) - } -} - -// appendNilFace writes nil to buf, type included if verbose. -func appendNilIface(fmt format, i interface{}) { - if fmt.Verbose() { - fmt.buf.WriteByte('(') - appendType(fmt, reflect.TypeOf(i).String()) - fmt.buf.WriteString(`)(nil)`) - } else { - fmt.buf.WriteString(`nil`) - } -} - -// appendNilRValue writes nil to buf, type included if verbose. -func appendNilRValue(fmt format, v reflect.Value) { - if fmt.Verbose() { - fmt.buf.WriteByte('(') - appendType(fmt, v.Type().String()) - fmt.buf.WriteString(`)(nil)`) - } else { - fmt.buf.WriteString(`nil`) - } -} - -// appendByte writes a single byte to buf -func appendByte(fmt format, b byte) { - fmt.buf.WriteByte(b) -} - -// appendBytes writes a quoted byte slice to buf -func appendBytes(fmt format, b []byte) { - if !fmt.IsKey() && b == nil { - // Values CAN be nil formatted - appendNilType(fmt, `[]byte`) - } else { - // unsafe cast as string to prevent reallocation - appendString(fmt, *(*string)(unsafe.Pointer(&b))) - } -} - -// appendString writes an escaped, double-quoted string to buf -func appendString(fmt format, s string) { - if !fmt.IsKey() || !strconv.CanBackquote(s) { - // All non-keys and multiline keys get quoted + escaped - fmt.buf.B = strconv.AppendQuote(fmt.buf.B, s) - return - } else if containsSpaceOrTab(s) { - // Key containing spaces/tabs, quote this - fmt.buf.WriteByte('"') - fmt.buf.WriteString(s) - fmt.buf.WriteByte('"') - return - } - - // Safe to leave unquoted - fmt.buf.WriteString(s) -} - -// appendStringSlice writes a slice of strings to buf -func appendStringSlice(fmt format, s []string) { - // Check for nil slice - if s == nil { - appendNilType(fmt, `[]string`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, s := range s { - appendString(fmt.SetIsKey(false), s) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(s) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendBool writes a formatted bool to buf -func appendBool(fmt format, b bool) { - fmt.buf.B = strconv.AppendBool(fmt.buf.B, b) -} - -// appendBool writes a slice of formatted bools to buf -func appendBoolSlice(fmt format, b []bool) { - // Check for nil slice - if b == nil { - appendNilType(fmt, `[]bool`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, b := range b { - appendBool(fmt, b) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(b) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendInt writes a formatted int to buf -func appendInt(fmt format, i int64) { - fmt.buf.B = strconv.AppendInt(fmt.buf.B, i, 10) -} - -// appendIntSlice writes a slice of formatted int to buf -func appendIntSlice(fmt format, i []int) { - // Check for nil slice - if i == nil { - appendNilType(fmt, `[]int`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, i := range i { - appendInt(fmt, int64(i)) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(i) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendUint writes a formatted uint to buf -func appendUint(fmt format, u uint64) { - fmt.buf.B = strconv.AppendUint(fmt.buf.B, u, 10) -} - -// appendUintSlice writes a slice of formatted uint to buf -func appendUintSlice(fmt format, u []uint) { - // Check for nil slice - if u == nil { - appendNilType(fmt, `[]uint`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, u := range u { - appendUint(fmt, uint64(u)) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(u) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendFloat writes a formatted float to buf -func appendFloat(fmt format, f float64) { - fmt.buf.B = strconv.AppendFloat(fmt.buf.B, f, 'G', -1, 64) -} - -// appendFloatSlice writes a slice formatted floats to buf -func appendFloatSlice(fmt format, f []float64) { - // Check for nil slice - if f == nil { - appendNilType(fmt, `[]float64`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, f := range f { - appendFloat(fmt, f) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(f) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendTime writes a formatted, quoted time string to buf -func appendTime(fmt format, t time.Time) { - appendString(fmt.SetIsKey(true), t.Format(time.RFC1123)) -} - -// appendTimeSlice writes a slice of formatted time strings to buf -func appendTimeSlice(fmt format, t []time.Time) { - // Check for nil slice - if t == nil { - appendNilType(fmt, `[]time.Time`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, t := range t { - appendString(fmt.SetIsKey(true), t.Format(time.RFC1123)) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(t) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendDuration writes a formatted, quoted duration string to buf -func appendDuration(fmt format, d time.Duration) { - appendString(fmt.SetIsKey(true), d.String()) -} - -// appendDurationSlice writes a slice of formatted, quoted duration strings to buf -func appendDurationSlice(fmt format, d []time.Duration) { - // Check for nil slice - if d == nil { - appendNilType(fmt, `[]time.Duration`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, d := range d { - appendString(fmt.SetIsKey(true), d.String()) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(d) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendComplex writes a formatted complex128 to buf -func appendComplex(fmt format, c complex128) { - appendFloat(fmt, real(c)) - fmt.buf.WriteByte('+') - appendFloat(fmt, imag(c)) - fmt.buf.WriteByte('i') -} - -// appendComplexSlice writes a slice of formatted complex128s to buf -func appendComplexSlice(fmt format, c []complex128) { - // Check for nil slice - if c == nil { - appendNilType(fmt, `[]complex128`) - return - } - - fmt.buf.WriteByte('[') - - // Write elements - for _, c := range c { - appendComplex(fmt, c) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if len(c) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// notNil will safely check if 'v' is nil without dealing with weird Go interface nil bullshit. -func notNil(i interface{}) bool { - // cast to get fat pointer - e := *(*struct { - typeOf unsafe.Pointer // ignored - valueOf unsafe.Pointer - })(unsafe.Pointer(&i)) - - // check if value part is nil - return (e.valueOf != nil) -} - -// appendIfaceOrRValueNext performs appendIfaceOrRValue checking + incr depth -func appendIfaceOrRValueNext(fmt format, i interface{}) { - // Check we haven't hit max - if fmt.AtMaxDepth() { - fmt.buf.WriteString("...") - return - } - - // Incr the depth - fmt = fmt.IncrDepth() - - // Make actual call - appendIfaceOrRValue(fmt, i) -} - -// appendIfaceOrReflectValue will attempt to append as interface, falling back to reflection -func appendIfaceOrRValue(fmt format, i interface{}) { - if !appendIface(fmt, i) { - appendRValue(fmt, reflect.ValueOf(i)) - } -} - -// appendValueOrIfaceNext performs appendRValueOrIface checking + incr depth -func appendRValueOrIfaceNext(fmt format, v reflect.Value) { - // Check we haven't hit max - if fmt.AtMaxDepth() { - fmt.buf.WriteString("...") - return - } - - // Incr the depth - fmt = fmt.IncrDepth() - - // Make actual call - appendRValueOrIface(fmt, v) -} - -// appendRValueOrIface will attempt to interface the reflect.Value, falling back to using this directly -func appendRValueOrIface(fmt format, v reflect.Value) { - if !v.CanInterface() || !appendIface(fmt, v.Interface()) { - appendRValue(fmt, v) - } -} - -// appendIface parses and writes a formatted interface value to buf -func appendIface(fmt format, i interface{}) bool { - switch i := i.(type) { - case nil: - fmt.buf.WriteString(`nil`) - case byte: - appendByte(fmt, i) - case []byte: - appendBytes(fmt, i) - case string: - appendString(fmt, i) - case []string: - appendStringSlice(fmt, i) - case int: - appendInt(fmt, int64(i)) - case int8: - appendInt(fmt, int64(i)) - case int16: - appendInt(fmt, int64(i)) - case int32: - appendInt(fmt, int64(i)) - case int64: - appendInt(fmt, i) - case []int: - appendIntSlice(fmt, i) - case uint: - appendUint(fmt, uint64(i)) - case uint16: - appendUint(fmt, uint64(i)) - case uint32: - appendUint(fmt, uint64(i)) - case uint64: - appendUint(fmt, i) - case []uint: - appendUintSlice(fmt, i) - case float32: - appendFloat(fmt, float64(i)) - case float64: - appendFloat(fmt, i) - case []float64: - appendFloatSlice(fmt, i) - case bool: - appendBool(fmt, i) - case []bool: - appendBoolSlice(fmt, i) - case time.Time: - appendTime(fmt, i) - case []time.Time: - appendTimeSlice(fmt, i) - case time.Duration: - appendDuration(fmt, i) - case []time.Duration: - appendDurationSlice(fmt, i) - case complex64: - appendComplex(fmt, complex128(i)) - case complex128: - appendComplex(fmt, i) - case []complex128: - appendComplexSlice(fmt, i) - case map[string]interface{}: - appendIfaceMap(fmt, i) - case error: - if notNil(i) /* use safer nil check */ { - appendString(fmt, i.Error()) - } else { - appendNilIface(fmt, i) - } - case Formattable: - switch { - // catch nil case first - case !notNil(i): - appendNilIface(fmt, i) - - // not permitted - case fmt.Verbose(): - return false - - // use func - default: - fmt.buf.B = i.AppendFormat(fmt.buf.B) - } - case stdfmt.Stringer: - switch { - // catch nil case first - case !notNil(i): - appendNilIface(fmt, i) - - // not permitted - case fmt.Verbose(): - return false - - // use func - default: - appendString(fmt, i.String()) - } - default: - return false // could not handle - } - - return true -} - -// appendReflectValue will safely append a reflected value -func appendRValue(fmt format, v reflect.Value) { - switch v.Kind() { - case reflect.Float32, reflect.Float64: - appendFloat(fmt, v.Float()) - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - appendInt(fmt, v.Int()) - case reflect.Uint8: - appendByte(fmt, uint8(v.Uint())) - case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64: - appendUint(fmt, v.Uint()) - case reflect.Bool: - appendBool(fmt, v.Bool()) - case reflect.Array: - appendArrayType(fmt, v) - case reflect.Slice: - appendSliceType(fmt, v) - case reflect.Map: - appendMapType(fmt, v) - case reflect.Struct: - appendStructType(fmt, v) - case reflect.Ptr: - if v.IsNil() { - appendNilRValue(fmt, v) - } else { - appendRValue(fmt.IncrDerefs(), v.Elem()) - } - case reflect.UnsafePointer: - fmt.buf.WriteString("(unsafe.Pointer)") - fmt.buf.WriteByte('(') - if u := v.Pointer(); u != 0 { - fmt.buf.WriteString("0x") - fmt.buf.B = strconv.AppendUint(fmt.buf.B, uint64(u), 16) - } else { - fmt.buf.WriteString(`nil`) - } - fmt.buf.WriteByte(')') - case reflect.Uintptr: - fmt.buf.WriteString("(uintptr)") - fmt.buf.WriteByte('(') - if u := v.Uint(); u != 0 { - fmt.buf.WriteString("0x") - fmt.buf.B = strconv.AppendUint(fmt.buf.B, u, 16) - } else { - fmt.buf.WriteString(`nil`) - } - fmt.buf.WriteByte(')') - case reflect.String: - appendString(fmt, v.String()) - case reflect.Complex64, reflect.Complex128: - appendComplex(fmt, v.Complex()) - case reflect.Func, reflect.Chan, reflect.Interface: - if v.IsNil() { - appendNilRValue(fmt, v) - } else { - fmt.buf.WriteString(v.String()) - } - default: - fmt.buf.WriteString(v.String()) - } -} - -// appendIfaceMap writes a map of key-value pairs (as a set of fields) to buf -func appendIfaceMap(fmt format, v map[string]interface{}) { - // Catch nil map - if v == nil { - appendNilType(fmt, `map[string]interface{}`) - return - } - - fmt.buf.WriteByte('{') - - // Write map pairs! - for key, value := range v { - appendString(fmt.SetIsKey(true), key) - fmt.buf.WriteByte('=') - appendIfaceOrRValueNext(fmt.SetIsKey(false), value) - fmt.buf.WriteByte(' ') - } - - // Drop last space - if len(v) > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte('}') -} - -// appendArrayType writes an array of unknown type (parsed by reflection) to buf, unlike appendSliceType does NOT catch nil slice -func appendArrayType(fmt format, v reflect.Value) { - // get no. elements - n := v.Len() - - fmt.buf.WriteByte('[') - - // Write values - for i := 0; i < n; i++ { - appendRValueOrIfaceNext(fmt.SetIsKey(false), v.Index(i)) - fmt.buf.WriteByte(',') - } - - // Drop last comma - if n > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte(']') -} - -// appendSliceType writes a slice of unknown type (parsed by reflection) to buf -func appendSliceType(fmt format, v reflect.Value) { - if v.IsNil() { - appendNilRValue(fmt, v) - } else { - appendArrayType(fmt, v) - } -} - -// appendMapType writes a map of unknown types (parsed by reflection) to buf -func appendMapType(fmt format, v reflect.Value) { - // Catch nil map - if v.IsNil() { - appendNilRValue(fmt, v) - return - } - - // Get a map iterator - r := v.MapRange() - n := v.Len() - - fmt.buf.WriteByte('{') - - // Iterate pairs - for r.Next() { - appendRValueOrIfaceNext(fmt.SetIsKey(true), r.Key()) - fmt.buf.WriteByte('=') - appendRValueOrIfaceNext(fmt.SetIsKey(false), r.Value()) - fmt.buf.WriteByte(' ') - } - - // Drop last space - if n > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte('}') -} - -// appendStructType writes a struct (as a set of key-value fields) to buf -func appendStructType(fmt format, v reflect.Value) { - // Get value type & no. fields - t := v.Type() - n := v.NumField() - w := 0 - - // If verbose, append the type - - fmt.buf.WriteByte('{') - - // Iterate fields - for i := 0; i < n; i++ { - vfield := v.Field(i) - name := t.Field(i).Name - - // Append field name - appendString(fmt.SetIsKey(true), name) - fmt.buf.WriteByte('=') - - if !vfield.CanInterface() { - // This is an unexported field - appendRValue(fmt.SetIsKey(false), vfield) - } else { - // This is an exported field! - appendRValueOrIfaceNext(fmt.SetIsKey(false), vfield) - } - - // Iter written count - fmt.buf.WriteByte(' ') - w++ - } - - // Drop last space - if w > 0 { - fmt.buf.Truncate(1) - } - - fmt.buf.WriteByte('}') -} - -// containsSpaceOrTab checks if "s" contains space or tabs -func containsSpaceOrTab(s string) bool { - for _, r := range s { - if r == ' ' || r == '\t' { - return true - } - } - return false -} diff --git a/vendor/codeberg.org/gruf/go-logger/hook.go b/vendor/codeberg.org/gruf/go-logger/hook.go deleted file mode 100644 index 2345ca93b..000000000 --- a/vendor/codeberg.org/gruf/go-logger/hook.go +++ /dev/null @@ -1,13 +0,0 @@ -package logger - -// Hook defines a log Entry modifier -type Hook interface { - Do(*Entry) -} - -// HookFunc is a simple adapter to allow functions to satisfy the Hook interface -type HookFunc func(*Entry) - -func (hook HookFunc) Do(entry *Entry) { - hook(entry) -} diff --git a/vendor/codeberg.org/gruf/go-logger/level.go b/vendor/codeberg.org/gruf/go-logger/level.go deleted file mode 100644 index 0a076c246..000000000 --- a/vendor/codeberg.org/gruf/go-logger/level.go +++ /dev/null @@ -1,38 +0,0 @@ -package logger - -// LEVEL defines a level of logging -type LEVEL uint8 - -// Available levels of logging. -const ( - unset LEVEL = ^LEVEL(0) - DEBUG LEVEL = 5 - INFO LEVEL = 10 - WARN LEVEL = 15 - ERROR LEVEL = 20 - FATAL LEVEL = 25 -) - -var unknownLevel = "unknown" - -// Levels defines a mapping of log LEVELs to formatted level strings -type Levels [^LEVEL(0)]string - -// DefaultLevels returns the default set of log levels -func DefaultLevels() Levels { - return Levels{ - DEBUG: "DEBUG", - INFO: "INFO", - WARN: "WARN", - ERROR: "ERROR", - FATAL: "FATAL", - } -} - -// Get fetches the level string for the provided value, or "unknown" -func (l Levels) Get(lvl LEVEL) string { - if str := l[int(lvl)]; str != "" { - return str - } - return unknownLevel -} diff --git a/vendor/codeberg.org/gruf/go-logger/logger.go b/vendor/codeberg.org/gruf/go-logger/logger.go deleted file mode 100644 index 94d3ab8ca..000000000 --- a/vendor/codeberg.org/gruf/go-logger/logger.go +++ /dev/null @@ -1,187 +0,0 @@ -package logger - -import ( - "context" - "fmt" - "io" - "os" - "sync" - "sync/atomic" - - "codeberg.org/gruf/go-bytes" -) - -type Logger struct { - // Hooks defines a list of hooks which are called before an entry - // is written. This should NOT be modified while the Logger is in use - Hooks []Hook - - // Level is the current log LEVEL, entries at level below the - // currently set level will not be output. This should NOT - // be modified while the Logger is in use - Level LEVEL - - // Timestamp defines whether to automatically append timestamps - // to entries written via Logger convience methods and specifically - // Entry.TimestampIf(). This should NOT be modified while Logger in use - Timestamp bool - - // Format is the log entry LogFormat to use. This should NOT - // be modified while the Logger is in use - Format LogFormat - - // BufferSize is the Entry buffer size to use when allocating - // new Entry objects. This should be modified atomically - BufSize int64 - - // Output is the log's output writer. This should NOT be - // modified while the Logger is in use - Output io.Writer - - // entry pool - pool sync.Pool -} - -// New returns a new Logger instance with defaults -func New(out io.Writer) *Logger { - return NewWith(0 /* all */, true, DefaultTextFormat, 512, out) -} - -// NewWith returns a new Logger instance with supplied configuration -func NewWith(lvl LEVEL, timestamp bool, fmt LogFormat, bufsize int64, out io.Writer) *Logger { - // Create new logger object - log := &Logger{ - Level: lvl, - Timestamp: timestamp, - Format: fmt, - BufSize: bufsize, - Output: out, - } - - // Ensure clock running - startClock() - - // Set-up logger Entry pool - log.pool.New = func() interface{} { - return &Entry{ - lvl: unset, - buf: &bytes.Buffer{B: make([]byte, 0, atomic.LoadInt64(&log.BufSize))}, - log: log, - } - } - - return log -} - -// Entry returns a new Entry from the Logger's pool with background context -func (l *Logger) Entry() *Entry { - entry, _ := l.pool.Get().(*Entry) - entry.ctx = context.Background() - return entry -} - -// Debug prints the provided arguments with the debug prefix -func (l *Logger) Debug(a ...interface{}) { - l.Log(DEBUG, a...) -} - -// Debugf prints the provided format string and arguments with the debug prefix -func (l *Logger) Debugf(s string, a ...interface{}) { - l.Logf(DEBUG, s, a...) -} - -// Info prints the provided arguments with the info prefix -func (l *Logger) Info(a ...interface{}) { - l.Log(INFO, a...) -} - -// Infof prints the provided format string and arguments with the info prefix -func (l *Logger) Infof(s string, a ...interface{}) { - l.Logf(INFO, s, a...) -} - -// Warn prints the provided arguments with the warn prefix -func (l *Logger) Warn(a ...interface{}) { - l.Log(WARN, a...) -} - -// Warnf prints the provided format string and arguments with the warn prefix -func (l *Logger) Warnf(s string, a ...interface{}) { - l.Logf(WARN, s, a...) -} - -// Error prints the provided arguments with the error prefix -func (l *Logger) Error(a ...interface{}) { - l.Log(ERROR, a...) -} - -// Errorf prints the provided format string and arguments with the error prefix -func (l *Logger) Errorf(s string, a ...interface{}) { - l.Logf(ERROR, s, a...) -} - -// Fatal prints provided arguments with the fatal prefix before exiting the program -// with os.Exit(1) -func (l *Logger) Fatal(a ...interface{}) { - defer os.Exit(1) - l.Log(FATAL, a...) -} - -// Fatalf prints provided the provided format string and arguments with the fatal prefix -// before exiting the program with os.Exit(1) -func (l *Logger) Fatalf(s string, a ...interface{}) { - defer os.Exit(1) - l.Logf(FATAL, s, a...) -} - -// Log prints the provided arguments at the supplied log level -func (l *Logger) Log(lvl LEVEL, a ...interface{}) { - if lvl >= l.Level { - l.Entry().TimestampIf().Level(lvl).Hooks().Msg(a...) - } -} - -// Logf prints the provided format string and arguments at the supplied log level -func (l *Logger) Logf(lvl LEVEL, s string, a ...interface{}) { - if lvl >= l.Level { - l.Entry().TimestampIf().Level(lvl).Hooks().Msgf(s, a...) - } -} - -// LogFields prints the provided fields formatted as key-value pairs at the supplied log level -func (l *Logger) LogFields(lvl LEVEL, fields map[string]interface{}) { - if lvl >= l.Level { - l.Entry().TimestampIf().Level(lvl).Fields(fields).Hooks().Send() - } -} - -// LogValues prints the provided values formatted as-so at the supplied log level -func (l *Logger) LogValues(lvl LEVEL, a ...interface{}) { - if lvl >= l.Level { - l.Entry().TimestampIf().Level(lvl).Values(a...).Hooks().Send() - } -} - -// Print simply prints provided arguments -func (l *Logger) Print(a ...interface{}) { - e := l.Entry().TimestampIf() - fmt.Fprint(e.buf, a...) - e.Send() -} - -// Printf simply prints provided the provided format string and arguments -func (l *Logger) Printf(s string, a ...interface{}) { - e := l.Entry().TimestampIf() - fmt.Fprintf(e.buf, s, a...) - e.Send() -} - -// PrintFields prints the provided fields formatted as key-value pairs -func (l *Logger) PrintFields(fields map[string]interface{}) { - l.Entry().TimestampIf().Fields(fields).Send() -} - -// PrintValues prints the provided values formatted as-so -func (l *Logger) PrintValues(a ...interface{}) { - l.Entry().TimestampIf().Values(a...).Send() -} diff --git a/vendor/codeberg.org/gruf/go-logger/writer.go b/vendor/codeberg.org/gruf/go-logger/writer.go deleted file mode 100644 index 72321f518..000000000 --- a/vendor/codeberg.org/gruf/go-logger/writer.go +++ /dev/null @@ -1,29 +0,0 @@ -package logger - -import ( - "io" - "io/ioutil" - "sync" -) - -// AddSafety wraps an io.Writer to provide mutex locking protection -func AddSafety(w io.Writer) io.Writer { - if w == nil { - w = ioutil.Discard - } else if sw, ok := w.(*safeWriter); ok { - return sw - } - return &safeWriter{wr: w} -} - -// safeWriter wraps an io.Writer to provide mutex locking on write -type safeWriter struct { - wr io.Writer - mu sync.Mutex -} - -func (w *safeWriter) Write(b []byte) (int, error) { - w.mu.Lock() - defer w.mu.Unlock() - return w.wr.Write(b) -} diff --git a/vendor/codeberg.org/gruf/go-store/kv/iterator.go b/vendor/codeberg.org/gruf/go-store/kv/iterator.go index d3999273f..ddaaf60cf 100644 --- a/vendor/codeberg.org/gruf/go-store/kv/iterator.go +++ b/vendor/codeberg.org/gruf/go-store/kv/iterator.go @@ -60,5 +60,5 @@ func (i *KVIterator) Value() ([]byte, error) { } // Attempt to fetch from store - return i.store.get(i.key) + return i.store.get(i.store.mutexMap.RLock, i.key) } diff --git a/vendor/codeberg.org/gruf/go-store/kv/state.go b/vendor/codeberg.org/gruf/go-store/kv/state.go index 20a3e951d..330928bce 100644 --- a/vendor/codeberg.org/gruf/go-store/kv/state.go +++ b/vendor/codeberg.org/gruf/go-store/kv/state.go @@ -30,7 +30,7 @@ func (st *StateRO) Get(key string) ([]byte, error) { } // Pass request to store - return st.store.get(key) + return st.store.get(st.store.mutexMap.RLock, key) } func (st *StateRO) GetStream(key string) (io.ReadCloser, error) { @@ -44,7 +44,7 @@ func (st *StateRO) GetStream(key string) (io.ReadCloser, error) { } // Pass request to store - return st.store.getStream(key) + return st.store.getStream(st.store.mutexMap.RLock, key) } func (st *StateRO) Has(key string) (bool, error) { @@ -58,7 +58,7 @@ func (st *StateRO) Has(key string) (bool, error) { } // Pass request to store - return st.store.has(key) + return st.store.has(st.store.mutexMap.RLock, key) } func (st *StateRO) Release() { @@ -94,7 +94,7 @@ func (st *StateRW) Get(key string) ([]byte, error) { } // Pass request to store - return st.store.get(key) + return st.store.get(st.store.mutexMap.RLock, key) } func (st *StateRW) GetStream(key string) (io.ReadCloser, error) { @@ -108,7 +108,7 @@ func (st *StateRW) GetStream(key string) (io.ReadCloser, error) { } // Pass request to store - return st.store.getStream(key) + return st.store.getStream(st.store.mutexMap.RLock, key) } func (st *StateRW) Put(key string, value []byte) error { @@ -122,7 +122,7 @@ func (st *StateRW) Put(key string, value []byte) error { } // Pass request to store - return st.store.put(key, value) + return st.store.put(st.store.mutexMap.Lock, key, value) } func (st *StateRW) PutStream(key string, r io.Reader) error { @@ -136,7 +136,7 @@ func (st *StateRW) PutStream(key string, r io.Reader) error { } // Pass request to store - return st.store.putStream(key, r) + return st.store.putStream(st.store.mutexMap.Lock, key, r) } func (st *StateRW) Has(key string) (bool, error) { @@ -150,7 +150,7 @@ func (st *StateRW) Has(key string) (bool, error) { } // Pass request to store - return st.store.has(key) + return st.store.has(st.store.mutexMap.RLock, key) } func (st *StateRW) Delete(key string) error { @@ -164,7 +164,7 @@ func (st *StateRW) Delete(key string) error { } // Pass request to store - return st.store.delete(key) + return st.store.delete(st.store.mutexMap.Lock, key) } func (st *StateRW) Release() { diff --git a/vendor/codeberg.org/gruf/go-store/kv/store.go b/vendor/codeberg.org/gruf/go-store/kv/store.go index 34fe91987..4c3a31140 100644 --- a/vendor/codeberg.org/gruf/go-store/kv/store.go +++ b/vendor/codeberg.org/gruf/go-store/kv/store.go @@ -53,19 +53,30 @@ func OpenStorage(storage storage.Storage) (*KVStore, error) { }, nil } -// Get fetches the bytes for supplied key in the store -func (st *KVStore) Get(key string) ([]byte, error) { - // Acquire store read lock +// RLock acquires a read-lock on supplied key, returning unlock function. +func (st *KVStore) RLock(key string) (runlock func()) { st.mutex.RLock() - defer st.mutex.RUnlock() - - // Pass to unprotected fn - return st.get(key) + runlock = st.mutexMap.RLock(key) + st.mutex.RUnlock() + return runlock } -func (st *KVStore) get(key string) ([]byte, error) { +// Lock acquires a write-lock on supplied key, returning unlock function. +func (st *KVStore) Lock(key string) (unlock func()) { + st.mutex.Lock() + unlock = st.mutexMap.Lock(key) + st.mutex.Unlock() + return unlock +} + +// Get fetches the bytes for supplied key in the store +func (st *KVStore) Get(key string) ([]byte, error) { + return st.get(st.RLock, key) +} + +func (st *KVStore) get(rlock func(string) func(), key string) ([]byte, error) { // Acquire read lock for key - runlock := st.mutexMap.RLock(key) + runlock := rlock(key) defer runlock() // Read file bytes @@ -74,17 +85,12 @@ func (st *KVStore) get(key string) ([]byte, error) { // GetStream fetches a ReadCloser for the bytes at the supplied key location in the store func (st *KVStore) GetStream(key string) (io.ReadCloser, error) { - // Acquire store read lock - st.mutex.RLock() - defer st.mutex.RUnlock() - - // Pass to unprotected fn - return st.getStream(key) + return st.getStream(st.RLock, key) } -func (st *KVStore) getStream(key string) (io.ReadCloser, error) { +func (st *KVStore) getStream(rlock func(string) func(), key string) (io.ReadCloser, error) { // Acquire read lock for key - runlock := st.mutexMap.RLock(key) + runlock := rlock(key) // Attempt to open stream for read rd, err := st.storage.ReadStream(key) @@ -99,17 +105,12 @@ func (st *KVStore) getStream(key string) (io.ReadCloser, error) { // Put places the bytes at the supplied key location in the store func (st *KVStore) Put(key string, value []byte) error { - // Acquire store write lock - st.mutex.Lock() - defer st.mutex.Unlock() - - // Pass to unprotected fn - return st.put(key, value) + return st.put(st.Lock, key, value) } -func (st *KVStore) put(key string, value []byte) error { +func (st *KVStore) put(lock func(string) func(), key string, value []byte) error { // Acquire write lock for key - unlock := st.mutexMap.Lock(key) + unlock := lock(key) defer unlock() // Write file bytes @@ -118,17 +119,12 @@ func (st *KVStore) put(key string, value []byte) error { // PutStream writes the bytes from the supplied Reader at the supplied key location in the store func (st *KVStore) PutStream(key string, r io.Reader) error { - // Acquire store write lock - st.mutex.Lock() - defer st.mutex.Unlock() - - // Pass to unprotected fn - return st.putStream(key, r) + return st.putStream(st.Lock, key, r) } -func (st *KVStore) putStream(key string, r io.Reader) error { +func (st *KVStore) putStream(lock func(string) func(), key string, r io.Reader) error { // Acquire write lock for key - unlock := st.mutexMap.Lock(key) + unlock := lock(key) defer unlock() // Write file stream @@ -137,17 +133,12 @@ func (st *KVStore) putStream(key string, r io.Reader) error { // Has checks whether the supplied key exists in the store func (st *KVStore) Has(key string) (bool, error) { - // Acquire store read lock - st.mutex.RLock() - defer st.mutex.RUnlock() - - // Pass to unprotected fn - return st.has(key) + return st.has(st.RLock, key) } -func (st *KVStore) has(key string) (bool, error) { +func (st *KVStore) has(rlock func(string) func(), key string) (bool, error) { // Acquire read lock for key - runlock := st.mutexMap.RLock(key) + runlock := rlock(key) defer runlock() // Stat file on disk @@ -156,17 +147,12 @@ func (st *KVStore) has(key string) (bool, error) { // Delete removes the supplied key-value pair from the store func (st *KVStore) Delete(key string) error { - // Acquire store write lock - st.mutex.Lock() - defer st.mutex.Unlock() - - // Pass to unprotected fn - return st.delete(key) + return st.delete(st.Lock, key) } -func (st *KVStore) delete(key string) error { +func (st *KVStore) delete(lock func(string) func(), key string) error { // Acquire write lock for key - unlock := st.mutexMap.Lock(key) + unlock := lock(key) defer unlock() // Remove file from disk diff --git a/vendor/codeberg.org/gruf/go-store/storage/block.go b/vendor/codeberg.org/gruf/go-store/storage/block.go index 9a8c4dc7d..bc35b07ac 100644 --- a/vendor/codeberg.org/gruf/go-store/storage/block.go +++ b/vendor/codeberg.org/gruf/go-store/storage/block.go @@ -1,7 +1,6 @@ package storage import ( - "crypto/sha256" "io" "io/fs" "os" @@ -14,6 +13,7 @@ import ( "codeberg.org/gruf/go-hashenc" "codeberg.org/gruf/go-pools" "codeberg.org/gruf/go-store/util" + "github.com/zeebo/blake3" ) var ( @@ -77,7 +77,7 @@ func getBlockConfig(cfg *BlockConfig) BlockConfig { // BlockStorage is a Storage implementation that stores input data as chunks on // a filesystem. Each value is chunked into blocks of configured size and these -// blocks are stored with name equal to their base64-encoded SHA256 hash-sum. A +// blocks are stored with name equal to their base64-encoded BLAKE3 hash-sum. A // "node" file is finally created containing an array of hashes contained within // this value type BlockStorage struct { @@ -87,6 +87,7 @@ type BlockStorage struct { config BlockConfig // cfg is the supplied configuration for this store hashPool sync.Pool // hashPool is this store's hashEncoder pool bufpool pools.BufferPool // bufpool is this store's bytes.Buffer pool + lock *LockableFile // lock is the opened lockfile for this storage instance // NOTE: // BlockStorage does not need to lock each of the underlying block files @@ -138,6 +139,14 @@ func OpenBlock(path string, cfg *BlockConfig) (*BlockStorage, error) { return nil, errPathIsFile } + // Open and acquire storage lock for path + lock, err := OpenLock(pb.Join(path, LockFile)) + if err != nil { + return nil, err + } else if err := lock.Lock(); err != nil { + return nil, err + } + // Figure out the largest size for bufpool slices bufSz := encodedHashLen if bufSz < config.BlockSize { @@ -159,6 +168,7 @@ func OpenBlock(path string, cfg *BlockConfig) (*BlockStorage, error) { }, }, bufpool: pools.NewBufferPool(bufSz), + lock: lock, }, nil } @@ -443,11 +453,16 @@ loop: continue loop } - // Write in separate goroutine + // Check if reached EOF + atEOF := (n < buf.Len()) + wg.Add(1) go func() { - // Defer buffer release + signal done + // Perform writes in goroutine + defer func() { + // Defer release + + // signal we're done st.bufpool.Put(buf) wg.Done() }() @@ -460,8 +475,8 @@ loop: } }() - // We reached EOF - if n < buf.Len() { + // Break at end + if atEOF { break loop } } @@ -568,6 +583,12 @@ func (st *BlockStorage) Remove(key string) error { return os.Remove(kpath) } +// Close implements Storage.Close() +func (st *BlockStorage) Close() error { + defer st.lock.Close() + return st.lock.Unlock() +} + // WalkKeys implements Storage.WalkKeys() func (st *BlockStorage) WalkKeys(opts WalkKeysOptions) error { // Acquire path builder @@ -610,7 +631,7 @@ func (st *BlockStorage) blockPathForKey(hash string) string { } // hashSeparator is the separating byte between block hashes -const hashSeparator = byte(':') +const hashSeparator = byte('\n') // node represents the contents of a node file in storage type node struct { @@ -773,24 +794,28 @@ func (r *blockReader) Read(b []byte) (int, error) { } } +var ( + // base64Encoding is our base64 encoding object. + base64Encoding = hashenc.Base64() + + // encodedHashLen is the once-calculated encoded hash-sum length + encodedHashLen = base64Encoding.EncodedLen( + blake3.New().Size(), + ) +) + // hashEncoder is a HashEncoder with built-in encode buffer type hashEncoder struct { henc hashenc.HashEncoder ebuf []byte } -// encodedHashLen is the once-calculated encoded hash-sum length -var encodedHashLen = hashenc.Base64().EncodedLen( - sha256.New().Size(), -) - // newHashEncoder returns a new hashEncoder instance func newHashEncoder() *hashEncoder { - hash := sha256.New() - enc := hashenc.Base64() + hash := blake3.New() return &hashEncoder{ - henc: hashenc.New(hash, enc), - ebuf: make([]byte, enc.EncodedLen(hash.Size())), + henc: hashenc.New(hash, base64Encoding), + ebuf: make([]byte, encodedHashLen), } } diff --git a/vendor/codeberg.org/gruf/go-store/storage/disk.go b/vendor/codeberg.org/gruf/go-store/storage/disk.go index 060d56688..9b5430437 100644 --- a/vendor/codeberg.org/gruf/go-store/storage/disk.go +++ b/vendor/codeberg.org/gruf/go-store/storage/disk.go @@ -71,6 +71,7 @@ type DiskStorage struct { path string // path is the root path of this store bufp pools.BufferPool // bufp is the buffer pool for this DiskStorage config DiskConfig // cfg is the supplied configuration for this store + lock *LockableFile // lock is the opened lockfile for this storage instance } // OpenFile opens a DiskStorage instance for given folder path and configuration @@ -81,13 +82,13 @@ func OpenFile(path string, cfg *DiskConfig) (*DiskStorage, error) { // Clean provided path, ensure ends in '/' (should // be dir, this helps with file path trimming later) - path = pb.Clean(path) + "/" + storePath := pb.Join(path, "store") + "/" // Get checked config config := getDiskConfig(cfg) // Attempt to open dir path - file, err := os.OpenFile(path, defaultFileROFlags, defaultDirPerms) + file, err := os.OpenFile(storePath, defaultFileROFlags, defaultDirPerms) if err != nil { // If not a not-exist error, return if !os.IsNotExist(err) { @@ -95,13 +96,13 @@ func OpenFile(path string, cfg *DiskConfig) (*DiskStorage, error) { } // Attempt to make store path dirs - err = os.MkdirAll(path, defaultDirPerms) + err = os.MkdirAll(storePath, defaultDirPerms) if err != nil { return nil, err } // Reopen dir now it's been created - file, err = os.OpenFile(path, defaultFileROFlags, defaultDirPerms) + file, err = os.OpenFile(storePath, defaultFileROFlags, defaultDirPerms) if err != nil { return nil, err } @@ -116,11 +117,20 @@ func OpenFile(path string, cfg *DiskConfig) (*DiskStorage, error) { return nil, errPathIsFile } + // Open and acquire storage lock for path + lock, err := OpenLock(pb.Join(path, LockFile)) + if err != nil { + return nil, err + } else if err := lock.Lock(); err != nil { + return nil, err + } + // Return new DiskStorage return &DiskStorage{ - path: path, + path: storePath, bufp: pools.NewBufferPool(config.WriteBufSize), config: config, + lock: lock, }, nil } @@ -248,6 +258,12 @@ func (st *DiskStorage) Remove(key string) error { return os.Remove(kpath) } +// Close implements Storage.Close() +func (st *DiskStorage) Close() error { + defer st.lock.Close() + return st.lock.Unlock() +} + // WalkKeys implements Storage.WalkKeys() func (st *DiskStorage) WalkKeys(opts WalkKeysOptions) error { // Acquire path builder @@ -256,8 +272,9 @@ func (st *DiskStorage) WalkKeys(opts WalkKeysOptions) error { // Walk dir for entries return util.WalkDir(pb, st.path, func(kpath string, fsentry fs.DirEntry) { - // Only deal with regular files if fsentry.Type().IsRegular() { + // Only deal with regular files + // Get full item path (without root) kpath = pb.Join(kpath, fsentry.Name())[len(st.path):] diff --git a/vendor/codeberg.org/gruf/go-store/storage/fs.go b/vendor/codeberg.org/gruf/go-store/storage/fs.go index 444cee4b0..ff4c857c3 100644 --- a/vendor/codeberg.org/gruf/go-store/storage/fs.go +++ b/vendor/codeberg.org/gruf/go-store/storage/fs.go @@ -39,7 +39,7 @@ func stat(path string) (bool, error) { return syscall.Stat(path, &stat) }) if err != nil { - if err == syscall.ENOENT { + if err == syscall.ENOENT { //nolint err = nil } return false, err diff --git a/vendor/codeberg.org/gruf/go-store/storage/lock.go b/vendor/codeberg.org/gruf/go-store/storage/lock.go index 3d794cda9..a757830cc 100644 --- a/vendor/codeberg.org/gruf/go-store/storage/lock.go +++ b/vendor/codeberg.org/gruf/go-store/storage/lock.go @@ -7,27 +7,31 @@ import ( "codeberg.org/gruf/go-store/util" ) -type lockableFile struct { +// LockFile is our standard lockfile name. +const LockFile = "store.lock" + +type LockableFile struct { *os.File } -func openLock(path string) (*lockableFile, error) { +// OpenLock opens a lockfile at path. +func OpenLock(path string) (*LockableFile, error) { file, err := open(path, defaultFileLockFlags) if err != nil { return nil, err } - return &lockableFile{file}, nil + return &LockableFile{file}, nil } -func (f *lockableFile) lock() error { +func (f *LockableFile) Lock() error { return f.flock(syscall.LOCK_EX | syscall.LOCK_NB) } -func (f *lockableFile) unlock() error { +func (f *LockableFile) Unlock() error { return f.flock(syscall.LOCK_UN | syscall.LOCK_NB) } -func (f *lockableFile) flock(how int) error { +func (f *LockableFile) flock(how int) error { return util.RetryOnEINTR(func() error { return syscall.Flock(int(f.Fd()), how) }) diff --git a/vendor/codeberg.org/gruf/go-store/storage/memory.go b/vendor/codeberg.org/gruf/go-store/storage/memory.go index be60fa464..7daa4a483 100644 --- a/vendor/codeberg.org/gruf/go-store/storage/memory.go +++ b/vendor/codeberg.org/gruf/go-store/storage/memory.go @@ -2,6 +2,7 @@ package storage import ( "io" + "sync" "codeberg.org/gruf/go-bytes" "codeberg.org/gruf/go-store/util" @@ -10,13 +11,17 @@ import ( // MemoryStorage is a storage implementation that simply stores key-value // pairs in a Go map in-memory. The map is protected by a mutex. type MemoryStorage struct { + ow bool // overwrites fs map[string][]byte + mu sync.Mutex } // OpenMemory opens a new MemoryStorage instance with internal map of 'size'. -func OpenMemory(size int) *MemoryStorage { +func OpenMemory(size int, overwrites bool) *MemoryStorage { return &MemoryStorage{ fs: make(map[string][]byte, size), + mu: sync.Mutex{}, + ow: overwrites, } } @@ -27,19 +32,33 @@ func (st *MemoryStorage) Clean() error { // ReadBytes implements Storage.ReadBytes(). func (st *MemoryStorage) ReadBytes(key string) ([]byte, error) { + // Safely check store + st.mu.Lock() b, ok := st.fs[key] + st.mu.Unlock() + + // Return early if not exist if !ok { return nil, ErrNotFound } + + // Create return copy return bytes.Copy(b), nil } // ReadStream implements Storage.ReadStream(). func (st *MemoryStorage) ReadStream(key string) (io.ReadCloser, error) { + // Safely check store + st.mu.Lock() b, ok := st.fs[key] + st.mu.Unlock() + + // Return early if not exist if !ok { return nil, ErrNotFound } + + // Create io.ReadCloser from 'b' copy b = bytes.Copy(b) r := bytes.NewReader(b) return util.NopReadCloser(r), nil @@ -47,43 +66,73 @@ func (st *MemoryStorage) ReadStream(key string) (io.ReadCloser, error) { // WriteBytes implements Storage.WriteBytes(). func (st *MemoryStorage) WriteBytes(key string, b []byte) error { + // Safely check store + st.mu.Lock() _, ok := st.fs[key] - if ok { + + // Check for already exist + if ok && !st.ow { + st.mu.Unlock() return ErrAlreadyExists } + + // Write + unlock st.fs[key] = bytes.Copy(b) + st.mu.Unlock() return nil } // WriteStream implements Storage.WriteStream(). func (st *MemoryStorage) WriteStream(key string, r io.Reader) error { + // Read all from reader b, err := io.ReadAll(r) if err != nil { return err } + + // Write to storage return st.WriteBytes(key, b) } // Stat implements Storage.Stat(). func (st *MemoryStorage) Stat(key string) (bool, error) { + st.mu.Lock() _, ok := st.fs[key] + st.mu.Unlock() return ok, nil } // Remove implements Storage.Remove(). func (st *MemoryStorage) Remove(key string) error { + // Safely check store + st.mu.Lock() _, ok := st.fs[key] + + // Check in store if !ok { + st.mu.Unlock() return ErrNotFound } + + // Delete + unlock delete(st.fs, key) + st.mu.Unlock() + return nil +} + +// Close implements Storage.Close(). +func (st *MemoryStorage) Close() error { return nil } // WalkKeys implements Storage.WalkKeys(). func (st *MemoryStorage) WalkKeys(opts WalkKeysOptions) error { + // Safely walk storage keys + st.mu.Lock() for key := range st.fs { opts.WalkFn(entry(key)) } + st.mu.Unlock() + return nil } diff --git a/vendor/codeberg.org/gruf/go-store/storage/storage.go b/vendor/codeberg.org/gruf/go-store/storage/storage.go index b160267a4..346aff097 100644 --- a/vendor/codeberg.org/gruf/go-store/storage/storage.go +++ b/vendor/codeberg.org/gruf/go-store/storage/storage.go @@ -19,9 +19,6 @@ func (e entry) Key() string { // Storage defines a means of storing and accessing key value pairs type Storage interface { - // Clean removes unused values and unclutters the storage (e.g. removing empty folders) - Clean() error - // ReadBytes returns the byte value for key in storage ReadBytes(key string) ([]byte, error) @@ -40,6 +37,12 @@ type Storage interface { // Remove attempts to remove the supplied key-value pair from storage Remove(key string) error + // Close will close the storage, releasing any file locks + Close() error + + // Clean removes unused values and unclutters the storage (e.g. removing empty folders) + Clean() error + // WalkKeys walks the keys in the storage WalkKeys(opts WalkKeysOptions) error } diff --git a/vendor/github.com/zeebo/blake3/.gitignore b/vendor/github.com/zeebo/blake3/.gitignore new file mode 100644 index 000000000..c6bfdf2c3 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/.gitignore @@ -0,0 +1,6 @@ +*.pprof +*.test +*.txt +*.out + +/upstream diff --git a/vendor/github.com/zeebo/blake3/LICENSE b/vendor/github.com/zeebo/blake3/LICENSE new file mode 100644 index 000000000..3a63575d3 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/LICENSE @@ -0,0 +1,125 @@ +This work is released into the public domain with CC0 1.0. + +------------------------------------------------------------------------------- + +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). + +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. diff --git a/vendor/github.com/zeebo/blake3/Makefile b/vendor/github.com/zeebo/blake3/Makefile new file mode 100644 index 000000000..f98f0f093 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/Makefile @@ -0,0 +1,11 @@ +asm: internal/alg/hash/hash_avx2/impl_amd64.s internal/alg/compress/compress_sse41/impl_amd64.s + +internal/alg/hash/hash_avx2/impl_amd64.s: avo/avx2/*.go + ( cd avo; go run ./avx2 ) > internal/alg/hash/hash_avx2/impl_amd64.s + +internal/alg/compress/compress_sse41/impl_amd64.s: avo/sse41/*.go + ( cd avo; go run ./sse41 ) > internal/alg/compress/compress_sse41/impl_amd64.s + +.PHONY: test +test: + go test -race -bench=. -benchtime=1x diff --git a/vendor/github.com/zeebo/blake3/README.md b/vendor/github.com/zeebo/blake3/README.md new file mode 100644 index 000000000..0a0f2e186 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/README.md @@ -0,0 +1,77 @@ +# BLAKE3 + +
+ +Pure Go implementation of [BLAKE3](https://blake3.io) with AVX2 and SSE4.1 acceleration. + +Special thanks to the excellent [avo](https://github.com/mmcloughlin/avo) making writing vectorized version much easier. + +# Benchmarks + +## Caveats + +This library makes some different design decisions than the upstream Rust crate around internal buffering. Specifically, because it does not target the embedded system space, nor does it support multithreading, it elects to do its own internal buffering. This means that a user does not have to worry about providing large enough buffers to get the best possible performance, but it does worse on smaller input sizes. So some notes: + +- The Rust benchmarks below are all single-threaded to match this Go implementation. +- I make no attempt to get precise measurements (cpu throttling, noisy environment, etc.) so please benchmark on your own systems. +- These benchmarks are run on an i7-6700K which does not support AVX-512, so Rust is limited to use AVX2 at sizes above 8 kib. +- I tried my best to make them benchmark the same thing, but who knows? :smile: + +## Charts + +In this case, both libraries are able to avoid a lot of data copying and will use vectorized instructions to hash as fast as possible, and perform similarly. + +![Large Full Buffer](/assets/large-full-buffer.svg) + +For incremental writes, you must provide the Rust version large enough buffers so that it can use vectorized instructions. This Go library performs consistently regardless of the size being sent into the update function. + +![Incremental](/assets/incremental.svg) + +The downside of internal buffering is most apparent with small sizes as most time is spent initializing the hasher state. In terms of hashing rate, the difference is 3-4x, but in an absolute sense it's ~100ns (see tables below). If you wish to hash a large number of very small strings and you care about those nanoseconds, be sure to use the Reset method to avoid re-initializing the state. + +![Small Full Buffer](/assets/small-full-buffer.svg) + +## Timing Tables + +### Small + +| Size | Full Buffer | Reset | | Full Buffer Rate | Reset Rate | +|--------|-------------|------------|-|------------------|--------------| +| 64 b | `205ns` | `86.5ns` | | `312MB/s` | `740MB/s` | +| 256 b | `364ns` | `250ns` | | `703MB/s` | `1.03GB/s` | +| 512 b | `575ns` | `468ns` | | `892MB/s` | `1.10GB/s` | +| 768 b | `795ns` | `682ns` | | `967MB/s` | `1.13GB/s` | + +### Large + +| Size | Incremental | Full Buffer | Reset | | Incremental Rate | Full Buffer Rate | Reset Rate | +|----------|-------------|-------------|------------|-|------------------|------------------|--------------| +| 1 kib | `1.02µs` | `1.01µs` | `891ns` | | `1.00GB/s` | `1.01GB/s` | `1.15GB/s` | +| 2 kib | `2.11µs` | `2.07µs` | `1.95µs` | | `968MB/s` | `990MB/s` | `1.05GB/s` | +| 4 kib | `2.28µs` | `2.15µs` | `2.05µs` | | `1.80GB/s` | `1.90GB/s` | `2.00GB/s` | +| 8 kib | `2.64µs` | `2.52µs` | `2.44µs` | | `3.11GB/s` | `3.25GB/s` | `3.36GB/s` | +| 16 kib | `4.93µs` | `4.54µs` | `4.48µs` | | `3.33GB/s` | `3.61GB/s` | `3.66GB/s` | +| 32 kib | `9.41µs` | `8.62µs` | `8.54µs` | | `3.48GB/s` | `3.80GB/s` | `3.84GB/s` | +| 64 kib | `18.2µs` | `16.7µs` | `16.6µs` | | `3.59GB/s` | `3.91GB/s` | `3.94GB/s` | +| 128 kib | `36.3µs` | `32.9µs` | `33.1µs` | | `3.61GB/s` | `3.99GB/s` | `3.96GB/s` | +| 256 kib | `72.5µs` | `65.7µs` | `66.0µs` | | `3.62GB/s` | `3.99GB/s` | `3.97GB/s` | +| 512 kib | `145µs` | `131µs` | `132µs` | | `3.60GB/s` | `4.00GB/s` | `3.97GB/s` | +| 1024 kib | `290µs` | `262µs` | `262µs` | | `3.62GB/s` | `4.00GB/s` | `4.00GB/s` | + +### No ASM + +| Size | Incremental | Full Buffer | Reset | | Incremental Rate | Full Buffer Rate | Reset Rate | +|----------|-------------|-------------|------------|-|------------------|------------------|-------------| +| 64 b | `253ns` | `254ns` | `134ns` | | `253MB/s` | `252MB/s` | `478MB/s` | +| 256 b | `553ns` | `557ns` | `441ns` | | `463MB/s` | `459MB/s` | `580MB/s` | +| 512 b | `948ns` | `953ns` | `841ns` | | `540MB/s` | `538MB/s` | `609MB/s` | +| 768 b | `1.38µs` | `1.40µs` | `1.35µs` | | `558MB/s` | `547MB/s` | `570MB/s` | +| 1 kib | `1.77µs` | `1.77µs` | `1.70µs` | | `577MB/s` | `580MB/s` | `602MB/s` | +| | | | | | | | | +| 1024 kib | `880µs` | `883µs` | `878µs` | | `596MB/s` | `595MB/s` | `598MB/s` | + +The speed caps out at around 1 kib, so most rows have been elided from the presentation. diff --git a/vendor/github.com/zeebo/blake3/api.go b/vendor/github.com/zeebo/blake3/api.go new file mode 100644 index 000000000..5de263f08 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/api.go @@ -0,0 +1,166 @@ +// Package blake3 provides an SSE4.1/AVX2 accelerated BLAKE3 implementation. +package blake3 + +import ( + "errors" + + "github.com/zeebo/blake3/internal/consts" + "github.com/zeebo/blake3/internal/utils" +) + +// Hasher is a hash.Hash for BLAKE3. +type Hasher struct { + size int + h hasher +} + +// New returns a new Hasher that has a digest size of 32 bytes. +// +// If you need more or less output bytes than that, use Digest method. +func New() *Hasher { + return &Hasher{ + size: 32, + h: hasher{ + key: consts.IV, + }, + } +} + +// NewKeyed returns a new Hasher that uses the 32 byte input key and has +// a digest size of 32 bytes. +// +// If you need more or less output bytes than that, use the Digest method. +func NewKeyed(key []byte) (*Hasher, error) { + if len(key) != 32 { + return nil, errors.New("invalid key size") + } + + h := &Hasher{ + size: 32, + h: hasher{ + flags: consts.Flag_Keyed, + }, + } + utils.KeyFromBytes(key, &h.h.key) + + return h, nil +} + +// DeriveKey derives a key based on reusable key material of any +// length, in the given context. The key will be stored in out, using +// all of its current length. +// +// Context strings must be hardcoded constants, and the recommended +// format is "[application] [commit timestamp] [purpose]", e.g., +// "example.com 2019-12-25 16:18:03 session tokens v1". +func DeriveKey(context string, material []byte, out []byte) { + h := NewDeriveKey(context) + _, _ = h.Write(material) + _, _ = h.Digest().Read(out) +} + +// NewDeriveKey returns a Hasher that is initialized with the context +// string. See DeriveKey for details. It has a digest size of 32 bytes. +// +// If you need more or less output bytes than that, use the Digest method. +func NewDeriveKey(context string) *Hasher { + // hash the context string and use that instead of IV + h := &Hasher{ + size: 32, + h: hasher{ + key: consts.IV, + flags: consts.Flag_DeriveKeyContext, + }, + } + + var buf [32]byte + _, _ = h.WriteString(context) + _, _ = h.Digest().Read(buf[:]) + + h.Reset() + utils.KeyFromBytes(buf[:], &h.h.key) + h.h.flags = consts.Flag_DeriveKeyMaterial + + return h +} + +// Write implements part of the hash.Hash interface. It never returns an error. +func (h *Hasher) Write(p []byte) (int, error) { + h.h.update(p) + return len(p), nil +} + +// WriteString is like Write but specialized to strings to avoid allocations. +func (h *Hasher) WriteString(p string) (int, error) { + h.h.updateString(p) + return len(p), nil +} + +// Reset implements part of the hash.Hash interface. It causes the Hasher to +// act as if it was newly created. +func (h *Hasher) Reset() { + h.h.reset() +} + +// Clone returns a new Hasher with the same internal state. +// +// Modifying the resulting Hasher will not modify the original Hasher, and vice versa. +func (h *Hasher) Clone() *Hasher { + return &Hasher{size: h.size, h: h.h} +} + +// Size implements part of the hash.Hash interface. It returns the number of +// bytes the hash will output in Sum. +func (h *Hasher) Size() int { + return h.size +} + +// BlockSize implements part of the hash.Hash interface. It returns the most +// natural size to write to the Hasher. +func (h *Hasher) BlockSize() int { + // TODO: is there a downside to picking this large size? + return 8192 +} + +// Sum implements part of the hash.Hash interface. It appends the digest of +// the Hasher to the provided buffer and returns it. +func (h *Hasher) Sum(b []byte) []byte { + if top := len(b) + h.size; top <= cap(b) && top >= len(b) { + h.h.finalize(b[len(b):top]) + return b[:top] + } + + tmp := make([]byte, h.size) + h.h.finalize(tmp) + return append(b, tmp...) +} + +// Digest takes a snapshot of the hash state and returns an object that can +// be used to read and seek through 2^64 bytes of digest output. +func (h *Hasher) Digest() *Digest { + var d Digest + h.h.finalizeDigest(&d) + return &d +} + +// Sum256 returns the first 256 bits of the unkeyed digest of the data. +func Sum256(data []byte) (sum [32]byte) { + out := Sum512(data) + copy(sum[:], out[:32]) + return sum +} + +// Sum512 returns the first 512 bits of the unkeyed digest of the data. +func Sum512(data []byte) (sum [64]byte) { + if len(data) <= consts.ChunkLen { + var d Digest + compressAll(&d, data, 0, consts.IV) + _, _ = d.Read(sum[:]) + return sum + } else { + h := hasher{key: consts.IV} + h.update(data) + h.finalize(sum[:]) + return sum + } +} diff --git a/vendor/github.com/zeebo/blake3/blake3.go b/vendor/github.com/zeebo/blake3/blake3.go new file mode 100644 index 000000000..98dedcabe --- /dev/null +++ b/vendor/github.com/zeebo/blake3/blake3.go @@ -0,0 +1,285 @@ +package blake3 + +import ( + "math/bits" + "unsafe" + + "github.com/zeebo/blake3/internal/alg" + "github.com/zeebo/blake3/internal/consts" + "github.com/zeebo/blake3/internal/utils" +) + +// +// hasher contains state for a blake3 hash +// + +type hasher struct { + len uint64 + chunks uint64 + flags uint32 + key [8]uint32 + stack cvstack + buf [8192]byte +} + +func (a *hasher) reset() { + a.len = 0 + a.chunks = 0 + a.stack.occ = 0 + a.stack.lvls = [8]uint8{} + a.stack.bufn = 0 +} + +func (a *hasher) update(buf []byte) { + // relies on the first two words of a string being the same as a slice + a.updateString(*(*string)(unsafe.Pointer(&buf))) +} + +func (a *hasher) updateString(buf string) { + var input *[8192]byte + + for len(buf) > 0 { + if a.len == 0 && len(buf) > 8192 { + // relies on the data pointer being the first word in the string header + input = (*[8192]byte)(*(*unsafe.Pointer)(unsafe.Pointer(&buf))) + buf = buf[8192:] + } else if a.len < 8192 { + n := copy(a.buf[a.len:], buf) + a.len += uint64(n) + buf = buf[n:] + continue + } else { + input = &a.buf + } + + a.consume(input) + a.len = 0 + a.chunks += 8 + } +} + +func (a *hasher) consume(input *[8192]byte) { + var out chainVector + var chain [8]uint32 + alg.HashF(input, 8192, a.chunks, a.flags, &a.key, &out, &chain) + a.stack.pushN(0, &out, 8, a.flags, &a.key) +} + +func (a *hasher) finalize(p []byte) { + var d Digest + a.finalizeDigest(&d) + _, _ = d.Read(p) +} + +func (a *hasher) finalizeDigest(d *Digest) { + if a.chunks == 0 && a.len <= consts.ChunkLen { + compressAll(d, a.buf[:a.len], a.flags, a.key) + return + } + + d.chain = a.key + d.flags = a.flags | consts.Flag_ChunkEnd + + if a.len > 64 { + var buf chainVector + alg.HashF(&a.buf, a.len, a.chunks, a.flags, &a.key, &buf, &d.chain) + + if a.len > consts.ChunkLen { + complete := (a.len - 1) / consts.ChunkLen + a.stack.pushN(0, &buf, int(complete), a.flags, &a.key) + a.chunks += complete + a.len = uint64(copy(a.buf[:], a.buf[complete*consts.ChunkLen:a.len])) + } + } + + if a.len <= 64 { + d.flags |= consts.Flag_ChunkStart + } + + d.counter = a.chunks + d.blen = uint32(a.len) % 64 + + base := a.len / 64 * 64 + if a.len > 0 && d.blen == 0 { + d.blen = 64 + base -= 64 + } + + if consts.IsLittleEndian { + copy((*[64]byte)(unsafe.Pointer(&d.block[0]))[:], a.buf[base:a.len]) + } else { + var tmp [64]byte + copy(tmp[:], a.buf[base:a.len]) + utils.BytesToWords(&tmp, &d.block) + } + + for a.stack.bufn > 0 { + a.stack.flush(a.flags, &a.key) + } + + var tmp [16]uint32 + for occ := a.stack.occ; occ != 0; occ &= occ - 1 { + col := uint(bits.TrailingZeros64(occ)) % 64 + + alg.Compress(&d.chain, &d.block, d.counter, d.blen, d.flags, &tmp) + + *(*[8]uint32)(unsafe.Pointer(&d.block[0])) = a.stack.stack[col] + *(*[8]uint32)(unsafe.Pointer(&d.block[8])) = *(*[8]uint32)(unsafe.Pointer(&tmp[0])) + + if occ == a.stack.occ { + d.chain = a.key + d.counter = 0 + d.blen = consts.BlockLen + d.flags = a.flags | consts.Flag_Parent + } + } + + d.flags |= consts.Flag_Root +} + +// +// chain value stack +// + +type chainVector = [64]uint32 + +type cvstack struct { + occ uint64 // which levels in stack are occupied + lvls [8]uint8 // what level the buf input was in + bufn int // how many pairs are loaded into buf + buf [2]chainVector + stack [64][8]uint32 +} + +func (a *cvstack) pushN(l uint8, cv *chainVector, n int, flags uint32, key *[8]uint32) { + for i := 0; i < n; i++ { + a.pushL(l, cv, i) + for a.bufn == 8 { + a.flush(flags, key) + } + } +} + +func (a *cvstack) pushL(l uint8, cv *chainVector, n int) { + bit := uint64(1) << (l & 63) + if a.occ&bit == 0 { + readChain(cv, n, &a.stack[l&63]) + a.occ ^= bit + return + } + + a.lvls[a.bufn&7] = l + writeChain(&a.stack[l&63], &a.buf[0], a.bufn) + copyChain(cv, n, &a.buf[1], a.bufn) + a.bufn++ + a.occ ^= bit +} + +func (a *cvstack) flush(flags uint32, key *[8]uint32) { + var out chainVector + alg.HashP(&a.buf[0], &a.buf[1], flags|consts.Flag_Parent, key, &out, a.bufn) + + bufn, lvls := a.bufn, a.lvls + a.bufn, a.lvls = 0, [8]uint8{} + + for i := 0; i < bufn; i++ { + a.pushL(lvls[i]+1, &out, i) + } +} + +// +// helpers to deal with reading/writing transposed values +// + +func copyChain(in *chainVector, icol int, out *chainVector, ocol int) { + type u = uintptr + type p = unsafe.Pointer + type a = *uint32 + + i := p(u(p(in)) + u(icol*4)) + o := p(u(p(out)) + u(ocol*4)) + + *a(p(u(o) + 0*32)) = *a(p(u(i) + 0*32)) + *a(p(u(o) + 1*32)) = *a(p(u(i) + 1*32)) + *a(p(u(o) + 2*32)) = *a(p(u(i) + 2*32)) + *a(p(u(o) + 3*32)) = *a(p(u(i) + 3*32)) + *a(p(u(o) + 4*32)) = *a(p(u(i) + 4*32)) + *a(p(u(o) + 5*32)) = *a(p(u(i) + 5*32)) + *a(p(u(o) + 6*32)) = *a(p(u(i) + 6*32)) + *a(p(u(o) + 7*32)) = *a(p(u(i) + 7*32)) +} + +func readChain(in *chainVector, col int, out *[8]uint32) { + type u = uintptr + type p = unsafe.Pointer + type a = *uint32 + + i := p(u(p(in)) + u(col*4)) + + out[0] = *a(p(u(i) + 0*32)) + out[1] = *a(p(u(i) + 1*32)) + out[2] = *a(p(u(i) + 2*32)) + out[3] = *a(p(u(i) + 3*32)) + out[4] = *a(p(u(i) + 4*32)) + out[5] = *a(p(u(i) + 5*32)) + out[6] = *a(p(u(i) + 6*32)) + out[7] = *a(p(u(i) + 7*32)) +} + +func writeChain(in *[8]uint32, out *chainVector, col int) { + type u = uintptr + type p = unsafe.Pointer + type a = *uint32 + + o := p(u(p(out)) + u(col*4)) + + *a(p(u(o) + 0*32)) = in[0] + *a(p(u(o) + 1*32)) = in[1] + *a(p(u(o) + 2*32)) = in[2] + *a(p(u(o) + 3*32)) = in[3] + *a(p(u(o) + 4*32)) = in[4] + *a(p(u(o) + 5*32)) = in[5] + *a(p(u(o) + 6*32)) = in[6] + *a(p(u(o) + 7*32)) = in[7] +} + +// +// compress <= chunkLen bytes in one shot +// + +func compressAll(d *Digest, in []byte, flags uint32, key [8]uint32) { + var compressed [16]uint32 + + d.chain = key + d.flags = flags | consts.Flag_ChunkStart + + for len(in) > 64 { + buf := (*[64]byte)(unsafe.Pointer(&in[0])) + + var block *[16]uint32 + if consts.IsLittleEndian { + block = (*[16]uint32)(unsafe.Pointer(buf)) + } else { + block = &d.block + utils.BytesToWords(buf, block) + } + + alg.Compress(&d.chain, block, 0, consts.BlockLen, d.flags, &compressed) + + d.chain = *(*[8]uint32)(unsafe.Pointer(&compressed[0])) + d.flags &^= consts.Flag_ChunkStart + + in = in[64:] + } + + if consts.IsLittleEndian { + copy((*[64]byte)(unsafe.Pointer(&d.block[0]))[:], in) + } else { + var tmp [64]byte + copy(tmp[:], in) + utils.BytesToWords(&tmp, &d.block) + } + + d.blen = uint32(len(in)) + d.flags |= consts.Flag_ChunkEnd | consts.Flag_Root +} diff --git a/vendor/github.com/zeebo/blake3/digest.go b/vendor/github.com/zeebo/blake3/digest.go new file mode 100644 index 000000000..58365d5ab --- /dev/null +++ b/vendor/github.com/zeebo/blake3/digest.go @@ -0,0 +1,100 @@ +package blake3 + +import ( + "fmt" + "io" + "unsafe" + + "github.com/zeebo/blake3/internal/alg" + "github.com/zeebo/blake3/internal/consts" + "github.com/zeebo/blake3/internal/utils" +) + +// Digest captures the state of a Hasher allowing reading and seeking through +// the output stream. +type Digest struct { + counter uint64 + chain [8]uint32 + block [16]uint32 + blen uint32 + flags uint32 + buf [16]uint32 + bufn int +} + +// Read reads data frm the hasher into out. It always fills the entire buffer and +// never errors. The stream will wrap around when reading past 2^64 bytes. +func (d *Digest) Read(p []byte) (n int, err error) { + n = len(p) + + if d.bufn > 0 { + n := d.slowCopy(p) + p = p[n:] + d.bufn -= n + } + + for len(p) >= 64 { + d.fillBuf() + + if consts.IsLittleEndian { + *(*[64]byte)(unsafe.Pointer(&p[0])) = *(*[64]byte)(unsafe.Pointer(&d.buf[0])) + } else { + utils.WordsToBytes(&d.buf, p) + } + + p = p[64:] + d.bufn = 0 + } + + if len(p) == 0 { + return n, nil + } + + d.fillBuf() + d.bufn -= d.slowCopy(p) + + return n, nil +} + +// Seek sets the position to the provided location. Only SeekStart and +// SeekCurrent are allowed. +func (d *Digest) Seek(offset int64, whence int) (int64, error) { + switch whence { + case io.SeekStart: + case io.SeekEnd: + return 0, fmt.Errorf("seek from end not supported") + case io.SeekCurrent: + offset += int64(consts.BlockLen*d.counter) - int64(d.bufn) + default: + return 0, fmt.Errorf("invalid whence: %d", whence) + } + if offset < 0 { + return 0, fmt.Errorf("seek before start") + } + d.setPosition(uint64(offset)) + return offset, nil +} + +func (d *Digest) setPosition(pos uint64) { + d.counter = pos / consts.BlockLen + d.fillBuf() + d.bufn -= int(pos % consts.BlockLen) +} + +func (d *Digest) slowCopy(p []byte) (n int) { + off := uint(consts.BlockLen-d.bufn) % consts.BlockLen + if consts.IsLittleEndian { + n = copy(p, (*[consts.BlockLen]byte)(unsafe.Pointer(&d.buf[0]))[off:]) + } else { + var tmp [consts.BlockLen]byte + utils.WordsToBytes(&d.buf, tmp[:]) + n = copy(p, tmp[off:]) + } + return n +} + +func (d *Digest) fillBuf() { + alg.Compress(&d.chain, &d.block, d.counter, d.blen, d.flags, &d.buf) + d.counter++ + d.bufn = consts.BlockLen +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/alg.go b/vendor/github.com/zeebo/blake3/internal/alg/alg.go new file mode 100644 index 000000000..239fdec5b --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/alg.go @@ -0,0 +1,18 @@ +package alg + +import ( + "github.com/zeebo/blake3/internal/alg/compress" + "github.com/zeebo/blake3/internal/alg/hash" +) + +func HashF(input *[8192]byte, length, counter uint64, flags uint32, key *[8]uint32, out *[64]uint32, chain *[8]uint32) { + hash.HashF(input, length, counter, flags, key, out, chain) +} + +func HashP(left, right *[64]uint32, flags uint32, key *[8]uint32, out *[64]uint32, n int) { + hash.HashP(left, right, flags, key, out, n) +} + +func Compress(chain *[8]uint32, block *[16]uint32, counter uint64, blen uint32, flags uint32, out *[16]uint32) { + compress.Compress(chain, block, counter, blen, flags, out) +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/compress/compress.go b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress.go new file mode 100644 index 000000000..0b2685408 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress.go @@ -0,0 +1,15 @@ +package compress + +import ( + "github.com/zeebo/blake3/internal/alg/compress/compress_pure" + "github.com/zeebo/blake3/internal/alg/compress/compress_sse41" + "github.com/zeebo/blake3/internal/consts" +) + +func Compress(chain *[8]uint32, block *[16]uint32, counter uint64, blen uint32, flags uint32, out *[16]uint32) { + if consts.HasSSE41 { + compress_sse41.Compress(chain, block, counter, blen, flags, out) + } else { + compress_pure.Compress(chain, block, counter, blen, flags, out) + } +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_pure/compress.go b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_pure/compress.go new file mode 100644 index 000000000..66ea1fb75 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_pure/compress.go @@ -0,0 +1,135 @@ +package compress_pure + +import ( + "math/bits" + + "github.com/zeebo/blake3/internal/consts" +) + +func Compress( + chain *[8]uint32, + block *[16]uint32, + counter uint64, + blen uint32, + flags uint32, + out *[16]uint32, +) { + + *out = [16]uint32{ + chain[0], chain[1], chain[2], chain[3], + chain[4], chain[5], chain[6], chain[7], + consts.IV0, consts.IV1, consts.IV2, consts.IV3, + uint32(counter), uint32(counter >> 32), blen, flags, + } + + rcompress(out, block) +} + +func g(a, b, c, d, mx, my uint32) (uint32, uint32, uint32, uint32) { + a += b + mx + d = bits.RotateLeft32(d^a, -16) + c += d + b = bits.RotateLeft32(b^c, -12) + a += b + my + d = bits.RotateLeft32(d^a, -8) + c += d + b = bits.RotateLeft32(b^c, -7) + return a, b, c, d +} + +func rcompress(s *[16]uint32, m *[16]uint32) { + const ( + a = 10 + b = 11 + c = 12 + d = 13 + e = 14 + f = 15 + ) + + s0, s1, s2, s3 := s[0+0], s[0+1], s[0+2], s[0+3] + s4, s5, s6, s7 := s[0+4], s[0+5], s[0+6], s[0+7] + s8, s9, sa, sb := s[8+0], s[8+1], s[8+2], s[8+3] + sc, sd, se, sf := s[8+4], s[8+5], s[8+6], s[8+7] + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[0], m[1]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[2], m[3]) + s2, s6, sa, se = g(s2, s6, sa, se, m[4], m[5]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[6], m[7]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[8], m[9]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[a], m[b]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[c], m[d]) + s3, s4, s9, se = g(s3, s4, s9, se, m[e], m[f]) + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[2], m[6]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[3], m[a]) + s2, s6, sa, se = g(s2, s6, sa, se, m[7], m[0]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[4], m[d]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[1], m[b]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[c], m[5]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[9], m[e]) + s3, s4, s9, se = g(s3, s4, s9, se, m[f], m[8]) + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[3], m[4]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[a], m[c]) + s2, s6, sa, se = g(s2, s6, sa, se, m[d], m[2]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[7], m[e]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[6], m[5]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[9], m[0]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[b], m[f]) + s3, s4, s9, se = g(s3, s4, s9, se, m[8], m[1]) + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[a], m[7]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[c], m[9]) + s2, s6, sa, se = g(s2, s6, sa, se, m[e], m[3]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[d], m[f]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[4], m[0]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[b], m[2]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[5], m[8]) + s3, s4, s9, se = g(s3, s4, s9, se, m[1], m[6]) + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[c], m[d]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[9], m[b]) + s2, s6, sa, se = g(s2, s6, sa, se, m[f], m[a]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[e], m[8]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[7], m[2]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[5], m[3]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[0], m[1]) + s3, s4, s9, se = g(s3, s4, s9, se, m[6], m[4]) + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[9], m[e]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[b], m[5]) + s2, s6, sa, se = g(s2, s6, sa, se, m[8], m[c]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[f], m[1]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[d], m[3]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[0], m[a]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[2], m[6]) + s3, s4, s9, se = g(s3, s4, s9, se, m[4], m[7]) + + s0, s4, s8, sc = g(s0, s4, s8, sc, m[b], m[f]) + s1, s5, s9, sd = g(s1, s5, s9, sd, m[5], m[0]) + s2, s6, sa, se = g(s2, s6, sa, se, m[1], m[9]) + s3, s7, sb, sf = g(s3, s7, sb, sf, m[8], m[6]) + s0, s5, sa, sf = g(s0, s5, sa, sf, m[e], m[a]) + s1, s6, sb, sc = g(s1, s6, sb, sc, m[2], m[c]) + s2, s7, s8, sd = g(s2, s7, s8, sd, m[3], m[4]) + s3, s4, s9, se = g(s3, s4, s9, se, m[7], m[d]) + + s[8+0] = s8 ^ s[0] + s[8+1] = s9 ^ s[1] + s[8+2] = sa ^ s[2] + s[8+3] = sb ^ s[3] + s[8+4] = sc ^ s[4] + s[8+5] = sd ^ s[5] + s[8+6] = se ^ s[6] + s[8+7] = sf ^ s[7] + + s[0] = s0 ^ s8 + s[1] = s1 ^ s9 + s[2] = s2 ^ sa + s[3] = s3 ^ sb + s[4] = s4 ^ sc + s[5] = s5 ^ sd + s[6] = s6 ^ se + s[7] = s7 ^ sf +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/impl_amd64.s b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/impl_amd64.s new file mode 100644 index 000000000..0fedf0b3a --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/impl_amd64.s @@ -0,0 +1,560 @@ +// Code generated by command: go run compress.go. DO NOT EDIT. + +#include "textflag.h" + +DATA iv<>+0(SB)/4, $0x6a09e667 +DATA iv<>+4(SB)/4, $0xbb67ae85 +DATA iv<>+8(SB)/4, $0x3c6ef372 +DATA iv<>+12(SB)/4, $0xa54ff53a +DATA iv<>+16(SB)/4, $0x510e527f +DATA iv<>+20(SB)/4, $0x9b05688c +DATA iv<>+24(SB)/4, $0x1f83d9ab +DATA iv<>+28(SB)/4, $0x5be0cd19 +GLOBL iv<>(SB), RODATA|NOPTR, $32 + +DATA rot16_shuf<>+0(SB)/1, $0x02 +DATA rot16_shuf<>+1(SB)/1, $0x03 +DATA rot16_shuf<>+2(SB)/1, $0x00 +DATA rot16_shuf<>+3(SB)/1, $0x01 +DATA rot16_shuf<>+4(SB)/1, $0x06 +DATA rot16_shuf<>+5(SB)/1, $0x07 +DATA rot16_shuf<>+6(SB)/1, $0x04 +DATA rot16_shuf<>+7(SB)/1, $0x05 +DATA rot16_shuf<>+8(SB)/1, $0x0a +DATA rot16_shuf<>+9(SB)/1, $0x0b +DATA rot16_shuf<>+10(SB)/1, $0x08 +DATA rot16_shuf<>+11(SB)/1, $0x09 +DATA rot16_shuf<>+12(SB)/1, $0x0e +DATA rot16_shuf<>+13(SB)/1, $0x0f +DATA rot16_shuf<>+14(SB)/1, $0x0c +DATA rot16_shuf<>+15(SB)/1, $0x0d +DATA rot16_shuf<>+16(SB)/1, $0x12 +DATA rot16_shuf<>+17(SB)/1, $0x13 +DATA rot16_shuf<>+18(SB)/1, $0x10 +DATA rot16_shuf<>+19(SB)/1, $0x11 +DATA rot16_shuf<>+20(SB)/1, $0x16 +DATA rot16_shuf<>+21(SB)/1, $0x17 +DATA rot16_shuf<>+22(SB)/1, $0x14 +DATA rot16_shuf<>+23(SB)/1, $0x15 +DATA rot16_shuf<>+24(SB)/1, $0x1a +DATA rot16_shuf<>+25(SB)/1, $0x1b +DATA rot16_shuf<>+26(SB)/1, $0x18 +DATA rot16_shuf<>+27(SB)/1, $0x19 +DATA rot16_shuf<>+28(SB)/1, $0x1e +DATA rot16_shuf<>+29(SB)/1, $0x1f +DATA rot16_shuf<>+30(SB)/1, $0x1c +DATA rot16_shuf<>+31(SB)/1, $0x1d +GLOBL rot16_shuf<>(SB), RODATA|NOPTR, $32 + +DATA rot8_shuf<>+0(SB)/1, $0x01 +DATA rot8_shuf<>+1(SB)/1, $0x02 +DATA rot8_shuf<>+2(SB)/1, $0x03 +DATA rot8_shuf<>+3(SB)/1, $0x00 +DATA rot8_shuf<>+4(SB)/1, $0x05 +DATA rot8_shuf<>+5(SB)/1, $0x06 +DATA rot8_shuf<>+6(SB)/1, $0x07 +DATA rot8_shuf<>+7(SB)/1, $0x04 +DATA rot8_shuf<>+8(SB)/1, $0x09 +DATA rot8_shuf<>+9(SB)/1, $0x0a +DATA rot8_shuf<>+10(SB)/1, $0x0b +DATA rot8_shuf<>+11(SB)/1, $0x08 +DATA rot8_shuf<>+12(SB)/1, $0x0d +DATA rot8_shuf<>+13(SB)/1, $0x0e +DATA rot8_shuf<>+14(SB)/1, $0x0f +DATA rot8_shuf<>+15(SB)/1, $0x0c +DATA rot8_shuf<>+16(SB)/1, $0x11 +DATA rot8_shuf<>+17(SB)/1, $0x12 +DATA rot8_shuf<>+18(SB)/1, $0x13 +DATA rot8_shuf<>+19(SB)/1, $0x10 +DATA rot8_shuf<>+20(SB)/1, $0x15 +DATA rot8_shuf<>+21(SB)/1, $0x16 +DATA rot8_shuf<>+22(SB)/1, $0x17 +DATA rot8_shuf<>+23(SB)/1, $0x14 +DATA rot8_shuf<>+24(SB)/1, $0x19 +DATA rot8_shuf<>+25(SB)/1, $0x1a +DATA rot8_shuf<>+26(SB)/1, $0x1b +DATA rot8_shuf<>+27(SB)/1, $0x18 +DATA rot8_shuf<>+28(SB)/1, $0x1d +DATA rot8_shuf<>+29(SB)/1, $0x1e +DATA rot8_shuf<>+30(SB)/1, $0x1f +DATA rot8_shuf<>+31(SB)/1, $0x1c +GLOBL rot8_shuf<>(SB), RODATA|NOPTR, $32 + +// func Compress(chain *[8]uint32, block *[16]uint32, counter uint64, blen uint32, flags uint32, out *[16]uint32) +// Requires: SSE, SSE2, SSE4.1, SSSE3 +TEXT ·Compress(SB), NOSPLIT, $0-40 + MOVQ chain+0(FP), AX + MOVQ block+8(FP), CX + MOVQ counter+16(FP), DX + MOVL blen+24(FP), BX + MOVL flags+28(FP), BP + MOVQ out+32(FP), SI + MOVUPS (AX), X0 + MOVUPS 16(AX), X1 + MOVUPS iv<>+0(SB), X2 + PINSRD $0x00, DX, X3 + SHRQ $0x20, DX + PINSRD $0x01, DX, X3 + PINSRD $0x02, BX, X3 + PINSRD $0x03, BP, X3 + MOVUPS (CX), X4 + MOVUPS 16(CX), X5 + MOVUPS 32(CX), X6 + MOVUPS 48(CX), X7 + MOVUPS rot16_shuf<>+0(SB), X8 + MOVUPS rot8_shuf<>+0(SB), X9 + + // round 1 + MOVAPS X4, X10 + SHUFPS $0x88, X5, X10 + PADDD X10, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X11 + PSRLL $0x0c, X1 + PSLLL $0x14, X11 + POR X11, X1 + MOVAPS X4, X4 + SHUFPS $0xdd, X5, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X6, X5 + SHUFPS $0x88, X7, X5 + SHUFPS $0x93, X5, X5 + PADDD X5, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X11 + PSRLL $0x0c, X1 + PSLLL $0x14, X11 + POR X11, X1 + MOVAPS X6, X6 + SHUFPS $0xdd, X7, X6 + SHUFPS $0x93, X6, X6 + PADDD X6, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x07, X1 + PSLLL $0x19, X7 + POR X7, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // round 2 + MOVAPS X10, X7 + SHUFPS $0xd6, X4, X7 + SHUFPS $0x39, X7, X7 + PADDD X7, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X11 + PSRLL $0x0c, X1 + PSLLL $0x14, X11 + POR X11, X1 + MOVAPS X5, X11 + SHUFPS $0xfa, X6, X11 + PSHUFD $0x0f, X10, X10 + PBLENDW $0x33, X10, X11 + PADDD X11, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X10 + PSRLL $0x07, X1 + PSLLL $0x19, X10 + POR X10, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X6, X12 + PUNPCKLLQ X4, X12 + PBLENDW $0xc0, X5, X12 + SHUFPS $0xb4, X12, X12 + PADDD X12, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X10 + PSRLL $0x0c, X1 + PSLLL $0x14, X10 + POR X10, X1 + MOVAPS X4, X10 + PUNPCKHLQ X6, X10 + MOVAPS X5, X4 + PUNPCKLLQ X10, X4 + SHUFPS $0x1e, X4, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // round 3 + MOVAPS X7, X5 + SHUFPS $0xd6, X11, X5 + SHUFPS $0x39, X5, X5 + PADDD X5, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X6 + PSRLL $0x0c, X1 + PSLLL $0x14, X6 + POR X6, X1 + MOVAPS X12, X6 + SHUFPS $0xfa, X4, X6 + PSHUFD $0x0f, X7, X7 + PBLENDW $0x33, X7, X6 + PADDD X6, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x07, X1 + PSLLL $0x19, X7 + POR X7, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X4, X10 + PUNPCKLLQ X11, X10 + PBLENDW $0xc0, X12, X10 + SHUFPS $0xb4, X10, X10 + PADDD X10, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x0c, X1 + PSLLL $0x14, X7 + POR X7, X1 + MOVAPS X11, X7 + PUNPCKHLQ X4, X7 + MOVAPS X12, X4 + PUNPCKLLQ X7, X4 + SHUFPS $0x1e, X4, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x07, X1 + PSLLL $0x19, X7 + POR X7, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // round 4 + MOVAPS X5, X7 + SHUFPS $0xd6, X6, X7 + SHUFPS $0x39, X7, X7 + PADDD X7, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X11 + PSRLL $0x0c, X1 + PSLLL $0x14, X11 + POR X11, X1 + MOVAPS X10, X11 + SHUFPS $0xfa, X4, X11 + PSHUFD $0x0f, X5, X5 + PBLENDW $0x33, X5, X11 + PADDD X11, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X4, X12 + PUNPCKLLQ X6, X12 + PBLENDW $0xc0, X10, X12 + SHUFPS $0xb4, X12, X12 + PADDD X12, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x0c, X1 + PSLLL $0x14, X5 + POR X5, X1 + MOVAPS X6, X5 + PUNPCKHLQ X4, X5 + MOVAPS X10, X4 + PUNPCKLLQ X5, X4 + SHUFPS $0x1e, X4, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // round 5 + MOVAPS X7, X5 + SHUFPS $0xd6, X11, X5 + SHUFPS $0x39, X5, X5 + PADDD X5, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X6 + PSRLL $0x0c, X1 + PSLLL $0x14, X6 + POR X6, X1 + MOVAPS X12, X6 + SHUFPS $0xfa, X4, X6 + PSHUFD $0x0f, X7, X7 + PBLENDW $0x33, X7, X6 + PADDD X6, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x07, X1 + PSLLL $0x19, X7 + POR X7, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X4, X10 + PUNPCKLLQ X11, X10 + PBLENDW $0xc0, X12, X10 + SHUFPS $0xb4, X10, X10 + PADDD X10, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x0c, X1 + PSLLL $0x14, X7 + POR X7, X1 + MOVAPS X11, X7 + PUNPCKHLQ X4, X7 + MOVAPS X12, X4 + PUNPCKLLQ X7, X4 + SHUFPS $0x1e, X4, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X7 + PSRLL $0x07, X1 + PSLLL $0x19, X7 + POR X7, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // round 6 + MOVAPS X5, X7 + SHUFPS $0xd6, X6, X7 + SHUFPS $0x39, X7, X7 + PADDD X7, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X11 + PSRLL $0x0c, X1 + PSLLL $0x14, X11 + POR X11, X1 + MOVAPS X10, X11 + SHUFPS $0xfa, X4, X11 + PSHUFD $0x0f, X5, X5 + PBLENDW $0x33, X5, X11 + PADDD X11, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X4, X12 + PUNPCKLLQ X6, X12 + PBLENDW $0xc0, X10, X12 + SHUFPS $0xb4, X12, X12 + PADDD X12, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x0c, X1 + PSLLL $0x14, X5 + POR X5, X1 + MOVAPS X6, X5 + PUNPCKHLQ X4, X5 + MOVAPS X10, X4 + PUNPCKLLQ X5, X4 + SHUFPS $0x1e, X4, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // round 7 + MOVAPS X7, X5 + SHUFPS $0xd6, X11, X5 + SHUFPS $0x39, X5, X5 + PADDD X5, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x0c, X1 + PSLLL $0x14, X5 + POR X5, X1 + MOVAPS X12, X5 + SHUFPS $0xfa, X4, X5 + PSHUFD $0x0f, X7, X6 + PBLENDW $0x33, X6, X5 + PADDD X5, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x07, X1 + PSLLL $0x19, X5 + POR X5, X1 + PSHUFD $0x93, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x39, X2, X2 + MOVAPS X4, X5 + PUNPCKLLQ X11, X5 + PBLENDW $0xc0, X12, X5 + SHUFPS $0xb4, X5, X5 + PADDD X5, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X8, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X5 + PSRLL $0x0c, X1 + PSLLL $0x14, X5 + POR X5, X1 + MOVAPS X11, X6 + PUNPCKHLQ X4, X6 + MOVAPS X12, X4 + PUNPCKLLQ X6, X4 + SHUFPS $0x1e, X4, X4 + PADDD X4, X0 + PADDD X1, X0 + PXOR X0, X3 + PSHUFB X9, X3 + PADDD X3, X2 + PXOR X2, X1 + MOVAPS X1, X4 + PSRLL $0x07, X1 + PSLLL $0x19, X4 + POR X4, X1 + PSHUFD $0x39, X0, X0 + PSHUFD $0x4e, X3, X3 + PSHUFD $0x93, X2, X2 + + // finalize + PXOR X2, X0 + PXOR X3, X1 + MOVUPS (AX), X4 + PXOR X4, X2 + MOVUPS 16(AX), X4 + PXOR X4, X3 + MOVUPS X0, (SI) + MOVUPS X1, 16(SI) + MOVUPS X2, 32(SI) + MOVUPS X3, 48(SI) + RET diff --git a/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/impl_other.go b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/impl_other.go new file mode 100644 index 000000000..cd63e9740 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/impl_other.go @@ -0,0 +1,9 @@ +// +build !amd64 + +package compress_sse41 + +import "github.com/zeebo/blake3/internal/alg/compress/compress_pure" + +func Compress(chain *[8]uint32, block *[16]uint32, counter uint64, blen uint32, flags uint32, out *[16]uint32) { + compress_pure.Compress(chain, block, counter, blen, flags, out) +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/stubs.go b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/stubs.go new file mode 100644 index 000000000..ffd932d3c --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/compress/compress_sse41/stubs.go @@ -0,0 +1,6 @@ +// +build amd64 + +package compress_sse41 + +//go:noescape +func Compress(chain *[8]uint32, block *[16]uint32, counter uint64, blen uint32, flags uint32, out *[16]uint32) diff --git a/vendor/github.com/zeebo/blake3/internal/alg/hash/hash.go b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash.go new file mode 100644 index 000000000..ac43abb69 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash.go @@ -0,0 +1,23 @@ +package hash + +import ( + "github.com/zeebo/blake3/internal/alg/hash/hash_avx2" + "github.com/zeebo/blake3/internal/alg/hash/hash_pure" + "github.com/zeebo/blake3/internal/consts" +) + +func HashF(input *[8192]byte, length, counter uint64, flags uint32, key *[8]uint32, out *[64]uint32, chain *[8]uint32) { + if consts.HasAVX2 && length > 2*consts.ChunkLen { + hash_avx2.HashF(input, length, counter, flags, key, out, chain) + } else { + hash_pure.HashF(input, length, counter, flags, key, out, chain) + } +} + +func HashP(left, right *[64]uint32, flags uint32, key *[8]uint32, out *[64]uint32, n int) { + if consts.HasAVX2 && n >= 2 { + hash_avx2.HashP(left, right, flags, key, out, n) + } else { + hash_pure.HashP(left, right, flags, key, out, n) + } +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/impl_amd64.s b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/impl_amd64.s new file mode 100644 index 000000000..d7531664b --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/impl_amd64.s @@ -0,0 +1,2561 @@ +// Code generated by command: go run main.go. DO NOT EDIT. + +#include "textflag.h" + +DATA iv<>+0(SB)/4, $0x6a09e667 +DATA iv<>+4(SB)/4, $0xbb67ae85 +DATA iv<>+8(SB)/4, $0x3c6ef372 +DATA iv<>+12(SB)/4, $0xa54ff53a +DATA iv<>+16(SB)/4, $0x510e527f +DATA iv<>+20(SB)/4, $0x9b05688c +DATA iv<>+24(SB)/4, $0x1f83d9ab +DATA iv<>+28(SB)/4, $0x5be0cd19 +GLOBL iv<>(SB), RODATA|NOPTR, $32 + +DATA rot16_shuf<>+0(SB)/1, $0x02 +DATA rot16_shuf<>+1(SB)/1, $0x03 +DATA rot16_shuf<>+2(SB)/1, $0x00 +DATA rot16_shuf<>+3(SB)/1, $0x01 +DATA rot16_shuf<>+4(SB)/1, $0x06 +DATA rot16_shuf<>+5(SB)/1, $0x07 +DATA rot16_shuf<>+6(SB)/1, $0x04 +DATA rot16_shuf<>+7(SB)/1, $0x05 +DATA rot16_shuf<>+8(SB)/1, $0x0a +DATA rot16_shuf<>+9(SB)/1, $0x0b +DATA rot16_shuf<>+10(SB)/1, $0x08 +DATA rot16_shuf<>+11(SB)/1, $0x09 +DATA rot16_shuf<>+12(SB)/1, $0x0e +DATA rot16_shuf<>+13(SB)/1, $0x0f +DATA rot16_shuf<>+14(SB)/1, $0x0c +DATA rot16_shuf<>+15(SB)/1, $0x0d +DATA rot16_shuf<>+16(SB)/1, $0x12 +DATA rot16_shuf<>+17(SB)/1, $0x13 +DATA rot16_shuf<>+18(SB)/1, $0x10 +DATA rot16_shuf<>+19(SB)/1, $0x11 +DATA rot16_shuf<>+20(SB)/1, $0x16 +DATA rot16_shuf<>+21(SB)/1, $0x17 +DATA rot16_shuf<>+22(SB)/1, $0x14 +DATA rot16_shuf<>+23(SB)/1, $0x15 +DATA rot16_shuf<>+24(SB)/1, $0x1a +DATA rot16_shuf<>+25(SB)/1, $0x1b +DATA rot16_shuf<>+26(SB)/1, $0x18 +DATA rot16_shuf<>+27(SB)/1, $0x19 +DATA rot16_shuf<>+28(SB)/1, $0x1e +DATA rot16_shuf<>+29(SB)/1, $0x1f +DATA rot16_shuf<>+30(SB)/1, $0x1c +DATA rot16_shuf<>+31(SB)/1, $0x1d +GLOBL rot16_shuf<>(SB), RODATA|NOPTR, $32 + +DATA rot8_shuf<>+0(SB)/1, $0x01 +DATA rot8_shuf<>+1(SB)/1, $0x02 +DATA rot8_shuf<>+2(SB)/1, $0x03 +DATA rot8_shuf<>+3(SB)/1, $0x00 +DATA rot8_shuf<>+4(SB)/1, $0x05 +DATA rot8_shuf<>+5(SB)/1, $0x06 +DATA rot8_shuf<>+6(SB)/1, $0x07 +DATA rot8_shuf<>+7(SB)/1, $0x04 +DATA rot8_shuf<>+8(SB)/1, $0x09 +DATA rot8_shuf<>+9(SB)/1, $0x0a +DATA rot8_shuf<>+10(SB)/1, $0x0b +DATA rot8_shuf<>+11(SB)/1, $0x08 +DATA rot8_shuf<>+12(SB)/1, $0x0d +DATA rot8_shuf<>+13(SB)/1, $0x0e +DATA rot8_shuf<>+14(SB)/1, $0x0f +DATA rot8_shuf<>+15(SB)/1, $0x0c +DATA rot8_shuf<>+16(SB)/1, $0x11 +DATA rot8_shuf<>+17(SB)/1, $0x12 +DATA rot8_shuf<>+18(SB)/1, $0x13 +DATA rot8_shuf<>+19(SB)/1, $0x10 +DATA rot8_shuf<>+20(SB)/1, $0x15 +DATA rot8_shuf<>+21(SB)/1, $0x16 +DATA rot8_shuf<>+22(SB)/1, $0x17 +DATA rot8_shuf<>+23(SB)/1, $0x14 +DATA rot8_shuf<>+24(SB)/1, $0x19 +DATA rot8_shuf<>+25(SB)/1, $0x1a +DATA rot8_shuf<>+26(SB)/1, $0x1b +DATA rot8_shuf<>+27(SB)/1, $0x18 +DATA rot8_shuf<>+28(SB)/1, $0x1d +DATA rot8_shuf<>+29(SB)/1, $0x1e +DATA rot8_shuf<>+30(SB)/1, $0x1f +DATA rot8_shuf<>+31(SB)/1, $0x1c +GLOBL rot8_shuf<>(SB), RODATA|NOPTR, $32 + +DATA block_len<>+0(SB)/4, $0x00000040 +DATA block_len<>+4(SB)/4, $0x00000040 +DATA block_len<>+8(SB)/4, $0x00000040 +DATA block_len<>+12(SB)/4, $0x00000040 +DATA block_len<>+16(SB)/4, $0x00000040 +DATA block_len<>+20(SB)/4, $0x00000040 +DATA block_len<>+24(SB)/4, $0x00000040 +DATA block_len<>+28(SB)/4, $0x00000040 +GLOBL block_len<>(SB), RODATA|NOPTR, $32 + +DATA zero<>+0(SB)/4, $0x00000000 +DATA zero<>+4(SB)/4, $0x00000000 +DATA zero<>+8(SB)/4, $0x00000000 +DATA zero<>+12(SB)/4, $0x00000000 +DATA zero<>+16(SB)/4, $0x00000000 +DATA zero<>+20(SB)/4, $0x00000000 +DATA zero<>+24(SB)/4, $0x00000000 +DATA zero<>+28(SB)/4, $0x00000000 +GLOBL zero<>(SB), RODATA|NOPTR, $32 + +DATA counter<>+0(SB)/8, $0x0000000000000000 +DATA counter<>+8(SB)/8, $0x0000000000000001 +DATA counter<>+16(SB)/8, $0x0000000000000002 +DATA counter<>+24(SB)/8, $0x0000000000000003 +DATA counter<>+32(SB)/8, $0x0000000000000004 +DATA counter<>+40(SB)/8, $0x0000000000000005 +DATA counter<>+48(SB)/8, $0x0000000000000006 +DATA counter<>+56(SB)/8, $0x0000000000000007 +GLOBL counter<>(SB), RODATA|NOPTR, $64 + +// func HashF(input *[8192]byte, length uint64, counter uint64, flags uint32, key *[8]uint32, out *[32]uint32, chain *[8]uint32) +// Requires: AVX, AVX2 +TEXT ·HashF(SB), $688-56 + MOVQ input+0(FP), AX + MOVQ length+8(FP), CX + MOVQ counter+16(FP), DX + MOVL flags+24(FP), BX + MOVQ key+32(FP), BP + MOVQ out+40(FP), SI + MOVQ chain+48(FP), DI + + // Allocate local space and align it + LEAQ 31(SP), R10 + MOVQ $0x000000000000001f, R8 + NOTQ R8 + ANDQ R8, R10 + + // Skip if the length is zero + XORQ R8, R8 + XORQ R9, R9 + TESTQ CX, CX + JZ skip_compute + + // Compute complete chunks and blocks + SUBQ $0x01, CX + MOVQ CX, R8 + SHRQ $0x0a, R8 + MOVQ CX, R9 + ANDQ $0x000003c0, R9 + +skip_compute: + // Load some params into the stack (avo improvment?) + MOVL BX, 64(SP) + MOVQ DX, 72(SP) + + // Load IV into vectors + VPBROADCASTD (BP), Y0 + VPBROADCASTD 4(BP), Y1 + VPBROADCASTD 8(BP), Y2 + VPBROADCASTD 12(BP), Y3 + VPBROADCASTD 16(BP), Y4 + VPBROADCASTD 20(BP), Y5 + VPBROADCASTD 24(BP), Y6 + VPBROADCASTD 28(BP), Y7 + + // Build and store counter data on the stack + VPBROADCASTQ 72(SP), Y8 + VPADDQ counter<>+0(SB), Y8, Y8 + VPBROADCASTQ 72(SP), Y9 + VPADDQ counter<>+32(SB), Y9, Y9 + VPUNPCKLDQ Y9, Y8, Y10 + VPUNPCKHDQ Y9, Y8, Y8 + VPUNPCKLDQ Y8, Y10, Y9 + VPUNPCKHDQ Y8, Y10, Y8 + VPERMQ $0xd8, Y9, Y9 + VPERMQ $0xd8, Y8, Y8 + VMOVDQU Y9, 112(SP) + VMOVDQU Y8, 144(SP) + + // Set up block flags and variables for iteration + XORQ CX, CX + ORL $0x01, 64(SP) + +loop: + // Include end flags if last block + CMPQ CX, $0x000003c0 + JNE round_setup + ORL $0x02, 64(SP) + +round_setup: + // Load and transpose message vectors + VMOVDQU (AX)(CX*1), Y8 + VMOVDQU 1024(AX)(CX*1), Y9 + VMOVDQU 2048(AX)(CX*1), Y10 + VMOVDQU 3072(AX)(CX*1), Y11 + VMOVDQU 4096(AX)(CX*1), Y12 + VMOVDQU 5120(AX)(CX*1), Y13 + VMOVDQU 6144(AX)(CX*1), Y14 + VMOVDQU 7168(AX)(CX*1), Y15 + VMOVDQA Y0, (R10) + VPUNPCKLDQ Y9, Y8, Y0 + VPUNPCKHDQ Y9, Y8, Y8 + VPUNPCKLDQ Y11, Y10, Y9 + VPUNPCKHDQ Y11, Y10, Y10 + VPUNPCKLDQ Y13, Y12, Y11 + VPUNPCKHDQ Y13, Y12, Y12 + VPUNPCKLDQ Y15, Y14, Y13 + VPUNPCKHDQ Y15, Y14, Y14 + VPUNPCKLQDQ Y9, Y0, Y15 + VPUNPCKHQDQ Y9, Y0, Y0 + VPUNPCKLQDQ Y10, Y8, Y9 + VPUNPCKHQDQ Y10, Y8, Y8 + VPUNPCKLQDQ Y13, Y11, Y10 + VPUNPCKHQDQ Y13, Y11, Y11 + VPUNPCKLQDQ Y14, Y12, Y13 + VPUNPCKHQDQ Y14, Y12, Y12 + VINSERTI128 $0x01, X10, Y15, Y14 + VPERM2I128 $0x31, Y10, Y15, Y10 + VINSERTI128 $0x01, X11, Y0, Y15 + VPERM2I128 $0x31, Y11, Y0, Y0 + VINSERTI128 $0x01, X13, Y9, Y11 + VPERM2I128 $0x31, Y13, Y9, Y9 + VINSERTI128 $0x01, X12, Y8, Y13 + VPERM2I128 $0x31, Y12, Y8, Y8 + VMOVDQU Y14, 176(SP) + VMOVDQU Y15, 208(SP) + VMOVDQU Y11, 240(SP) + VMOVDQU Y13, 272(SP) + VMOVDQU Y10, 304(SP) + VMOVDQU Y0, 336(SP) + VMOVDQU Y9, 368(SP) + VMOVDQU Y8, 400(SP) + VMOVDQU 32(AX)(CX*1), Y0 + VMOVDQU 1056(AX)(CX*1), Y8 + VMOVDQU 2080(AX)(CX*1), Y9 + VMOVDQU 3104(AX)(CX*1), Y10 + VMOVDQU 4128(AX)(CX*1), Y11 + VMOVDQU 5152(AX)(CX*1), Y12 + VMOVDQU 6176(AX)(CX*1), Y13 + VMOVDQU 7200(AX)(CX*1), Y14 + VPUNPCKLDQ Y8, Y0, Y15 + VPUNPCKHDQ Y8, Y0, Y0 + VPUNPCKLDQ Y10, Y9, Y8 + VPUNPCKHDQ Y10, Y9, Y9 + VPUNPCKLDQ Y12, Y11, Y10 + VPUNPCKHDQ Y12, Y11, Y11 + VPUNPCKLDQ Y14, Y13, Y12 + VPUNPCKHDQ Y14, Y13, Y13 + VPUNPCKLQDQ Y8, Y15, Y14 + VPUNPCKHQDQ Y8, Y15, Y8 + VPUNPCKLQDQ Y9, Y0, Y15 + VPUNPCKHQDQ Y9, Y0, Y0 + VPUNPCKLQDQ Y12, Y10, Y9 + VPUNPCKHQDQ Y12, Y10, Y10 + VPUNPCKLQDQ Y13, Y11, Y12 + VPUNPCKHQDQ Y13, Y11, Y11 + VINSERTI128 $0x01, X9, Y14, Y13 + VPERM2I128 $0x31, Y9, Y14, Y9 + VINSERTI128 $0x01, X10, Y8, Y14 + VPERM2I128 $0x31, Y10, Y8, Y8 + VINSERTI128 $0x01, X12, Y15, Y10 + VPERM2I128 $0x31, Y12, Y15, Y12 + VINSERTI128 $0x01, X11, Y0, Y15 + VPERM2I128 $0x31, Y11, Y0, Y0 + VMOVDQU Y13, 432(SP) + VMOVDQU Y14, 464(SP) + VMOVDQU Y10, 496(SP) + VMOVDQU Y15, 528(SP) + VMOVDQU Y9, 560(SP) + VMOVDQU Y8, 592(SP) + VMOVDQU Y12, 624(SP) + VMOVDQU Y0, 656(SP) + + // Load constants for the round + VMOVDQA (R10), Y0 + VMOVDQU block_len<>+0(SB), Y8 + VPBROADCASTD 64(SP), Y9 + VPBROADCASTD iv<>+0(SB), Y10 + VPBROADCASTD iv<>+4(SB), Y11 + VPBROADCASTD iv<>+8(SB), Y12 + VPBROADCASTD iv<>+12(SB), Y13 + VMOVDQU 112(SP), Y14 + VMOVDQU 144(SP), Y15 + + // Save state for partial chunk if necessary + CMPQ CX, R9 + JNE begin_rounds + VMOVDQU Y0, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, (DI) + VMOVDQU Y1, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 4(DI) + VMOVDQU Y2, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 8(DI) + VMOVDQU Y3, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 12(DI) + VMOVDQU Y4, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 16(DI) + VMOVDQU Y5, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 20(DI) + VMOVDQU Y6, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 24(DI) + VMOVDQU Y7, 80(SP) + MOVL 80(SP)(R8*4), DX + MOVL DX, 28(DI) + +begin_rounds: + // Perform the rounds + // Round 1 + VPADDD 176(SP), Y0, Y0 + VPADDD 240(SP), Y1, Y1 + VPADDD 304(SP), Y2, Y2 + VPADDD 368(SP), Y3, Y3 + VPADDD Y4, Y0, Y0 + VPXOR Y0, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y7, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y4, Y4 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y5, Y5 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y6, Y6 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y7, Y7 + VMOVDQA Y0, (R10) + VPSRLD $0x0c, Y4, Y0 + VPSLLD $0x14, Y4, Y4 + VPOR Y0, Y4, Y0 + VPSRLD $0x0c, Y5, Y4 + VPSLLD $0x14, Y5, Y5 + VPOR Y4, Y5, Y4 + VPSRLD $0x0c, Y6, Y5 + VPSLLD $0x14, Y6, Y6 + VPOR Y5, Y6, Y5 + VPSRLD $0x0c, Y7, Y6 + VPSLLD $0x14, Y7, Y7 + VPOR Y6, Y7, Y6 + VMOVDQA (R10), Y7 + VPADDD 208(SP), Y7, Y7 + VPADDD 272(SP), Y1, Y1 + VPADDD 336(SP), Y2, Y2 + VPADDD 400(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 432(SP), Y7, Y7 + VPADDD 496(SP), Y1, Y1 + VPADDD 560(SP), Y2, Y2 + VPADDD 624(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 464(SP), Y7, Y7 + VPADDD 528(SP), Y1, Y1 + VPADDD 592(SP), Y2, Y2 + VPADDD 656(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Round 2 + VMOVDQA (R10), Y7 + VPADDD 240(SP), Y7, Y7 + VPADDD 272(SP), Y1, Y1 + VPADDD 400(SP), Y2, Y2 + VPADDD 304(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 368(SP), Y7, Y7 + VPADDD 496(SP), Y1, Y1 + VPADDD 176(SP), Y2, Y2 + VPADDD 592(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 208(SP), Y7, Y7 + VPADDD 560(SP), Y1, Y1 + VPADDD 464(SP), Y2, Y2 + VPADDD 656(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 528(SP), Y7, Y7 + VPADDD 336(SP), Y1, Y1 + VPADDD 624(SP), Y2, Y2 + VPADDD 432(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Round 3 + VMOVDQA (R10), Y7 + VPADDD 272(SP), Y7, Y7 + VPADDD 496(SP), Y1, Y1 + VPADDD 592(SP), Y2, Y2 + VPADDD 400(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 304(SP), Y7, Y7 + VPADDD 560(SP), Y1, Y1 + VPADDD 240(SP), Y2, Y2 + VPADDD 624(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 368(SP), Y7, Y7 + VPADDD 464(SP), Y1, Y1 + VPADDD 528(SP), Y2, Y2 + VPADDD 432(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 336(SP), Y7, Y7 + VPADDD 176(SP), Y1, Y1 + VPADDD 656(SP), Y2, Y2 + VPADDD 208(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Round 4 + VMOVDQA (R10), Y7 + VPADDD 496(SP), Y7, Y7 + VPADDD 560(SP), Y1, Y1 + VPADDD 624(SP), Y2, Y2 + VPADDD 592(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 400(SP), Y7, Y7 + VPADDD 464(SP), Y1, Y1 + VPADDD 272(SP), Y2, Y2 + VPADDD 656(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 304(SP), Y7, Y7 + VPADDD 528(SP), Y1, Y1 + VPADDD 336(SP), Y2, Y2 + VPADDD 208(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 176(SP), Y7, Y7 + VPADDD 240(SP), Y1, Y1 + VPADDD 432(SP), Y2, Y2 + VPADDD 368(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Round 5 + VMOVDQA (R10), Y7 + VPADDD 560(SP), Y7, Y7 + VPADDD 464(SP), Y1, Y1 + VPADDD 656(SP), Y2, Y2 + VPADDD 624(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 592(SP), Y7, Y7 + VPADDD 528(SP), Y1, Y1 + VPADDD 496(SP), Y2, Y2 + VPADDD 432(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 400(SP), Y7, Y7 + VPADDD 336(SP), Y1, Y1 + VPADDD 176(SP), Y2, Y2 + VPADDD 368(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 240(SP), Y7, Y7 + VPADDD 272(SP), Y1, Y1 + VPADDD 208(SP), Y2, Y2 + VPADDD 304(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Round 6 + VMOVDQA (R10), Y7 + VPADDD 464(SP), Y7, Y7 + VPADDD 528(SP), Y1, Y1 + VPADDD 432(SP), Y2, Y2 + VPADDD 656(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 624(SP), Y7, Y7 + VPADDD 336(SP), Y1, Y1 + VPADDD 560(SP), Y2, Y2 + VPADDD 208(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 592(SP), Y7, Y7 + VPADDD 176(SP), Y1, Y1 + VPADDD 240(SP), Y2, Y2 + VPADDD 304(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 272(SP), Y7, Y7 + VPADDD 496(SP), Y1, Y1 + VPADDD 368(SP), Y2, Y2 + VPADDD 400(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Round 7 + VMOVDQA (R10), Y7 + VPADDD 528(SP), Y7, Y7 + VPADDD 336(SP), Y1, Y1 + VPADDD 208(SP), Y2, Y2 + VPADDD 432(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 656(SP), Y7, Y7 + VPADDD 176(SP), Y1, Y1 + VPADDD 464(SP), Y2, Y2 + VPADDD 368(SP), Y3, Y3 + VPADDD Y0, Y7, Y7 + VPXOR Y7, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y5, Y2, Y2 + VPXOR Y2, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y6, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y14, Y10, Y10 + VPXOR Y10, Y0, Y0 + VPADDD Y15, Y11, Y11 + VPXOR Y11, Y4, Y4 + VPADDD Y8, Y12, Y12 + VPXOR Y12, Y5, Y5 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y6, Y6 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VMOVDQA (R10), Y7 + VPADDD 624(SP), Y7, Y7 + VPADDD 240(SP), Y1, Y1 + VPADDD 272(SP), Y2, Y2 + VPADDD 400(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot16_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot16_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot16_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x0c, Y4, Y7 + VPSLLD $0x14, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x0c, Y5, Y7 + VPSLLD $0x14, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x0c, Y6, Y7 + VPSLLD $0x14, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x0c, Y0, Y7 + VPSLLD $0x14, Y0, Y0 + VPOR Y7, Y0, Y0 + VMOVDQA (R10), Y7 + VPADDD 496(SP), Y7, Y7 + VPADDD 560(SP), Y1, Y1 + VPADDD 304(SP), Y2, Y2 + VPADDD 592(SP), Y3, Y3 + VPADDD Y4, Y7, Y7 + VPXOR Y7, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y5, Y1, Y1 + VPXOR Y1, Y14, Y14 + VPSHUFB rot8_shuf<>+0(SB), Y14, Y14 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y15, Y15 + VPSHUFB rot8_shuf<>+0(SB), Y15, Y15 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y8, Y8 + VPSHUFB rot8_shuf<>+0(SB), Y8, Y8 + VPADDD Y9, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPADDD Y14, Y13, Y13 + VPXOR Y13, Y5, Y5 + VPADDD Y15, Y10, Y10 + VPXOR Y10, Y6, Y6 + VPADDD Y8, Y11, Y11 + VPXOR Y11, Y0, Y0 + VMOVDQA Y7, (R10) + VPSRLD $0x07, Y4, Y7 + VPSLLD $0x19, Y4, Y4 + VPOR Y7, Y4, Y4 + VPSRLD $0x07, Y5, Y7 + VPSLLD $0x19, Y5, Y5 + VPOR Y7, Y5, Y5 + VPSRLD $0x07, Y6, Y7 + VPSLLD $0x19, Y6, Y6 + VPOR Y7, Y6, Y6 + VPSRLD $0x07, Y0, Y7 + VPSLLD $0x19, Y0, Y0 + VPOR Y7, Y0, Y0 + + // Finalize rounds + VPXOR Y9, Y6, Y6 + VPXOR (R10), Y10, Y7 + VPXOR Y11, Y1, Y1 + VPXOR Y12, Y2, Y2 + VPXOR Y13, Y3, Y3 + VPXOR Y14, Y0, Y0 + VPXOR Y15, Y4, Y4 + VPXOR Y8, Y5, Y5 + + // Fix up registers for next iteration + VMOVDQU Y7, Y8 + VMOVDQU Y6, Y7 + VMOVDQU Y5, Y6 + VMOVDQU Y4, Y5 + VMOVDQU Y0, Y4 + VMOVDQU Y8, Y0 + + // If we have zero complete chunks, we're done + CMPQ R8, $0x00 + JNE loop_trailer + CMPQ R9, CX + JEQ finalize + +loop_trailer: + // Increment, reset flags, and loop + CMPQ CX, $0x000003c0 + JEQ finalize + ADDQ $0x40, CX + MOVL BX, 64(SP) + JMP loop + +finalize: + // Store result into out + VMOVDQU Y0, (SI) + VMOVDQU Y1, 32(SI) + VMOVDQU Y2, 64(SI) + VMOVDQU Y3, 96(SI) + VMOVDQU Y4, 128(SI) + VMOVDQU Y5, 160(SI) + VMOVDQU Y6, 192(SI) + VMOVDQU Y7, 224(SI) + VZEROUPPER + RET + +// func HashP(left *[32]uint32, right *[32]uint32, flags uint8, key *[8]uint32, out *[32]uint32, n int) +// Requires: AVX, AVX2 +TEXT ·HashP(SB), NOSPLIT, $72-48 + MOVQ left+0(FP), AX + MOVQ right+8(FP), CX + MOVBLZX flags+16(FP), DX + MOVQ key+24(FP), BX + MOVQ out+32(FP), BP + + // Allocate local space and align it + LEAQ 31(SP), SI + MOVQ $0x000000000000001f, DI + NOTQ DI + ANDQ DI, SI + + // Set up flags value + MOVL DX, 64(SP) + + // Perform the rounds + // Round 1 + VPBROADCASTD (BX), Y0 + VPADDD (AX), Y0, Y0 + VPBROADCASTD 4(BX), Y1 + VPADDD 64(AX), Y1, Y1 + VPBROADCASTD 8(BX), Y2 + VPADDD 128(AX), Y2, Y2 + VPBROADCASTD 12(BX), Y3 + VPADDD 192(AX), Y3, Y3 + VPBROADCASTD 16(BX), Y4 + VPADDD Y4, Y0, Y0 + VMOVDQU zero<>+0(SB), Y5 + VPXOR Y0, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPBROADCASTD 20(BX), Y6 + VPADDD Y6, Y1, Y1 + VMOVDQU zero<>+0(SB), Y7 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPBROADCASTD 24(BX), Y8 + VPADDD Y8, Y2, Y2 + VMOVDQU block_len<>+0(SB), Y9 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPBROADCASTD 28(BX), Y10 + VPADDD Y10, Y3, Y3 + VPBROADCASTD 64(SP), Y11 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPBROADCASTD iv<>+0(SB), Y12 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y4, Y4 + VPBROADCASTD iv<>+4(SB), Y13 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y6, Y6 + VPBROADCASTD iv<>+8(SB), Y14 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y8, Y8 + VPBROADCASTD iv<>+12(SB), Y15 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y10, Y10 + VMOVDQA Y0, (SI) + VPSRLD $0x0c, Y4, Y0 + VPSLLD $0x14, Y4, Y4 + VPOR Y0, Y4, Y0 + VPSRLD $0x0c, Y6, Y4 + VPSLLD $0x14, Y6, Y6 + VPOR Y4, Y6, Y4 + VPSRLD $0x0c, Y8, Y6 + VPSLLD $0x14, Y8, Y8 + VPOR Y6, Y8, Y6 + VPSRLD $0x0c, Y10, Y8 + VPSLLD $0x14, Y10, Y10 + VPOR Y8, Y10, Y8 + VMOVDQA (SI), Y10 + VPADDD 32(AX), Y10, Y10 + VPADDD 96(AX), Y1, Y1 + VPADDD 160(AX), Y2, Y2 + VPADDD 224(AX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD (CX), Y10, Y10 + VPADDD 64(CX), Y1, Y1 + VPADDD 128(CX), Y2, Y2 + VPADDD 192(CX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD 32(CX), Y10, Y10 + VPADDD 96(CX), Y1, Y1 + VPADDD 160(CX), Y2, Y2 + VPADDD 224(CX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Round 2 + VMOVDQA (SI), Y10 + VPADDD 64(AX), Y10, Y10 + VPADDD 96(AX), Y1, Y1 + VPADDD 224(AX), Y2, Y2 + VPADDD 128(AX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 192(AX), Y10, Y10 + VPADDD 64(CX), Y1, Y1 + VPADDD (AX), Y2, Y2 + VPADDD 160(CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 32(AX), Y10, Y10 + VPADDD 128(CX), Y1, Y1 + VPADDD 32(CX), Y2, Y2 + VPADDD 224(CX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD 96(CX), Y10, Y10 + VPADDD 160(AX), Y1, Y1 + VPADDD 192(CX), Y2, Y2 + VPADDD (CX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Round 3 + VMOVDQA (SI), Y10 + VPADDD 96(AX), Y10, Y10 + VPADDD 64(CX), Y1, Y1 + VPADDD 160(CX), Y2, Y2 + VPADDD 224(AX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 128(AX), Y10, Y10 + VPADDD 128(CX), Y1, Y1 + VPADDD 64(AX), Y2, Y2 + VPADDD 192(CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 192(AX), Y10, Y10 + VPADDD 32(CX), Y1, Y1 + VPADDD 96(CX), Y2, Y2 + VPADDD (CX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD 160(AX), Y10, Y10 + VPADDD (AX), Y1, Y1 + VPADDD 224(CX), Y2, Y2 + VPADDD 32(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Round 4 + VMOVDQA (SI), Y10 + VPADDD 64(CX), Y10, Y10 + VPADDD 128(CX), Y1, Y1 + VPADDD 192(CX), Y2, Y2 + VPADDD 160(CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 224(AX), Y10, Y10 + VPADDD 32(CX), Y1, Y1 + VPADDD 96(AX), Y2, Y2 + VPADDD 224(CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 128(AX), Y10, Y10 + VPADDD 96(CX), Y1, Y1 + VPADDD 160(AX), Y2, Y2 + VPADDD 32(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD (AX), Y10, Y10 + VPADDD 64(AX), Y1, Y1 + VPADDD (CX), Y2, Y2 + VPADDD 192(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Round 5 + VMOVDQA (SI), Y10 + VPADDD 128(CX), Y10, Y10 + VPADDD 32(CX), Y1, Y1 + VPADDD 224(CX), Y2, Y2 + VPADDD 192(CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 160(CX), Y10, Y10 + VPADDD 96(CX), Y1, Y1 + VPADDD 64(CX), Y2, Y2 + VPADDD (CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 224(AX), Y10, Y10 + VPADDD 160(AX), Y1, Y1 + VPADDD (AX), Y2, Y2 + VPADDD 192(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD 64(AX), Y10, Y10 + VPADDD 96(AX), Y1, Y1 + VPADDD 32(AX), Y2, Y2 + VPADDD 128(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Round 6 + VMOVDQA (SI), Y10 + VPADDD 32(CX), Y10, Y10 + VPADDD 96(CX), Y1, Y1 + VPADDD (CX), Y2, Y2 + VPADDD 224(CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 192(CX), Y10, Y10 + VPADDD 160(AX), Y1, Y1 + VPADDD 128(CX), Y2, Y2 + VPADDD 32(AX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 160(CX), Y10, Y10 + VPADDD (AX), Y1, Y1 + VPADDD 64(AX), Y2, Y2 + VPADDD 128(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD 96(AX), Y10, Y10 + VPADDD 64(CX), Y1, Y1 + VPADDD 192(AX), Y2, Y2 + VPADDD 224(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Round 7 + VMOVDQA (SI), Y10 + VPADDD 96(CX), Y10, Y10 + VPADDD 160(AX), Y1, Y1 + VPADDD 32(AX), Y2, Y2 + VPADDD (CX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 224(CX), Y10, Y10 + VPADDD (AX), Y1, Y1 + VPADDD 32(CX), Y2, Y2 + VPADDD 192(AX), Y3, Y3 + VPADDD Y0, Y10, Y10 + VPXOR Y10, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y4, Y1, Y1 + VPXOR Y1, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y6, Y2, Y2 + VPXOR Y2, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y8, Y3, Y3 + VPXOR Y3, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y5, Y12, Y12 + VPXOR Y12, Y0, Y0 + VPADDD Y7, Y13, Y13 + VPXOR Y13, Y4, Y4 + VPADDD Y9, Y14, Y14 + VPXOR Y14, Y6, Y6 + VPADDD Y11, Y15, Y15 + VPXOR Y15, Y8, Y8 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VMOVDQA (SI), Y10 + VPADDD 192(CX), Y10, Y10 + VPADDD 64(AX), Y1, Y1 + VPADDD 96(AX), Y2, Y2 + VPADDD 224(AX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot16_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot16_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot16_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot16_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x0c, Y4, Y10 + VPSLLD $0x14, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x0c, Y6, Y10 + VPSLLD $0x14, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x0c, Y8, Y10 + VPSLLD $0x14, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x0c, Y0, Y10 + VPSLLD $0x14, Y0, Y0 + VPOR Y10, Y0, Y0 + VMOVDQA (SI), Y10 + VPADDD 64(CX), Y10, Y10 + VPADDD 128(CX), Y1, Y1 + VPADDD 128(AX), Y2, Y2 + VPADDD 160(CX), Y3, Y3 + VPADDD Y4, Y10, Y10 + VPXOR Y10, Y11, Y11 + VPSHUFB rot8_shuf<>+0(SB), Y11, Y11 + VPADDD Y6, Y1, Y1 + VPXOR Y1, Y5, Y5 + VPSHUFB rot8_shuf<>+0(SB), Y5, Y5 + VPADDD Y8, Y2, Y2 + VPXOR Y2, Y7, Y7 + VPSHUFB rot8_shuf<>+0(SB), Y7, Y7 + VPADDD Y0, Y3, Y3 + VPXOR Y3, Y9, Y9 + VPSHUFB rot8_shuf<>+0(SB), Y9, Y9 + VPADDD Y11, Y14, Y14 + VPXOR Y14, Y4, Y4 + VPADDD Y5, Y15, Y15 + VPXOR Y15, Y6, Y6 + VPADDD Y7, Y12, Y12 + VPXOR Y12, Y8, Y8 + VPADDD Y9, Y13, Y13 + VPXOR Y13, Y0, Y0 + VMOVDQA Y10, (SI) + VPSRLD $0x07, Y4, Y10 + VPSLLD $0x19, Y4, Y4 + VPOR Y10, Y4, Y4 + VPSRLD $0x07, Y6, Y10 + VPSLLD $0x19, Y6, Y6 + VPOR Y10, Y6, Y6 + VPSRLD $0x07, Y8, Y10 + VPSLLD $0x19, Y8, Y8 + VPOR Y10, Y8, Y8 + VPSRLD $0x07, Y0, Y10 + VPSLLD $0x19, Y0, Y0 + VPOR Y10, Y0, Y0 + + // Finalize + VPXOR (SI), Y12, Y10 + VPXOR Y13, Y1, Y1 + VPXOR Y14, Y2, Y2 + VPXOR Y15, Y3, Y3 + VPXOR Y5, Y0, Y0 + VPXOR Y7, Y4, Y4 + VPXOR Y9, Y6, Y5 + VPXOR Y11, Y8, Y6 + + // Store result into out + VMOVDQU Y10, (BP) + VMOVDQU Y1, 32(BP) + VMOVDQU Y2, 64(BP) + VMOVDQU Y3, 96(BP) + VMOVDQU Y0, 128(BP) + VMOVDQU Y4, 160(BP) + VMOVDQU Y5, 192(BP) + VMOVDQU Y6, 224(BP) + VZEROUPPER + RET diff --git a/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/impl_other.go b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/impl_other.go new file mode 100644 index 000000000..613972814 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/impl_other.go @@ -0,0 +1,13 @@ +// +build !amd64 + +package hash_avx2 + +import "github.com/zeebo/blake3/internal/alg/hash/hash_pure" + +func HashF(input *[8192]byte, length, counter uint64, flags uint32, key *[8]uint32, out *[64]uint32, chain *[8]uint32) { + hash_pure.HashF(input, length, counter, flags, key, out, chain) +} + +func HashP(left, right *[64]uint32, flags uint32, key *[8]uint32, out *[64]uint32, n int) { + hash_pure.HashP(left, right, flags, key, out, n) +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/stubs.go b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/stubs.go new file mode 100644 index 000000000..10e949550 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_avx2/stubs.go @@ -0,0 +1,9 @@ +// +build amd64 + +package hash_avx2 + +//go:noescape +func HashF(input *[8192]byte, length, counter uint64, flags uint32, key *[8]uint32, out *[64]uint32, chain *[8]uint32) + +//go:noescape +func HashP(left, right *[64]uint32, flags uint32, key *[8]uint32, out *[64]uint32, n int) diff --git a/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_pure/hashf.go b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_pure/hashf.go new file mode 100644 index 000000000..0c6fd63cd --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_pure/hashf.go @@ -0,0 +1,56 @@ +package hash_pure + +import ( + "unsafe" + + "github.com/zeebo/blake3/internal/alg/compress" + "github.com/zeebo/blake3/internal/consts" + "github.com/zeebo/blake3/internal/utils" +) + +func HashF(input *[8192]byte, length, counter uint64, flags uint32, key *[8]uint32, out *[64]uint32, chain *[8]uint32) { + var tmp [16]uint32 + + for i := uint64(0); consts.ChunkLen*i < length && i < 8; i++ { + bchain := *key + bflags := flags | consts.Flag_ChunkStart + start := consts.ChunkLen * i + + for n := uint64(0); n < 16; n++ { + if n == 15 { + bflags |= consts.Flag_ChunkEnd + } + if start+64*n >= length { + break + } + if start+64+64*n >= length { + *chain = bchain + } + + var blockPtr *[16]uint32 + if consts.IsLittleEndian { + blockPtr = (*[16]uint32)(unsafe.Pointer(&input[consts.ChunkLen*i+consts.BlockLen*n])) + } else { + var block [16]uint32 + utils.BytesToWords((*[64]uint8)(unsafe.Pointer(&input[consts.ChunkLen*i+consts.BlockLen*n])), &block) + blockPtr = &block + } + + compress.Compress(&bchain, blockPtr, counter, consts.BlockLen, bflags, &tmp) + + bchain = *(*[8]uint32)(unsafe.Pointer(&tmp[0])) + bflags = flags + } + + out[i+0] = bchain[0] + out[i+8] = bchain[1] + out[i+16] = bchain[2] + out[i+24] = bchain[3] + out[i+32] = bchain[4] + out[i+40] = bchain[5] + out[i+48] = bchain[6] + out[i+56] = bchain[7] + + counter++ + } +} diff --git a/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_pure/hashp.go b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_pure/hashp.go new file mode 100644 index 000000000..bee5d8dd0 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/alg/hash/hash_pure/hashp.go @@ -0,0 +1,38 @@ +package hash_pure + +import "github.com/zeebo/blake3/internal/alg/compress" + +func HashP(left, right *[64]uint32, flags uint32, key *[8]uint32, out *[64]uint32, n int) { + var tmp [16]uint32 + var block [16]uint32 + + for i := 0; i < n && i < 8; i++ { + block[0] = left[i+0] + block[1] = left[i+8] + block[2] = left[i+16] + block[3] = left[i+24] + block[4] = left[i+32] + block[5] = left[i+40] + block[6] = left[i+48] + block[7] = left[i+56] + block[8] = right[i+0] + block[9] = right[i+8] + block[10] = right[i+16] + block[11] = right[i+24] + block[12] = right[i+32] + block[13] = right[i+40] + block[14] = right[i+48] + block[15] = right[i+56] + + compress.Compress(key, &block, 0, 64, flags, &tmp) + + out[i+0] = tmp[0] + out[i+8] = tmp[1] + out[i+16] = tmp[2] + out[i+24] = tmp[3] + out[i+32] = tmp[4] + out[i+40] = tmp[5] + out[i+48] = tmp[6] + out[i+56] = tmp[7] + } +} diff --git a/vendor/github.com/zeebo/blake3/internal/consts/consts.go b/vendor/github.com/zeebo/blake3/internal/consts/consts.go new file mode 100644 index 000000000..89f08fe10 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/consts/consts.go @@ -0,0 +1,29 @@ +package consts + +var IV = [...]uint32{IV0, IV1, IV2, IV3, IV4, IV5, IV6, IV7} + +const ( + IV0 = 0x6A09E667 + IV1 = 0xBB67AE85 + IV2 = 0x3C6EF372 + IV3 = 0xA54FF53A + IV4 = 0x510E527F + IV5 = 0x9B05688C + IV6 = 0x1F83D9AB + IV7 = 0x5BE0CD19 +) + +const ( + Flag_ChunkStart uint32 = 1 << 0 + Flag_ChunkEnd uint32 = 1 << 1 + Flag_Parent uint32 = 1 << 2 + Flag_Root uint32 = 1 << 3 + Flag_Keyed uint32 = 1 << 4 + Flag_DeriveKeyContext uint32 = 1 << 5 + Flag_DeriveKeyMaterial uint32 = 1 << 6 +) + +const ( + BlockLen = 64 + ChunkLen = 1024 +) diff --git a/vendor/github.com/zeebo/blake3/internal/consts/cpu.go b/vendor/github.com/zeebo/blake3/internal/consts/cpu.go new file mode 100644 index 000000000..1eebff943 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/consts/cpu.go @@ -0,0 +1,17 @@ +package consts + +import ( + "os" + + "golang.org/x/sys/cpu" +) + +var ( + HasAVX2 = cpu.X86.HasAVX2 && + os.Getenv("BLAKE3_DISABLE_AVX2") == "" && + os.Getenv("BLAKE3_PUREGO") == "" + + HasSSE41 = cpu.X86.HasSSE41 && + os.Getenv("BLAKE3_DISABLE_SSE41") == "" && + os.Getenv("BLAKE3_PUREGO") == "" +) diff --git a/vendor/github.com/zeebo/blake3/internal/consts/cpu_big.go b/vendor/github.com/zeebo/blake3/internal/consts/cpu_big.go new file mode 100644 index 000000000..fb730464f --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/consts/cpu_big.go @@ -0,0 +1,5 @@ +// +build mips mips64 ppc64 s390x + +package consts + +const IsLittleEndian = false diff --git a/vendor/github.com/zeebo/blake3/internal/consts/cpu_little.go b/vendor/github.com/zeebo/blake3/internal/consts/cpu_little.go new file mode 100644 index 000000000..1bae02a74 --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/consts/cpu_little.go @@ -0,0 +1,5 @@ +// +build amd64 386 arm arm64 mipsle mips64le ppc64le riscv64 wasm + +package consts + +const IsLittleEndian = true diff --git a/vendor/github.com/zeebo/blake3/internal/consts/cpu_other.go b/vendor/github.com/zeebo/blake3/internal/consts/cpu_other.go new file mode 100644 index 000000000..5f7407a6a --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/consts/cpu_other.go @@ -0,0 +1,7 @@ +// +build !mips,!mips64,!ppc64,!s390x,!amd64,!386,!arm,!arm64,!mipsle,!mips64le,!ppc64le,!riscv64,!wasm + +package consts + +import "unsafe" + +var IsLittleEndian = *(*uint16)(unsafe.Pointer(&[2]byte{0, 1})) != 1 diff --git a/vendor/github.com/zeebo/blake3/internal/utils/utils.go b/vendor/github.com/zeebo/blake3/internal/utils/utils.go new file mode 100644 index 000000000..0b36f0f0f --- /dev/null +++ b/vendor/github.com/zeebo/blake3/internal/utils/utils.go @@ -0,0 +1,60 @@ +package utils + +import ( + "encoding/binary" + "unsafe" +) + +func SliceToArray32(bytes []byte) *[32]uint8 { return (*[32]uint8)(unsafe.Pointer(&bytes[0])) } +func SliceToArray64(bytes []byte) *[64]uint8 { return (*[64]uint8)(unsafe.Pointer(&bytes[0])) } + +func BytesToWords(bytes *[64]uint8, words *[16]uint32) { + words[0] = binary.LittleEndian.Uint32(bytes[0*4:]) + words[1] = binary.LittleEndian.Uint32(bytes[1*4:]) + words[2] = binary.LittleEndian.Uint32(bytes[2*4:]) + words[3] = binary.LittleEndian.Uint32(bytes[3*4:]) + words[4] = binary.LittleEndian.Uint32(bytes[4*4:]) + words[5] = binary.LittleEndian.Uint32(bytes[5*4:]) + words[6] = binary.LittleEndian.Uint32(bytes[6*4:]) + words[7] = binary.LittleEndian.Uint32(bytes[7*4:]) + words[8] = binary.LittleEndian.Uint32(bytes[8*4:]) + words[9] = binary.LittleEndian.Uint32(bytes[9*4:]) + words[10] = binary.LittleEndian.Uint32(bytes[10*4:]) + words[11] = binary.LittleEndian.Uint32(bytes[11*4:]) + words[12] = binary.LittleEndian.Uint32(bytes[12*4:]) + words[13] = binary.LittleEndian.Uint32(bytes[13*4:]) + words[14] = binary.LittleEndian.Uint32(bytes[14*4:]) + words[15] = binary.LittleEndian.Uint32(bytes[15*4:]) +} + +func WordsToBytes(words *[16]uint32, bytes []byte) { + bytes = bytes[:64] + binary.LittleEndian.PutUint32(bytes[0*4:1*4], words[0]) + binary.LittleEndian.PutUint32(bytes[1*4:2*4], words[1]) + binary.LittleEndian.PutUint32(bytes[2*4:3*4], words[2]) + binary.LittleEndian.PutUint32(bytes[3*4:4*4], words[3]) + binary.LittleEndian.PutUint32(bytes[4*4:5*4], words[4]) + binary.LittleEndian.PutUint32(bytes[5*4:6*4], words[5]) + binary.LittleEndian.PutUint32(bytes[6*4:7*4], words[6]) + binary.LittleEndian.PutUint32(bytes[7*4:8*4], words[7]) + binary.LittleEndian.PutUint32(bytes[8*4:9*4], words[8]) + binary.LittleEndian.PutUint32(bytes[9*4:10*4], words[9]) + binary.LittleEndian.PutUint32(bytes[10*4:11*4], words[10]) + binary.LittleEndian.PutUint32(bytes[11*4:12*4], words[11]) + binary.LittleEndian.PutUint32(bytes[12*4:13*4], words[12]) + binary.LittleEndian.PutUint32(bytes[13*4:14*4], words[13]) + binary.LittleEndian.PutUint32(bytes[14*4:15*4], words[14]) + binary.LittleEndian.PutUint32(bytes[15*4:16*4], words[15]) +} + +func KeyFromBytes(key []byte, out *[8]uint32) { + key = key[:32] + out[0] = binary.LittleEndian.Uint32(key[0:]) + out[1] = binary.LittleEndian.Uint32(key[4:]) + out[2] = binary.LittleEndian.Uint32(key[8:]) + out[3] = binary.LittleEndian.Uint32(key[12:]) + out[4] = binary.LittleEndian.Uint32(key[16:]) + out[5] = binary.LittleEndian.Uint32(key[20:]) + out[6] = binary.LittleEndian.Uint32(key[24:]) + out[7] = binary.LittleEndian.Uint32(key[28:]) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index c403b7d2d..8637feaec 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -1,18 +1,20 @@ # codeberg.org/gruf/go-bytes v1.0.2 ## explicit; go 1.14 codeberg.org/gruf/go-bytes -# codeberg.org/gruf/go-errors v1.0.4 +# codeberg.org/gruf/go-errors v1.0.5 ## explicit; go 1.15 codeberg.org/gruf/go-errors # codeberg.org/gruf/go-fastpath v1.0.2 ## explicit; go 1.14 codeberg.org/gruf/go-fastpath +# codeberg.org/gruf/go-format v1.0.3 +## explicit; go 1.17 +codeberg.org/gruf/go-format # codeberg.org/gruf/go-hashenc v1.0.1 ## explicit; go 1.16 codeberg.org/gruf/go-hashenc # codeberg.org/gruf/go-logger v1.3.2 ## explicit; go 1.14 -codeberg.org/gruf/go-logger # codeberg.org/gruf/go-mutexes v1.0.1 ## explicit; go 1.14 codeberg.org/gruf/go-mutexes @@ -25,7 +27,7 @@ codeberg.org/gruf/go-pools # codeberg.org/gruf/go-runners v1.2.0 ## explicit; go 1.14 codeberg.org/gruf/go-runners -# codeberg.org/gruf/go-store v1.1.5 +# codeberg.org/gruf/go-store v1.2.2 ## explicit; go 1.14 codeberg.org/gruf/go-store/kv codeberg.org/gruf/go-store/storage @@ -524,6 +526,18 @@ github.com/vmihailenco/tagparser/v2/internal/parser # github.com/wagslane/go-password-validator v0.3.0 ## explicit; go 1.16 github.com/wagslane/go-password-validator +# github.com/zeebo/blake3 v0.2.1 +## explicit; go 1.13 +github.com/zeebo/blake3 +github.com/zeebo/blake3/internal/alg +github.com/zeebo/blake3/internal/alg/compress +github.com/zeebo/blake3/internal/alg/compress/compress_pure +github.com/zeebo/blake3/internal/alg/compress/compress_sse41 +github.com/zeebo/blake3/internal/alg/hash +github.com/zeebo/blake3/internal/alg/hash/hash_avx2 +github.com/zeebo/blake3/internal/alg/hash/hash_pure +github.com/zeebo/blake3/internal/consts +github.com/zeebo/blake3/internal/utils # golang.org/x/crypto v0.0.0-20211209193657-4570a0811e8b ## explicit; go 1.17 golang.org/x/crypto/acme