diff --git a/go.mod b/go.mod index 1fe0bf5d2..149a88117 100644 --- a/go.mod +++ b/go.mod @@ -44,7 +44,7 @@ require ( github.com/miekg/dns v1.1.59 github.com/minio/minio-go/v7 v7.0.71 github.com/mitchellh/mapstructure v1.5.0 - github.com/ncruces/go-sqlite3 v0.16.1 + github.com/ncruces/go-sqlite3 v0.16.2 github.com/oklog/ulid v1.3.1 github.com/prometheus/client_golang v1.19.1 github.com/spf13/cobra v1.8.0 @@ -199,7 +199,7 @@ require ( github.com/superseriousbusiness/go-jpeg-image-structure/v2 v2.0.0-20220321154430-d89a106fdabe // indirect github.com/superseriousbusiness/go-png-image-structure/v2 v2.0.1-SSB // indirect github.com/tdewolff/parse/v2 v2.7.14 // indirect - github.com/tetratelabs/wazero v1.7.2 // indirect + github.com/tetratelabs/wazero v1.7.3 // indirect github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc // indirect github.com/toqueteos/webbrowser v1.2.0 // indirect github.com/twitchyliquid64/golang-asm v0.15.1 // indirect diff --git a/go.sum b/go.sum index 81d950a8b..df382c545 100644 --- a/go.sum +++ b/go.sum @@ -445,8 +445,8 @@ github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9G github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs= github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= -github.com/ncruces/go-sqlite3 v0.16.1 h1:1wHv7s8y+fWK44UIliotJ42ZV41A5T0sjIAqGmnMrkc= -github.com/ncruces/go-sqlite3 v0.16.1/go.mod h1:feFXbBcbLtxNk6XWG1ROt8MS9+E45yCW3G8o4ixIqZ8= +github.com/ncruces/go-sqlite3 v0.16.2 h1:HesVRr0BC6QSGSEQfEXOntFWS9wd4Z8ms4nJzfUv4Rg= +github.com/ncruces/go-sqlite3 v0.16.2/go.mod h1:wkUIvOrAjFQnefVlivJfcowKUcfMHs4mvLfhVanzHHI= github.com/ncruces/go-strftime v0.1.9 h1:bY0MQC28UADQmHmaF5dgpLmImcShSi2kHU9XLdhx/f4= github.com/ncruces/go-strftime v0.1.9/go.mod h1:Fwc5htZGVVkseilnfgOVb9mKy6w1naJmn9CehxcKcls= github.com/ncruces/julianday v1.0.0 h1:fH0OKwa7NWvniGQtxdJRxAgkBMolni2BjDHaWTxqt7M= @@ -562,8 +562,8 @@ github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739 h1:IkjBCtQOOjIn03 github.com/tdewolff/test v1.0.11-0.20240106005702-7de5f7df4739/go.mod h1:XPuWBzvdUzhCuxWO1ojpXsyzsA5bFoS3tO/Q3kFuTG8= github.com/technologize/otel-go-contrib v1.1.1 h1:wZH9aSPNWZWIkEh3vfaKfMb15AJ80jJ1aVj/4GZdqIw= github.com/technologize/otel-go-contrib v1.1.1/go.mod h1:dCN/wj2WyUO8aFZFdIN+6tfJHImjTML/8r2YVYAy3So= -github.com/tetratelabs/wazero v1.7.2 h1:1+z5nXJNwMLPAWaTePFi49SSTL0IMx/i3Fg8Yc25GDc= -github.com/tetratelabs/wazero v1.7.2/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= +github.com/tetratelabs/wazero v1.7.3 h1:PBH5KVahrt3S2AHgEjKu4u+LlDbbk+nsGE3KLucy6Rw= +github.com/tetratelabs/wazero v1.7.3/go.mod h1:ytl6Zuh20R/eROuyDaGPkp82O9C/DJfXAwJfQ3X6/7Y= github.com/tidwall/btree v0.0.0-20191029221954-400434d76274 h1:G6Z6HvJuPjG6XfNGi/feOATzeJrfgTNJY+rGrHbA04E= github.com/tidwall/btree v0.0.0-20191029221954-400434d76274/go.mod h1:huei1BkDWJ3/sLXmO+bsCNELL+Bp2Kks9OLyQFkzvA8= github.com/tidwall/buntdb v1.1.2 h1:noCrqQXL9EKMtcdwJcmuVKSEjqu1ua99RHHgbLTEHRo= diff --git a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go index c0ba38cf0..7f6849a42 100644 --- a/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go +++ b/vendor/github.com/ncruces/go-sqlite3/internal/util/json.go @@ -26,7 +26,7 @@ func (j JSON) Scan(value any) error { buf = v.AppendFormat(buf, time.RFC3339Nano) buf = append(buf, '"') case nil: - buf = 
append(buf, "null"...) + buf = []byte("null") default: panic(AssertErr()) } diff --git a/vendor/github.com/ncruces/go-sqlite3/json.go b/vendor/github.com/ncruces/go-sqlite3/json.go index 9b2565e87..2b762c092 100644 --- a/vendor/github.com/ncruces/go-sqlite3/json.go +++ b/vendor/github.com/ncruces/go-sqlite3/json.go @@ -5,7 +5,8 @@ import "github.com/ncruces/go-sqlite3/internal/util" // JSON returns a value that can be used as an argument to // [database/sql.DB.Exec], [database/sql.Row.Scan] and similar methods to // store value as JSON, or decode JSON into value. -// JSON should NOT be used with [BindJSON] or [ResultJSON]. +// JSON should NOT be used with [Stmt.BindJSON], [Stmt.ColumnJSON], +// [Value.JSON], or [Context.ResultJSON]. func JSON(value any) any { return util.JSON{Value: value} } diff --git a/vendor/github.com/ncruces/go-sqlite3/pointer.go b/vendor/github.com/ncruces/go-sqlite3/pointer.go index 611c1528c..0e2418b99 100644 --- a/vendor/github.com/ncruces/go-sqlite3/pointer.go +++ b/vendor/github.com/ncruces/go-sqlite3/pointer.go @@ -4,7 +4,8 @@ import "github.com/ncruces/go-sqlite3/internal/util" // Pointer returns a pointer to a value that can be used as an argument to // [database/sql.DB.Exec] and similar methods. -// Pointer should NOT be used with [BindPointer] or [ResultPointer]. +// Pointer should NOT be used with [Stmt.BindPointer], +// [Value.Pointer], or [Context.ResultPointer]. // // https://sqlite.org/bindptr.html func Pointer[T any](value T) any { diff --git a/vendor/github.com/ncruces/go-sqlite3/stmt.go b/vendor/github.com/ncruces/go-sqlite3/stmt.go index ac40e3802..381a7d06b 100644 --- a/vendor/github.com/ncruces/go-sqlite3/stmt.go +++ b/vendor/github.com/ncruces/go-sqlite3/stmt.go @@ -564,7 +564,7 @@ func (s *Stmt) ColumnJSON(col int, ptr any) error { var data []byte switch s.ColumnType(col) { case NULL: - data = append(data, "null"...) + data = []byte("null") case TEXT: data = s.ColumnRawText(col) case BLOB: diff --git a/vendor/github.com/ncruces/go-sqlite3/value.go b/vendor/github.com/ncruces/go-sqlite3/value.go index d0edf215b..1894ff4f1 100644 --- a/vendor/github.com/ncruces/go-sqlite3/value.go +++ b/vendor/github.com/ncruces/go-sqlite3/value.go @@ -177,7 +177,7 @@ func (v Value) JSON(ptr any) error { var data []byte switch v.Type() { case NULL: - data = append(data, "null"...) 
+ data = []byte("null") case TEXT: data = v.RawText() case BLOB: diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go index 8dc57ab9c..f21335d8e 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/memdb/memdb.go @@ -75,11 +75,6 @@ func (memVFS) FullPathname(name string) (string, error) { type memDB struct { name string - // +checklocks:lockMtx - pending *memFile - // +checklocks:lockMtx - reserved *memFile - // +checklocks:dataMtx data []*[sectorSize]byte @@ -88,6 +83,10 @@ type memDB struct { // +checklocks:lockMtx shared int + // +checklocks:lockMtx + reserved bool + // +checklocks:lockMtx + pending bool // +checklocks:memoryMtx refs int @@ -214,24 +213,24 @@ func (m *memFile) Lock(lock vfs.LockLevel) error { switch lock { case vfs.LOCK_SHARED: - if m.pending != nil { + if m.pending { return sqlite3.BUSY } m.shared++ case vfs.LOCK_RESERVED: - if m.reserved != nil { + if m.reserved { return sqlite3.BUSY } - m.reserved = m + m.reserved = true case vfs.LOCK_EXCLUSIVE: if m.lock < vfs.LOCK_PENDING { - if m.pending != nil { + if m.pending { return sqlite3.BUSY } m.lock = vfs.LOCK_PENDING - m.pending = m + m.pending = true } for before := time.Now(); m.shared > 1; { @@ -256,11 +255,11 @@ func (m *memFile) Unlock(lock vfs.LockLevel) error { m.lockMtx.Lock() defer m.lockMtx.Unlock() - if m.pending == m { - m.pending = nil + if m.pending && m.lock >= vfs.LOCK_PENDING { + m.pending = false } - if m.reserved == m { - m.reserved = nil + if m.reserved && m.lock >= vfs.LOCK_RESERVED { + m.reserved = false } if lock < vfs.LOCK_SHARED { m.shared-- @@ -275,7 +274,7 @@ func (m *memFile) CheckReservedLock() (bool, error) { } m.lockMtx.Lock() defer m.lockMtx.Unlock() - return m.reserved != nil, nil + return m.reserved, nil } func (m *memFile) SectorSize() int { diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go index 58da34df4..7b0d4b677 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm.go @@ -125,6 +125,9 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext return 0, _IOERR_SHMMAP } s.regions = append(s.regions, r) + if s.readOnly { + return r.Ptr, _READONLY + } return r.Ptr, _OK } diff --git a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go index 3b45b3087..8c2abee81 100644 --- a/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go +++ b/vendor/github.com/ncruces/go-sqlite3/vfs/shm_bsd.go @@ -101,13 +101,13 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) { return _OK } - // Open file read-write, as it will be shared. + // Always open file read-write, as it will be shared. f, err := os.OpenFile(s.path, unix.O_RDWR|unix.O_CREAT|unix.O_NOFOLLOW, 0666) if err != nil { return _CANTOPEN } - // Close if file if it's not nil. + // Closes file if it's not nil. defer func() { f.Close() }() fi, err := f.Stat() @@ -145,17 +145,14 @@ func (s *vfsShm) shmOpen() (rc _ErrorCode) { info: fi, refs: 1, } - f = nil - add := true + f = nil // Don't close the file. 
for i, g := range vfsShmFiles { if g == nil { vfsShmFiles[i] = s.vfsShmFile - add = false + return rc } } - if add { - vfsShmFiles = append(vfsShmFiles, s.vfsShmFile) - } + vfsShmFiles = append(vfsShmFiles, s.vfsShmFile) return rc } @@ -195,6 +192,9 @@ func (s *vfsShm) shmMap(ctx context.Context, mod api.Module, id, size int32, ext return 0, _IOERR_SHMMAP } s.regions = append(s.regions, r) + if s.readOnly { + return r.Ptr, _READONLY + } return r.Ptr, _OK } diff --git a/vendor/github.com/tetratelabs/wazero/config.go b/vendor/github.com/tetratelabs/wazero/config.go index 819a76df5..d3656849c 100644 --- a/vendor/github.com/tetratelabs/wazero/config.go +++ b/vendor/github.com/tetratelabs/wazero/config.go @@ -148,7 +148,7 @@ type RuntimeConfig interface { // customSections := c.CustomSections() WithCustomSections(bool) RuntimeConfig - // WithCloseOnContextDone ensures the executions of functions to be closed under one of the following circumstances: + // WithCloseOnContextDone ensures the executions of functions to be terminated under one of the following circumstances: // // - context.Context passed to the Call method of api.Function is canceled during execution. (i.e. ctx by context.WithCancel) // - context.Context passed to the Call method of api.Function reaches timeout during execution. (i.e. ctx by context.WithTimeout or context.WithDeadline) @@ -159,6 +159,8 @@ type RuntimeConfig interface { // entire underlying OS thread which runs the api.Function call. See "Why it's safe to execute runtime-generated // machine codes against async Goroutine preemption" section in RATIONALE.md for detail. // + // Upon the termination of the function executions, api.Module is closed. + // // Note that this comes with a bit of extra cost when enabled. The reason is that internally this forces // interpreter and compiler runtimes to insert the periodical checks on the conditions above. For that reason, // this is disabled by default. @@ -217,9 +219,18 @@ const ( // part. wazero automatically performs ahead-of-time compilation as needed when // Runtime.CompileModule is invoked. // -// Warning: This panics at runtime if the runtime.GOOS or runtime.GOARCH does not -// support compiler. Use NewRuntimeConfig to safely detect and fallback to -// NewRuntimeConfigInterpreter if needed. +// # Warning +// +// - This panics at runtime if the runtime.GOOS or runtime.GOARCH does not +// support compiler. Use NewRuntimeConfig to safely detect and fallback to +// NewRuntimeConfigInterpreter if needed. +// +// - If you are using wazero in buildmode=c-archive or c-shared, make sure that you set up the alternate signal stack +// by using, e.g. `sigaltstack` combined with `SA_ONSTACK` flag on `sigaction` on Linux, +// before calling any api.Function. This is because the Go runtime does not set up the alternate signal stack +// for c-archive or c-shared modes, and wazero uses the different stack than the calling Goroutine. +// Hence, the signal handler might get invoked on the wazero's stack, which may cause a stack overflow. 
+// https://github.com/tetratelabs/wazero/blob/2092c0a879f30d49d7b37f333f4547574b8afe0d/internal/integration_test/fuzz/fuzz/tests/sigstack.rs#L19-L36 func NewRuntimeConfigCompiler() RuntimeConfig { ret := engineLessConfig.clone() ret.engineKind = engineKindCompiler diff --git a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go index 443c5a294..c75db615e 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/checkpoint.go @@ -21,13 +21,6 @@ type Snapshotter interface { Snapshot() Snapshot } -// EnableSnapshotterKey is a context key to indicate that snapshotting should be enabled. -// The context.Context passed to a exported function invocation should have this key set -// to a non-nil value, and host functions will be able to retrieve it using SnapshotterKey. -// -// Deprecated: use WithSnapshotter to enable snapshots. -type EnableSnapshotterKey = expctxkeys.EnableSnapshotterKey - // WithSnapshotter enables snapshots. // Passing the returned context to a exported function invocation enables snapshots, // and allows host functions to retrieve the Snapshotter using GetSnapshotter. @@ -35,12 +28,6 @@ func WithSnapshotter(ctx context.Context) context.Context { return context.WithValue(ctx, expctxkeys.EnableSnapshotterKey{}, struct{}{}) } -// SnapshotterKey is a context key to access a Snapshotter from a host function. -// It is only present if EnableSnapshotter was set in the function invocation context. -// -// Deprecated: use GetSnapshotter to get the snapshotter. -type SnapshotterKey = expctxkeys.SnapshotterKey - // GetSnapshotter gets the Snapshotter from a host function. // It is only present if WithSnapshotter was called with the function invocation context. func GetSnapshotter(ctx context.Context) Snapshotter { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/listener.go b/vendor/github.com/tetratelabs/wazero/experimental/listener.go index b2ba1fe83..55fc6b668 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/listener.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/listener.go @@ -24,12 +24,6 @@ type StackIterator interface { ProgramCounter() ProgramCounter } -// FunctionListenerFactoryKey is a context.Context Value key. -// Its associated value should be a FunctionListenerFactory. -// -// Deprecated: use WithFunctionListenerFactory to enable snapshots. -type FunctionListenerFactoryKey = expctxkeys.FunctionListenerFactoryKey - // WithFunctionListenerFactory registers a FunctionListenerFactory // with the context. func WithFunctionListenerFactory(ctx context.Context, factory FunctionListenerFactory) context.Context { diff --git a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go index 761a1f9dc..5ebc1780f 100644 --- a/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go +++ b/vendor/github.com/tetratelabs/wazero/experimental/sys/syscall_errno_windows.go @@ -23,6 +23,10 @@ const ( // instead of syscall.ENOTDIR _ERROR_DIRECTORY = syscall.Errno(0x10B) + // _ERROR_NOT_A_REPARSE_POINT is a Windows error returned by os.Readlink + // instead of syscall.EINVAL + _ERROR_NOT_A_REPARSE_POINT = syscall.Errno(0x1126) + // _ERROR_INVALID_SOCKET is a Windows error returned by winsock_select // when a given handle is not a socket. 
_ERROR_INVALID_SOCKET = syscall.Errno(0x2736) @@ -51,7 +55,7 @@ func errorToErrno(err error) Errno { return EBADF case syscall.ERROR_PRIVILEGE_NOT_HELD: return EPERM - case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME: + case _ERROR_NEGATIVE_SEEK, _ERROR_INVALID_NAME, _ERROR_NOT_A_REPARSE_POINT: return EINVAL } errno, _ := syscallToErrno(err) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go index a89ddc457..18c5f4252 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/interpreter/interpreter.go @@ -98,6 +98,9 @@ func (e *moduleEngine) SetGlobalValue(idx wasm.Index, lo, hi uint64) { // OwnsGlobals implements the same method as documented on wasm.ModuleEngine. func (e *moduleEngine) OwnsGlobals() bool { return false } +// MemoryGrown implements wasm.ModuleEngine. +func (e *moduleEngine) MemoryGrown() {} + // callEngine holds context per moduleEngine.Call, and shared across all the // function calls originating from the same moduleEngine.Call execution. // diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go index 81c6a6b62..8e9571b20 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/executable_context.go @@ -43,7 +43,7 @@ type ExecutableContextT[Instr any] struct { labelPositionPool wazevoapi.Pool[LabelPosition[Instr]] NextLabel Label // LabelPositions maps a label to the instructions of the region which the label represents. - LabelPositions map[Label]*LabelPosition[Instr] + LabelPositions []*LabelPosition[Instr] OrderedBlockLabels []*LabelPosition[Instr] // PerBlockHead and PerBlockEnd are the head and tail of the instruction list per currently-compiled ssa.BasicBlock. 
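Note on the `executable_context.go` hunks around here: `LabelPositions` changes from a `map[Label]*LabelPosition[Instr]` to a slice indexed by the label value. Labels are allocated as small consecutive integers, so a dense slice avoids map hashing on the hot compilation path, and `Reset` becomes a simple nil-out loop instead of per-key deletes. A minimal sketch of the get-or-allocate pattern, with hypothetical generic names (not the wazero API):

```go
// getOrAllocate grows a dense, index-keyed table on demand and fills the
// slot lazily, mirroring GetOrAllocateLabelPosition in the hunk below.
func getOrAllocate[T any](table []*T, idx int, alloc func() *T) ([]*T, *T) {
	if len(table) <= idx {
		table = append(table, make([]*T, idx+1-len(table))...)
	}
	v := table[idx]
	if v == nil {
		v = alloc()
		table[idx] = v
	}
	return table, v
}
```

Readers that only look up (e.g. the `encodeWithoutSSA` hunk further down) must now bounds-check the index and tolerate nil entries, which is why those lookups gain `if int(l) >= len(...)` and `if pos != nil` guards.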
@@ -67,7 +67,6 @@ func NewExecutableContextT[Instr any]( setNext: setNext, setPrev: setPrev, labelPositionPool: wazevoapi.NewPool[LabelPosition[Instr]](resetLabelPosition[Instr]), - LabelPositions: make(map[Label]*LabelPosition[Instr]), NextLabel: LabelInvalid, } } @@ -97,11 +96,7 @@ func (e *ExecutableContextT[Instr]) StartBlock(blk ssa.BasicBlock) { end := e.allocateNop0() e.PerBlockHead, e.PerBlockEnd = end, end - labelPos, ok := e.LabelPositions[l] - if !ok { - labelPos = e.AllocateLabelPosition(l) - e.LabelPositions[l] = labelPos - } + labelPos := e.GetOrAllocateLabelPosition(l) e.OrderedBlockLabels = append(e.OrderedBlockLabels, labelPos) labelPos.Begin, labelPos.End = end, end labelPos.SB = blk @@ -146,8 +141,8 @@ func (e *ExecutableContextT[T]) FlushPendingInstructions() { func (e *ExecutableContextT[T]) Reset() { e.labelPositionPool.Reset() e.InstructionPool.Reset() - for l := Label(0); l <= e.NextLabel; l++ { - delete(e.LabelPositions, l) + for i := range e.LabelPositions { + e.LabelPositions[i] = nil } e.PendingInstructions = e.PendingInstructions[:0] e.OrderedBlockLabels = e.OrderedBlockLabels[:0] @@ -163,10 +158,17 @@ func (e *ExecutableContextT[T]) AllocateLabel() Label { return e.NextLabel } -func (e *ExecutableContextT[T]) AllocateLabelPosition(la Label) *LabelPosition[T] { - l := e.labelPositionPool.Allocate() - l.L = la - return l +func (e *ExecutableContextT[T]) GetOrAllocateLabelPosition(l Label) *LabelPosition[T] { + if len(e.LabelPositions) <= int(l) { + e.LabelPositions = append(e.LabelPositions, make([]*LabelPosition[T], int(l)+1-len(e.LabelPositions))...) + } + ret := e.LabelPositions[l] + if ret == nil { + ret = e.labelPositionPool.Allocate() + ret.L = l + e.LabelPositions[l] = ret + } + return ret } func (e *ExecutableContextT[T]) GetOrAllocateSSABlockLabel(blk ssa.BasicBlock) Label { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go index 310ad2203..61ae6f406 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/amd64/machine.go @@ -1906,8 +1906,10 @@ func (m *machine) InsertMove(dst, src regalloc.VReg, typ ssa.Type) { func (m *machine) Format() string { ectx := m.ectx begins := map[*instruction]backend.Label{} - for l, pos := range ectx.LabelPositions { - begins[pos.Begin] = l + for _, pos := range ectx.LabelPositions { + if pos != nil { + begins[pos.Begin] = pos.L + } } irBlocks := map[backend.Label]ssa.BasicBlockID{} @@ -1950,7 +1952,10 @@ func (m *machine) encodeWithoutSSA(root *instruction) { offset := int64(len(*bufPtr)) if cur.kind == nop0 { l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { + if int(l) >= len(ectx.LabelPositions) { + continue + } + if pos := ectx.LabelPositions[l]; pos != nil { pos.BinaryOffset = offset } } @@ -2005,7 +2010,7 @@ func (m *machine) Encode(ctx context.Context) (err error) { switch cur.kind { case nop0: l := cur.nop0Label() - if pos, ok := ectx.LabelPositions[l]; ok { + if pos := ectx.LabelPositions[l]; pos != nil { pos.BinaryOffset = offset } case sourceOffsetInfo: @@ -2165,8 +2170,7 @@ func (m *machine) allocateBrTarget() (nop *instruction, l backend.Label) { //nol func (m *machine) allocateLabel() *labelPosition { ectx := m.ectx l := ectx.AllocateLabel() - pos := ectx.AllocateLabelPosition(l) - ectx.LabelPositions[l] = pos + pos := 
ectx.GetOrAllocateLabelPosition(l) return pos } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go index 6615471c6..4eaa13ce1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi.go @@ -101,13 +101,14 @@ func (m *machine) LowerParams(args []ssa.Value) { bits := arg.Type.Bits() // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `argStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: arg.Offset, rn: spVReg, kind: addressModeKindArgStackSpace} load := m.allocateInstr() switch arg.Type { case ssa.TypeI32, ssa.TypeI64: - load.asULoad(operandNR(reg), amode, bits) + load.asULoad(reg, amode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - load.asFpuLoad(operandNR(reg), amode, bits) + load.asFpuLoad(reg, amode, bits) default: panic("BUG") } @@ -169,7 +170,8 @@ func (m *machine) LowerReturns(rets []ssa.Value) { // At this point of compilation, we don't yet know how much space exist below the return address. // So we instruct the address mode to add the `retStackOffset` to the offset at the later phase of compilation. - amode := addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} + amode := m.amodePool.Allocate() + *amode = addressMode{imm: r.Offset, rn: spVReg, kind: addressModeKindResultStackSpace} store := m.allocateInstr() store.asStore(operandNR(reg), amode, bits) m.insert(store) @@ -215,9 +217,9 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i ldr := m.allocateInstr() switch r.Type { case ssa.TypeI32, ssa.TypeI64: - ldr.asULoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asULoad(reg, amode, r.Type.Bits()) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - ldr.asFpuLoad(operandNR(reg), amode, r.Type.Bits()) + ldr.asFpuLoad(reg, amode, r.Type.Bits()) default: panic("BUG") } @@ -225,7 +227,7 @@ func (m *machine) callerGenFunctionReturnVReg(a *backend.FunctionABI, retIndex i } } -func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, addressMode) { +func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) (*instruction, *addressMode) { exct := m.executableContext exct.PendingInstructions = exct.PendingInstructions[:0] mode := m.resolveAddressModeForOffset(offset, dstBits, rn, allowTmpRegUse) @@ -235,15 +237,15 @@ func (m *machine) resolveAddressModeForOffsetAndInsert(cur *instruction, offset return cur, mode } -func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) addressMode { +func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn regalloc.VReg, allowTmpRegUse bool) *addressMode { if rn.RegType() != regalloc.RegTypeInt { panic("BUG: rn should be a pointer: " + formatVRegSized(rn, 64)) } - var amode addressMode + amode := m.amodePool.Allocate() if offsetFitsInAddressModeKindRegUnsignedImm12(dstBits, offset) { - amode = addressMode{kind: 
addressModeKindRegUnsignedImm12, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: rn, imm: offset} } else if offsetFitsInAddressModeKindRegSignedImm9(offset) { - amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} + *amode = addressMode{kind: addressModeKindRegSignedImm9, rn: rn, imm: offset} } else { var indexReg regalloc.VReg if allowTmpRegUse { @@ -253,7 +255,7 @@ func (m *machine) resolveAddressModeForOffset(offset int64, dstBits byte, rn reg indexReg = m.compiler.AllocateVReg(ssa.TypeI64) m.lowerConstantI64(indexReg, offset) } - amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} + *amode = addressMode{kind: addressModeKindRegReg, rn: rn, rm: indexReg, extOp: extendOpUXTX /* indicates index rm is 64-bit */} } return amode } @@ -315,7 +317,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), imm12Operand, true) + alu.asALU(ao, rd, operandNR(spVReg), imm12Operand, true) m.insert(alu) } else { m.lowerConstantI64(tmpRegVReg, diff) @@ -326,7 +328,7 @@ func (m *machine) insertAddOrSubStackPointer(rd regalloc.VReg, diff int64, add b } else { ao = aluOpSub } - alu.asALU(ao, operandNR(rd), operandNR(spVReg), operandNR(tmpRegVReg), true) + alu.asALU(ao, rd, operandNR(spVReg), operandNR(tmpRegVReg), true) m.insert(alu) } } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go index 7a9cceb33..f8b5d97ac 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_entry_preamble.go @@ -59,25 +59,26 @@ func (m *machine) goEntryPreamblePassArg(cur *instruction, paramSlicePtr regallo } else { postIndexImm = 8 } - loadMode := addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} + loadMode := m.amodePool.Allocate() + *loadMode = addressMode{kind: addressModeKindPostIndex, rn: paramSlicePtr, imm: postIndexImm} instr := m.allocateInstr() switch typ { case ssa.TypeI32: - instr.asULoad(loadTargetReg, loadMode, 32) + instr.asULoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeI64: - instr.asULoad(loadTargetReg, loadMode, 64) + instr.asULoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeF32: - instr.asFpuLoad(loadTargetReg, loadMode, 32) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 32) case ssa.TypeF64: - instr.asFpuLoad(loadTargetReg, loadMode, 64) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 64) case ssa.TypeV128: - instr.asFpuLoad(loadTargetReg, loadMode, 128) + instr.asFpuLoad(loadTargetReg.reg(), loadMode, 128) } cur = linkInstr(cur, instr) if isStackArg { - var storeMode addressMode + var storeMode *addressMode cur, storeMode = m.resolveAddressModeForOffsetAndInsert(cur, argStartOffsetFromSP+arg.Offset, bits, spVReg, true) toStack := m.allocateInstr() toStack.asStore(loadTargetReg, storeMode, bits) @@ -113,21 +114,22 @@ func (m *machine) goEntryPreamblePassResult(cur *instruction, resultSlicePtr reg } if isStackArg { - var loadMode addressMode + var loadMode *addressMode cur, loadMode = m.resolveAddressModeForOffsetAndInsert(cur, resultStartOffsetFromSP+result.Offset, bits, spVReg, true) toReg := m.allocateInstr() switch typ 
{ case ssa.TypeI32, ssa.TypeI64: - toReg.asULoad(storeTargetReg, loadMode, bits) + toReg.asULoad(storeTargetReg.reg(), loadMode, bits) case ssa.TypeF32, ssa.TypeF64, ssa.TypeV128: - toReg.asFpuLoad(storeTargetReg, loadMode, bits) + toReg.asFpuLoad(storeTargetReg.reg(), loadMode, bits) default: panic("TODO?") } cur = linkInstr(cur, toReg) } - mode := addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: resultSlicePtr, imm: postIndexImm} instr := m.allocateInstr() instr.asStore(storeTargetReg, mode, bits) cur = linkInstr(cur, instr) @@ -214,11 +216,12 @@ func (m *machine) move64(dst, src regalloc.VReg, prev *instruction) *instruction func (m *machine) loadOrStoreAtExecutionContext(d regalloc.VReg, offset wazevoapi.Offset, store bool, prev *instruction) *instruction { instr := m.allocateInstr() - mode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: savedExecutionContextPtr, imm: offset.I64()} if store { instr.asStore(operandNR(d), mode, 64) } else { - instr.asULoad(operandNR(d), mode, 64) + instr.asULoad(d, mode, 64) } return linkInstr(prev, instr) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go index 466b1f960..99e6bb482 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/abi_go_call.go @@ -87,7 +87,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // Module context is always the second argument. moduleCtrPtr := x1VReg store := m.allocateInstr() - amode := addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtrPtr, imm: offset} store.asStore(operandNR(moduleCtrPtr), amode, 64) cur = linkInstr(cur, store) } @@ -120,11 +121,9 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * } else { sizeInBits = 64 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindPostIndex, - rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8), - }, sizeInBits) + amode := m.amodePool.Allocate() + *amode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg, imm: int64(sizeInBits / 8)} + store.asStore(operandNR(v), amode, sizeInBits) cur = linkInstr(cur, store) } @@ -139,7 +138,7 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * frameSizeReg = xzrVReg sliceSizeReg = xzrVReg } - _amode := addressModePreOrPostIndex(spVReg, -16, true) + _amode := addressModePreOrPostIndex(m, spVReg, -16, true) storeP := m.allocateInstr() storeP.asStorePair64(frameSizeReg, sliceSizeReg, _amode) cur = linkInstr(cur, storeP) @@ -165,8 +164,8 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * cur = m.addsAddOrSubStackPointer(cur, spVReg, frameInfoSize+goCallStackSize, true) ldr := m.allocateInstr() // And load the return address. - ldr.asULoad(operandNR(lrVReg), - addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. 
*/, false /* increment after loads */), 64) + amode := addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */) + ldr.asULoad(lrVReg, amode, 64) cur = linkInstr(cur, ldr) originalRet0Reg := x17VReg // Caller save, so we can use it for whatever we want. @@ -183,23 +182,24 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * r := &abi.Rets[i] if r.Kind == backend.ABIArgKindReg { loadIntoReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asULoad(r.Reg, mode, 32) case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asULoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asULoad(r.Reg, mode, 64) case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 32) + loadIntoReg.asFpuLoad(r.Reg, mode, 32) case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 64) + loadIntoReg.asFpuLoad(r.Reg, mode, 64) case ssa.TypeV128: mode.imm = 16 - loadIntoReg.asFpuLoad(operandNR(r.Reg), mode, 128) + loadIntoReg.asFpuLoad(r.Reg, mode, 128) default: panic("TODO") } @@ -208,28 +208,29 @@ func (m *machine) CompileGoFunctionTrampoline(exitCode wazevoapi.ExitCode, sig * // First we need to load the value to a temporary just like ^^. intTmp, floatTmp := x11VReg, v11VReg loadIntoTmpReg := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: arg0ret0AddrReg} var resultReg regalloc.VReg switch r.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 32) + loadIntoTmpReg.asULoad(intTmp, mode, 32) resultReg = intTmp case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asULoad(operandNR(intTmp), mode, 64) + loadIntoTmpReg.asULoad(intTmp, mode, 64) resultReg = intTmp case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 32) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 32) resultReg = floatTmp case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 64) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 64) resultReg = floatTmp case ssa.TypeV128: mode.imm = 16 - loadIntoTmpReg.asFpuLoad(operandNR(floatTmp), mode, 128) + loadIntoTmpReg.asFpuLoad(floatTmp, mode, 128) resultReg = floatTmp default: panic("TODO") @@ -258,12 +259,13 @@ func (m *machine) saveRegistersInExecutionContext(cur *instruction, regs []regal case regalloc.RegTypeFloat: sizeInBits = 128 } - store.asStore(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. 
+ rn: x0VReg, imm: offset, + } + store.asStore(operandNR(v), mode, sizeInBits) store.prev = cur cur.next = store cur = store @@ -276,7 +278,7 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re offset := wazevoapi.ExecutionContextOffsetSavedRegistersBegin.I64() for _, v := range regs { load := m.allocateInstr() - var as func(dst operand, amode addressMode, sizeInBits byte) + var as func(dst regalloc.VReg, amode *addressMode, sizeInBits byte) var sizeInBits byte switch v.RegType() { case regalloc.RegTypeInt: @@ -286,12 +288,13 @@ func (m *machine) restoreRegistersInExecutionContext(cur *instruction, regs []re as = load.asFpuLoad sizeInBits = 128 } - as(operandNR(v), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: offset, - }, sizeInBits) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: offset, + } + as(v, mode, sizeInBits) cur = linkInstr(cur, load) offset += 16 // Imm12 must be aligned 16 for vector regs, so we unconditionally load regs at the offset of multiple of 16. } @@ -324,11 +327,9 @@ func (m *machine) setExitCode(cur *instruction, execCtr regalloc.VReg, exitCode // Set the exit status on the execution context. setExistStatus := m.allocateInstr() - setExistStatus.asStore(operandNR(constReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(), - }, 32) + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: execCtr, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64()} + setExistStatus.asStore(operandNR(constReg), mode, 32) cur = linkInstr(cur, setExistStatus) return cur } @@ -340,12 +341,13 @@ func (m *machine) storeReturnAddressAndExit(cur *instruction) *instruction { cur = linkInstr(cur, adr) storeReturnAddr := m.allocateInstr() - storeReturnAddr.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - // Execution context is always the first argument. - rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + // Execution context is always the first argument. + rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(), + } + storeReturnAddr.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, storeReturnAddr) // Exit the execution. 
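Note on the recurring pattern in these arm64 hunks: `addressMode` values that were previously embedded by value in each `instruction` are now allocated from `m.amodePool` and passed as `*addressMode`, shrinking the very hot `instruction` struct (see the `instr.go` hunk further down, which drops the `amode` and `u3` fields). A hedged sketch of a free-list pool with the same `Allocate` shape — the real `wazevoapi.Pool` may differ:

```go
// pool hands out zeroed *T values and recycles them between compilations,
// so per-instruction allocations don't hit the garbage collector each time.
type pool[T any] struct {
	free  []*T
	reset func(*T) // restores a recycled value to its zero state
}

func (p *pool[T]) Allocate() *T {
	if n := len(p.free); n > 0 {
		v := p.free[n-1]
		p.free = p.free[:n-1]
		return v
	}
	return new(T)
}

func (p *pool[T]) Release(v *T) {
	p.reset(v)
	p.free = append(p.free, v)
}
```

Call sites then follow the two-step shape seen throughout the diff: `amode := m.amodePool.Allocate()` followed by `*amode = addressMode{...}`.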
@@ -364,11 +366,12 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe cur = linkInstr(cur, movSp) strSp := m.allocateInstr() - strSp.asStore(operandNR(tmpRegVReg), - addressMode{ - kind: addressModeKindRegUnsignedImm12, - rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), - }, 64) + mode := m.amodePool.Allocate() + *mode = addressMode{ + kind: addressModeKindRegUnsignedImm12, + rn: execCtr, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(), + } + strSp.asStore(operandNR(tmpRegVReg), mode, 64) cur = linkInstr(cur, strSp) return cur } @@ -376,27 +379,28 @@ func (m *machine) saveCurrentStackPointer(cur *instruction, execCtr regalloc.VRe func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg regalloc.VReg, arg *backend.ABIArg, intVReg, floatVReg regalloc.VReg) (*instruction, regalloc.VReg) { load := m.allocateInstr() var result regalloc.VReg - mode := addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalArg0Reg} switch arg.Type { case ssa.TypeI32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 32) + load.asULoad(intVReg, mode, 32) result = intVReg case ssa.TypeI64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asULoad(operandNR(intVReg), mode, 64) + load.asULoad(intVReg, mode, 64) result = intVReg case ssa.TypeF32: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 32) + load.asFpuLoad(floatVReg, mode, 32) result = floatVReg case ssa.TypeF64: mode.imm = 8 // We use uint64 for all basic types, except SIMD v128. - load.asFpuLoad(operandNR(floatVReg), mode, 64) + load.asFpuLoad(floatVReg, mode, 64) result = floatVReg case ssa.TypeV128: mode.imm = 16 - load.asFpuLoad(operandNR(floatVReg), mode, 128) + load.asFpuLoad(floatVReg, mode, 128) result = floatVReg default: panic("TODO") @@ -408,7 +412,8 @@ func (m *machine) goFunctionCallLoadStackArg(cur *instruction, originalArg0Reg r func (m *machine) goFunctionCallStoreStackResult(cur *instruction, originalRet0Reg regalloc.VReg, result *backend.ABIArg, resultVReg regalloc.VReg) *instruction { store := m.allocateInstr() - mode := addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} + mode := m.amodePool.Allocate() + *mode = addressMode{kind: addressModeKindPostIndex, rn: originalRet0Reg} var sizeInBits byte switch result.Type { case ssa.TypeI32, ssa.TypeF32: diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go index 8aabc5997..7121cb538 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr.go @@ -3,10 +3,12 @@ package arm64 import ( "fmt" "math" + "unsafe" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend" "github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc" "github.com/tetratelabs/wazero/internal/engine/wazevo/ssa" + "github.com/tetratelabs/wazero/internal/engine/wazevo/wazevoapi" ) type ( @@ -22,9 +24,9 @@ type ( // TODO: optimize the layout later once the impl settles. 
instruction struct { prev, next *instruction - u1, u2, u3 uint64 - rd, rm, rn, ra operand - amode addressMode + u1, u2 uint64 + rd regalloc.VReg + rm, rn operand kind instructionKind addedBeforeRegAlloc bool } @@ -174,7 +176,7 @@ func (i *instruction) Defs(regs *[]regalloc.VReg) []regalloc.VReg { switch defKinds[i.kind] { case defKindNone: case defKindRD: - *regs = append(*regs, i.rd.nr()) + *regs = append(*regs, i.rd) case defKindCall: _, _, retIntRealRegs, retFloatRealRegs, _ := backend.ABIInfoFromUint64(i.u2) for i := byte(0); i < retIntRealRegs; i++ { @@ -194,7 +196,7 @@ func (i *instruction) AssignDef(reg regalloc.VReg) { switch defKinds[i.kind] { case defKindNone: case defKindRD: - i.rd = i.rd.assignReg(reg) + i.rd = reg case defKindCall: panic("BUG: call instructions shouldn't be assigned") default: @@ -329,7 +331,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { if rm := i.rm.reg(); rm.Valid() { *regs = append(*regs, rm) } - if ra := i.ra.reg(); ra.Valid() { + if ra := regalloc.VReg(i.u2); ra.Valid() { *regs = append(*regs, ra) } case useKindRNRN1RM: @@ -341,18 +343,20 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { *regs = append(*regs, rm) } case useKindAMode: - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindRNAMode: *regs = append(*regs, i.rn.reg()) - if amodeRN := i.amode.rn; amodeRN.Valid() { + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { *regs = append(*regs, amodeRN) } - if amodeRM := i.amode.rm; amodeRM.Valid() { + if amodeRM := amode.rm; amodeRM.Valid() { *regs = append(*regs, amodeRM) } case useKindCond: @@ -374,7 +378,7 @@ func (i *instruction) Uses(regs *[]regalloc.VReg) []regalloc.VReg { case useKindRDRewrite: *regs = append(*regs, i.rn.reg()) *regs = append(*regs, i.rm.reg()) - *regs = append(*regs, i.rd.reg()) + *regs = append(*regs, i.rd) default: panic(fmt.Sprintf("useKind for %v not defined", i)) } @@ -408,8 +412,8 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if rd := i.rd.reg(); rd.Valid() { - i.rd = i.rd.assignReg(reg) + if rd := i.rd; rd.Valid() { + i.rd = reg } } case useKindRNRN1RM: @@ -435,32 +439,36 @@ func (i *instruction) AssignUse(index int, reg regalloc.VReg) { i.rm = i.rm.assignReg(reg) } } else { - if ra := i.ra.reg(); ra.Valid() { - i.ra = i.ra.assignReg(reg) + if ra := regalloc.VReg(i.u2); ra.Valid() { + i.u2 = uint64(reg) } } case useKindAMode: if index == 0 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } } case useKindRNAMode: if index == 0 { i.rn = i.rn.assignReg(reg) } else if index == 1 { - if amodeRN := i.amode.rn; amodeRN.Valid() { - i.amode.rn = reg + amode := i.getAmode() + if amodeRN := amode.rn; amodeRN.Valid() { + amode.rn = reg } else { panic("BUG") } } else { - if amodeRM := i.amode.rm; amodeRM.Valid() { - i.amode.rm = reg + amode := i.getAmode() + if amodeRM := amode.rm; amodeRM.Valid() { + amode.rm = reg } else { panic("BUG") } @@ -503,35 +511,35 @@ func (i *instruction) callFuncRef() ssa.FuncRef { } // shift must be 
divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVZ(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movZ - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVK(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movK - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // shift must be divided by 16 and must be in range 0-3 (if dst64bit is true) or 0-1 (if dst64bit is false) -func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint64, dst64bit bool) { +func (i *instruction) asMOVN(dst regalloc.VReg, imm uint64, shift uint32, dst64bit bool) { i.kind = movN - i.rd = operandNR(dst) + i.rd = dst i.u1 = imm - i.u2 = shift + i.u2 = uint64(shift) if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -553,21 +561,21 @@ func (i *instruction) asRet() { i.kind = ret } -func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asStorePair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = storeP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode addressMode) { +func (i *instruction) asLoadPair64(src1, src2 regalloc.VReg, amode *addressMode) { i.kind = loadP64 i.rn = operandNR(src1) i.rm = operandNR(src2) - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asStore(src operand, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = store8 @@ -589,10 +597,10 @@ func (i *instruction) asStore(src operand, amode addressMode, sizeInBits byte) { i.kind = fpuStore128 } i.rn = src - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asSLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = sLoad8 @@ -604,10 +612,10 @@ func (i *instruction) asSLoad(dst operand, amode addressMode, sizeInBits byte) { panic("BUG") } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asULoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 8: i.kind = uLoad8 @@ -619,10 +627,10 @@ func (i *instruction) asULoad(dst operand, amode addressMode, sizeInBits byte) { i.kind = uLoad64 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) { +func (i *instruction) asFpuLoad(dst regalloc.VReg, amode *addressMode, sizeInBits byte) { switch sizeInBits { case 32: i.kind = fpuLoad32 @@ -632,10 +640,18 @@ func (i *instruction) asFpuLoad(dst operand, amode addressMode, sizeInBits byte) i.kind = fpuLoad128 } i.rd = dst - i.amode = amode + i.setAmode(amode) } -func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { +func (i 
*instruction) getAmode() *addressMode { + return wazevoapi.PtrFromUintptr[addressMode](uintptr(i.u1)) +} + +func (i *instruction) setAmode(a *addressMode) { + i.u1 = uint64(uintptr(unsafe.Pointer(a))) +} + +func (i *instruction) asVecLoad1R(rd regalloc.VReg, rn operand, arr vecArrangement) { // NOTE: currently only has support for no-offset loads, though it is suspicious that // we would need to support offset load (that is only available for post-index). i.kind = vecLoad1R @@ -646,32 +662,32 @@ func (i *instruction) asVecLoad1R(rd, rn operand, arr vecArrangement) { func (i *instruction) asCSet(rd regalloc.VReg, mask bool, c condFlag) { i.kind = cSet - i.rd = operandNR(rd) + i.rd = rd i.u1 = uint64(c) if mask { i.u2 = 1 } } -func (i *instruction) asCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = cSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuCSel(rd, rn, rm operand, c condFlag, _64bit bool) { +func (i *instruction) asFpuCSel(rd regalloc.VReg, rn, rm operand, c condFlag, _64bit bool) { i.kind = fpuCSel i.rd = rd i.rn = rn i.rm = rm i.u1 = uint64(c) if _64bit { - i.u3 = 1 + i.u2 = 1 } } @@ -691,7 +707,7 @@ func (i *instruction) asBrTableSequence(indexReg regalloc.VReg, targetIndex, tar } func (i *instruction) brTableSequenceOffsetsResolved() { - i.u3 = 1 // indicate that the offsets are resolved, for debugging. + i.rm.data = 1 // indicate that the offsets are resolved, for debugging. } func (i *instruction) brLabel() label { @@ -701,7 +717,7 @@ func (i *instruction) brLabel() label { // brOffsetResolved is called when the target label is resolved. func (i *instruction) brOffsetResolve(offset int64) { i.u2 = uint64(offset) - i.u3 = 1 // indicate that the offset is resolved, for debugging. + i.rm.data = 1 // indicate that the offset is resolved, for debugging. } func (i *instruction) brOffset() int64 { @@ -714,7 +730,7 @@ func (i *instruction) asCondBr(c cond, target label, is64bit bool) { i.u1 = c.asUint64() i.u2 = uint64(target) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } @@ -728,17 +744,17 @@ func (i *instruction) condBrLabel() label { // condBrOffsetResolve is called when the target label is resolved. func (i *instruction) condBrOffsetResolve(offset int64) { - i.rd.data = uint64(offset) - i.rd.data2 = 1 // indicate that the offset is resolved, for debugging. + i.rn.data = uint64(offset) + i.rn.data2 = 1 // indicate that the offset is resolved, for debugging. } // condBrOffsetResolved returns true if condBrOffsetResolve is already called. 
func (i *instruction) condBrOffsetResolved() bool { - return i.rd.data2 == 1 + return i.rn.data2 == 1 } func (i *instruction) condBrOffset() int64 { - return int64(i.rd.data) + return int64(i.rn.data) } func (i *instruction) condBrCond() cond { @@ -746,33 +762,33 @@ func (i *instruction) condBrCond() cond { } func (i *instruction) condBr64bit() bool { - return i.u3 == 1 + return i.u2&(1<<32) != 0 } func (i *instruction) asLoadFpuConst32(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst32 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst64(rd regalloc.VReg, raw uint64) { i.kind = loadFpuConst64 i.u1 = raw - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asLoadFpuConst128(rd regalloc.VReg, lo, hi uint64) { i.kind = loadFpuConst128 i.u1 = lo i.u2 = hi - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asFpuCmp(rn, rm operand, is64bit bool) { i.kind = fpuCmp i.rn, i.rm = rn, rm if is64bit { - i.u3 = 1 + i.u1 = 1 } } @@ -783,12 +799,12 @@ func (i *instruction) asCCmpImm(rn operand, imm uint64, c condFlag, flag byte, i i.u1 = uint64(c) i.u2 = uint64(flag) if is64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALU(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR @@ -804,22 +820,22 @@ func (i *instruction) asALU(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } // asALU setups a basic ALU instruction. -func (i *instruction) asALURRRR(aluOp aluOp, rd, rn, rm, ra operand, dst64bit bool) { +func (i *instruction) asALURRRR(aluOp aluOp, rd regalloc.VReg, rn, rm operand, ra regalloc.VReg, dst64bit bool) { i.kind = aluRRRR i.u1 = uint64(aluOp) - i.rd, i.rn, i.rm, i.ra = rd, rn, rm, ra + i.rd, i.rn, i.rm, i.u2 = rd, rn, rm, uint64(ra) if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } // asALUShift setups a shift based ALU instruction. -func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asALUShift(aluOp aluOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { switch rm.kind { case operandKindNR: i.kind = aluRRR // If the shift amount op is a register, then the instruction is encoded as a normal ALU instruction with two register operands. 
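Note: with the `u3` field gone from `instruction`, one-bit flags such as "destination is 64-bit" are packed into bit 32 of `u2` (or `u1`), above the 32-bit payload that already lives in the low half. A small sketch of that packing discipline, matching the `i.u2 |= 1 << 32` / `i.u2&(1<<32) != 0` pairs in these hunks:

```go
const dst64Flag = uint64(1) << 32 // bit 32: "destination register is 64-bit"

// pack stores a 32-bit payload and an optional flag in a single uint64.
func pack(payload uint32, dst64 bool) uint64 {
	u := uint64(payload)
	if dst64 {
		u |= dst64Flag
	}
	return u
}

// unpack recovers both halves; uint32(u) truncates away the flag bits.
func unpack(u uint64) (payload uint32, dst64 bool) {
	return uint32(u), u&dst64Flag != 0
}
```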
@@ -831,17 +847,17 @@ func (i *instruction) asALUShift(aluOp aluOp, rd, rn, rm operand, dst64bit bool) i.u1 = uint64(aluOp) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asALUBitmaskImm(aluOp aluOp, rd, rn regalloc.VReg, imm uint64, dst64bit bool) { i.kind = aluRRBitmaskImm i.u1 = uint64(aluOp) - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u2 = imm if dst64bit { - i.u3 = 1 + i.u1 |= 1 << 32 } } @@ -852,76 +868,76 @@ func (i *instruction) asMovToFPSR(rn regalloc.VReg) { func (i *instruction) asMovFromFPSR(rd regalloc.VReg) { i.kind = movFromFPSR - i.rd = operandNR(rd) + i.rd = rd } func (i *instruction) asBitRR(bitOp bitOp, rd, rn regalloc.VReg, is64bit bool) { i.kind = bitRR - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(bitOp) if is64bit { i.u2 = 1 } } -func (i *instruction) asFpuRRR(op fpuBinOp, rd, rn, rm operand, dst64bit bool) { +func (i *instruction) asFpuRRR(op fpuBinOp, rd regalloc.VReg, rn, rm operand, dst64bit bool) { i.kind = fpuRRR i.u1 = uint64(op) i.rd, i.rn, i.rm = rd, rn, rm if dst64bit { - i.u3 = 1 + i.u2 = 1 } } -func (i *instruction) asFpuRR(op fpuUniOp, rd, rn operand, dst64bit bool) { +func (i *instruction) asFpuRR(op fpuUniOp, rd regalloc.VReg, rn operand, dst64bit bool) { i.kind = fpuRR i.u1 = uint64(op) i.rd, i.rn = rd, rn if dst64bit { - i.u3 = 1 + i.u2 = 1 } } func (i *instruction) asExtend(rd, rn regalloc.VReg, fromBits, toBits byte, signed bool) { i.kind = extend - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd i.u1 = uint64(fromBits) i.u2 = uint64(toBits) if signed { - i.u3 = 1 + i.u2 |= 1 << 32 } } func (i *instruction) asMove32(rd, rn regalloc.VReg) { i.kind = mov32 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asMove64(rd, rn regalloc.VReg) *instruction { i.kind = mov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } func (i *instruction) asFpuMov64(rd, rn regalloc.VReg) { i.kind = fpuMov64 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd } func (i *instruction) asFpuMov128(rd, rn regalloc.VReg) *instruction { i.kind = fpuMov128 - i.rn, i.rd = operandNR(rn), operandNR(rd) + i.rn, i.rd = operandNR(rn), rd return i } -func (i *instruction) asMovToVec(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asMovToVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = movToVec i.rd = rd i.rn = rn i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vecIndex, signed bool) { +func (i *instruction) asMovFromVec(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex, signed bool) { if signed { i.kind = movFromVecSigned } else { @@ -932,48 +948,48 @@ func (i *instruction) asMovFromVec(rd, rn operand, arr vecArrangement, index vec i.u1, i.u2 = uint64(arr), uint64(index) } -func (i *instruction) asVecDup(rd, rn operand, arr vecArrangement) { +func (i *instruction) asVecDup(rd regalloc.VReg, rn operand, arr vecArrangement) { i.kind = vecDup i.u1 = uint64(arr) i.rn, i.rd = rn, rd } -func (i *instruction) asVecDupElement(rd, rn operand, arr vecArrangement, index vecIndex) { +func (i *instruction) asVecDupElement(rd regalloc.VReg, rn operand, arr vecArrangement, index vecIndex) { i.kind = vecDupElement i.u1 = uint64(arr) i.rn, i.rd = rn, rd i.u2 = uint64(index) } -func (i 
+func (i *instruction) asVecExtract(rd regalloc.VReg, rn, rm operand, arr vecArrangement, index uint32) {
 	i.kind = vecExtract
 	i.u1 = uint64(arr)
 	i.rn, i.rm, i.rd = rn, rm, rd
 	i.u2 = uint64(index)
 }
 
-func (i *instruction) asVecMovElement(rd, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
+func (i *instruction) asVecMovElement(rd regalloc.VReg, rn operand, arr vecArrangement, rdIndex, rnIndex vecIndex) {
 	i.kind = vecMovElement
 	i.u1 = uint64(arr)
-	i.u2, i.u3 = uint64(rdIndex), uint64(rnIndex)
+	i.u2 = uint64(rdIndex) | uint64(rnIndex)<<32
 	i.rn, i.rd = rn, rd
 }
 
-func (i *instruction) asVecMisc(op vecOp, rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecMisc(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {
 	i.kind = vecMisc
 	i.u1 = uint64(op)
 	i.rn, i.rd = rn, rd
 	i.u2 = uint64(arr)
 }
 
-func (i *instruction) asVecLanes(op vecOp, rd, rn operand, arr vecArrangement) {
+func (i *instruction) asVecLanes(op vecOp, rd regalloc.VReg, rn operand, arr vecArrangement) {
 	i.kind = vecLanes
 	i.u1 = uint64(op)
 	i.rn, i.rd = rn, rd
 	i.u2 = uint64(arr)
 }
 
-func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction {
+func (i *instruction) asVecShiftImm(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {
 	i.kind = vecShiftImm
 	i.u1 = uint64(op)
 	i.rn, i.rm, i.rd = rn, rm, rd
@@ -981,7 +997,7 @@ func (i *instruction) asVecShiftImm(op vecOp, rd, rn, rm operand, arr vecArrange
 	return i
 }
 
-func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecTbl(nregs byte, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
 	switch nregs {
 	case 0, 1:
 		i.kind = vecTbl
@@ -1000,14 +1016,14 @@ func (i *instruction) asVecTbl(nregs byte, rd, rn, rm operand, arr vecArrangemen
 	i.u2 = uint64(arr)
 }
 
-func (i *instruction) asVecPermute(op vecOp, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecPermute(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
 	i.kind = vecPermute
 	i.u1 = uint64(op)
 	i.rn, i.rm, i.rd = rn, rm, rd
 	i.u2 = uint64(arr)
 }
 
-func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement) *instruction {
+func (i *instruction) asVecRRR(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) *instruction {
 	i.kind = vecRRR
 	i.u1 = uint64(op)
 	i.rn, i.rd, i.rm = rn, rd, rm
@@ -1017,7 +1033,7 @@ func (i *instruction) asVecRRR(op vecOp, rd, rn, rm operand, arr vecArrangement)
 
 // asVecRRRRewrite encodes a vector instruction that rewrites the destination register.
 // IMPORTANT: the destination register must be already defined before this instruction.
-func (i *instruction) asVecRRRRewrite(op vecOp, rd, rn, rm operand, arr vecArrangement) {
+func (i *instruction) asVecRRRRewrite(op vecOp, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
 	i.kind = vecRRRRewrite
 	i.u1 = uint64(op)
 	i.rn, i.rd, i.rm = rn, rd, rm
@@ -1033,8 +1049,8 @@ func (i *instruction) IsCopy() bool {
 
 // String implements fmt.Stringer.
 func (i *instruction) String() (str string) {
-	is64SizeBitToSize := func(u3 uint64) byte {
-		if u3 == 0 {
+	is64SizeBitToSize := func(v uint64) byte {
+		if v == 0 {
 			return 32
 		}
 		return 64
@@ -1049,46 +1065,46 @@ func (i *instruction) String() (str string) {
 			str = "nop0"
 		}
 	case aluRRR:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size),
+			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size),
 			i.rm.format(size))
 	case aluRRRR:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u1 >> 32)
 		str = fmt.Sprintf("%s %s, %s, %s, %s", aluOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.ra.nr(), size))
+			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(regalloc.VReg(i.u2), size))
 	case aluRRImm12:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
+			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), i.rm.format(size))
 	case aluRRBitmaskImm:
-		size := is64SizeBitToSize(i.u3)
-		rd, rn := formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size)
+		size := is64SizeBitToSize(i.u1 >> 32)
+		rd, rn := formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size)
 		if size == 32 {
 			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, uint32(i.u2))
 		} else {
 			str = fmt.Sprintf("%s %s, %s, #%#x", aluOp(i.u1).String(), rd, rn, i.u2)
 		}
 	case aluRRImmShift:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		str = fmt.Sprintf("%s %s, %s, %#x",
 			aluOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size),
+			formatVRegSized(i.rd, size),
 			formatVRegSized(i.rn.nr(), size),
 			i.rm.shiftImm(),
 		)
 	case aluRRRShift:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		str = fmt.Sprintf("%s %s, %s, %s",
 			aluOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size),
+			formatVRegSized(i.rd, size),
 			formatVRegSized(i.rn.nr(), size),
 			i.rm.format(size),
 		)
 	case aluRRRExtend:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		str = fmt.Sprintf("%s %s, %s, %s", aluOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size),
+			formatVRegSized(i.rd, size),
 			formatVRegSized(i.rn.nr(), size),
 			// Regardless of the source size, the register is formatted in 32-bit.
 			i.rm.format(32),
@@ -1097,57 +1113,57 @@ func (i *instruction) String() (str string) {
 		size := is64SizeBitToSize(i.u2)
 		str = fmt.Sprintf("%s %s, %s",
 			bitOp(i.u1),
-			formatVRegSized(i.rd.nr(), size),
+			formatVRegSized(i.rd, size),
 			formatVRegSized(i.rn.nr(), size),
 		)
 	case uLoad8:
-		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldrb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case sLoad8:
-		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldrsb %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case uLoad16:
-		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldrh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case sLoad16:
-		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldrsh %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case uLoad32:
-		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case sLoad32:
-		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldrs %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case uLoad64:
-		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64))
+		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64))
 	case store8:
-		str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(8))
+		str = fmt.Sprintf("strb %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(8))
 	case store16:
-		str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(16))
+		str = fmt.Sprintf("strh %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(16))
 	case store32:
-		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(32))
 	case store64:
-		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64))
+		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64))
 	case storeP64:
 		str = fmt.Sprintf("stp %s, %s, %s",
-			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
+			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))
 	case loadP64:
 		str = fmt.Sprintf("ldp %s, %s, %s",
-			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.amode.format(64))
+			formatVRegSized(i.rn.nr(), 64), formatVRegSized(i.rm.nr(), 64), i.getAmode().format(64))
 	case mov64:
 		str = fmt.Sprintf("mov %s, %s",
-			formatVRegSized(i.rd.nr(), 64),
+			formatVRegSized(i.rd, 64),
 			formatVRegSized(i.rn.nr(), 64))
 	case mov32:
-		str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd.nr(), 32), formatVRegSized(i.rn.nr(), 32))
+		str = fmt.Sprintf("mov %s, %s", formatVRegSized(i.rd, 32), formatVRegSized(i.rn.nr(), 32))
 	case movZ:
-		size := is64SizeBitToSize(i.u3)
-		str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+		size := is64SizeBitToSize(i.u2 >> 32)
+		str = fmt.Sprintf("movz %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
 	case movN:
-		size := is64SizeBitToSize(i.u3)
-		str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+		size := is64SizeBitToSize(i.u2 >> 32)
+		str = fmt.Sprintf("movn %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
 	case movK:
-		size := is64SizeBitToSize(i.u3)
-		str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd.nr(), size), uint16(i.u1), i.u2*16)
+		size := is64SizeBitToSize(i.u2 >> 32)
+		str = fmt.Sprintf("movk %s, #%#x, lsl %d", formatVRegSized(i.rd, size), uint16(i.u1), uint32(i.u2)*16)
 	case extend:
 		fromBits, toBits := byte(i.u1), byte(i.u2)
 		var signedStr string
-		if i.u3 == 1 {
+		if i.u2>>32 == 1 {
 			signedStr = "s"
 		} else {
 			signedStr = "u"
@@ -1161,39 +1177,39 @@ func (i *instruction) String() (str string) {
 		case 32:
 			fromStr = "w"
 		}
-		str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd.nr(), toBits), formatVRegSized(i.rn.nr(), 32))
+		str = fmt.Sprintf("%sxt%s %s, %s", signedStr, fromStr, formatVRegSized(i.rd, toBits), formatVRegSized(i.rn.nr(), 32))
 	case cSel:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2)
 		str = fmt.Sprintf("csel %s, %s, %s, %s",
-			formatVRegSized(i.rd.nr(), size),
+			formatVRegSized(i.rd, size),
 			formatVRegSized(i.rn.nr(), size),
 			formatVRegSized(i.rm.nr(), size),
 			condFlag(i.u1),
 		)
 	case cSet:
 		if i.u2 != 0 {
-			str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
+			str = fmt.Sprintf("csetm %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))
 		} else {
-			str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd.nr(), 64), condFlag(i.u1))
+			str = fmt.Sprintf("cset %s, %s", formatVRegSized(i.rd, 64), condFlag(i.u1))
 		}
 	case cCmpImm:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		str = fmt.Sprintf("ccmp %s, #%#x, #%#x, %s",
 			formatVRegSized(i.rn.nr(), size), i.rm.data,
 			i.u2&0b1111,
 			condFlag(i.u1))
 	case fpuMov64:
 		str = fmt.Sprintf("mov %s, %s",
-			formatVRegVec(i.rd.nr(), vecArrangement8B, vecIndexNone),
+			formatVRegVec(i.rd, vecArrangement8B, vecIndexNone),
 			formatVRegVec(i.rn.nr(), vecArrangement8B, vecIndexNone))
 	case fpuMov128:
 		str = fmt.Sprintf("mov %s, %s",
-			formatVRegVec(i.rd.nr(), vecArrangement16B, vecIndexNone),
+			formatVRegVec(i.rd, vecArrangement16B, vecIndexNone),
 			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone))
 	case fpuMovFromVec:
 		panic("TODO")
 	case fpuRR:
-		dstSz := is64SizeBitToSize(i.u3)
+		dstSz := is64SizeBitToSize(i.u2)
 		srcSz := dstSz
 		op := fpuUniOp(i.u1)
 		switch op {
@@ -1203,38 +1219,38 @@ func (i *instruction) String() (str string) {
 			srcSz = 64
 		}
 		str = fmt.Sprintf("%s %s, %s", op.String(),
-			formatVRegSized(i.rd.nr(), dstSz), formatVRegSized(i.rn.nr(), srcSz))
+			formatVRegSized(i.rd, dstSz), formatVRegSized(i.rn.nr(), srcSz))
 	case fpuRRR:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2)
 		str = fmt.Sprintf("%s %s, %s, %s", fpuBinOp(i.u1).String(),
-			formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
+			formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
 	case fpuRRI:
 		panic("TODO")
 	case fpuRRRR:
 		panic("TODO")
 	case fpuCmp:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u1)
 		str = fmt.Sprintf("fcmp %s, %s",
 			formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size))
 	case fpuLoad32:
-		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 32), i.amode.format(32))
+		str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 32), i.getAmode().format(32))
 	case fpuStore32:
-		str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.amode.format(64))
str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 32), i.getAmode().format(64)) case fpuLoad64: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 64), i.getAmode().format(64)) case fpuStore64: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 64), i.getAmode().format(64)) case fpuLoad128: - str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("ldr %s, %s", formatVRegSized(i.rd, 128), i.getAmode().format(64)) case fpuStore128: - str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.amode.format(64)) + str = fmt.Sprintf("str %s, %s", formatVRegSized(i.rn.nr(), 128), i.getAmode().format(64)) case loadFpuConst32: - str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd.nr(), 32), math.Float32frombits(uint32(i.u1))) + str = fmt.Sprintf("ldr %s, #8; b 8; data.f32 %f", formatVRegSized(i.rd, 32), math.Float32frombits(uint32(i.u1))) case loadFpuConst64: - str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd.nr(), 64), math.Float64frombits(i.u1)) + str = fmt.Sprintf("ldr %s, #8; b 16; data.f64 %f", formatVRegSized(i.rd, 64), math.Float64frombits(i.u1)) case loadFpuConst128: str = fmt.Sprintf("ldr %s, #8; b 32; data.v128 %016x %016x", - formatVRegSized(i.rd.nr(), 128), i.u1, i.u2) + formatVRegSized(i.rd, 128), i.u1, i.u2) case fpuToInt: var op, src, dst string if signed := i.u1 == 1; signed { @@ -1242,15 +1258,15 @@ func (i *instruction) String() (str string) { } else { op = "fcvtzu" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegWidthVec(i.rn.nr(), vecArrangementD) } else { src = formatVRegWidthVec(i.rn.nr(), vecArrangementS) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegSized(i.rd.nr(), 64) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegSized(i.rd, 64) } else { - dst = formatVRegSized(i.rd.nr(), 32) + dst = formatVRegSized(i.rd, 32) } str = fmt.Sprintf("%s %s, %s", op, dst, src) @@ -1261,21 +1277,21 @@ func (i *instruction) String() (str string) { } else { op = "ucvtf" } - if src64 := i.u2 == 1; src64 { + if src64 := i.u2&1 != 0; src64 { src = formatVRegSized(i.rn.nr(), 64) } else { src = formatVRegSized(i.rn.nr(), 32) } - if dst64 := i.u3 == 1; dst64 { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementD) + if dst64 := i.u2&2 != 0; dst64 { + dst = formatVRegWidthVec(i.rd, vecArrangementD) } else { - dst = formatVRegWidthVec(i.rd.nr(), vecArrangementS) + dst = formatVRegWidthVec(i.rd, vecArrangementS) } str = fmt.Sprintf("%s %s, %s", op, dst, src) case fpuCSel: - size := is64SizeBitToSize(i.u3) + size := is64SizeBitToSize(i.u2) str = fmt.Sprintf("fcsel %s, %s, %s, %s", - formatVRegSized(i.rd.nr(), size), + formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), size), formatVRegSized(i.rm.nr(), size), condFlag(i.u1), @@ -1291,7 +1307,7 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) } - str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd.nr(), arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) + str = fmt.Sprintf("ins %s, %s", formatVRegVec(i.rd, arr, vecIndex(i.u2)), formatVRegSized(i.rn.nr(), size)) case movFromVec, movFromVecSigned: var size byte var opcode string @@ -1315,23 +1331,23 @@ func (i *instruction) String() (str string) { default: panic("unsupported arrangement " + arr.String()) 
 		}
-		str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd.nr(), size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
+		str = fmt.Sprintf("%s %s, %s", opcode, formatVRegSized(i.rd, size), formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)))
 	case vecDup:
 		str = fmt.Sprintf("dup %s, %s",
-			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
+			formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),
 			formatVRegSized(i.rn.nr(), 64),
 		)
 	case vecDupElement:
 		arr := vecArrangement(i.u1)
 		str = fmt.Sprintf("dup %s, %s",
-			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+			formatVRegVec(i.rd, arr, vecIndexNone),
 			formatVRegVec(i.rn.nr(), arr, vecIndex(i.u2)),
 		)
 	case vecDupFromFpu:
 		panic("TODO")
 	case vecExtract:
 		str = fmt.Sprintf("ext %s, %s, %s, #%d",
-			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone),
+			formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone),
 			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndexNone),
 			formatVRegVec(i.rm.nr(), vecArrangement(i.u1), vecIndexNone),
 			uint32(i.u2),
@@ -1340,15 +1356,15 @@ func (i *instruction) String() (str string) {
 		panic("TODO")
 	case vecMovElement:
 		str = fmt.Sprintf("mov %s, %s",
-			formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndex(i.u2)),
-			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u3)),
+			formatVRegVec(i.rd, vecArrangement(i.u1), vecIndex(i.u2&0xffffffff)),
+			formatVRegVec(i.rn.nr(), vecArrangement(i.u1), vecIndex(i.u2>>32)),
 		)
 	case vecMiscNarrow:
 		panic("TODO")
 	case vecRRR, vecRRRRewrite:
 		str = fmt.Sprintf("%s %s, %s, %s",
 			vecOp(i.u1),
-			formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+			formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
 			formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone),
 			formatVRegVec(i.rm.nr(), vecArrangement(i.u2), vecIndexNone),
 		)
@@ -1356,12 +1372,12 @@ func (i *instruction) String() (str string) {
 		vop := vecOp(i.u1)
 		if vop == vecOpCmeq0 {
 			str = fmt.Sprintf("cmeq %s, %s, #0",
-				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+				formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
 				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
 		} else {
 			str = fmt.Sprintf("%s %s, %s",
 				vop,
-				formatVRegVec(i.rd.nr(), vecArrangement(i.u2), vecIndexNone),
+				formatVRegVec(i.rd, vecArrangement(i.u2), vecIndexNone),
 				formatVRegVec(i.rn.nr(), vecArrangement(i.u2), vecIndexNone))
 		}
 	case vecLanes:
@@ -1379,24 +1395,24 @@ func (i *instruction) String() (str string) {
 		}
 		str = fmt.Sprintf("%s %s, %s",
 			vecOp(i.u1),
-			formatVRegWidthVec(i.rd.nr(), destArr),
+			formatVRegWidthVec(i.rd, destArr),
 			formatVRegVec(i.rn.nr(), arr, vecIndexNone))
 	case vecShiftImm:
 		arr := vecArrangement(i.u2)
 		str = fmt.Sprintf("%s %s, %s, #%d",
 			vecOp(i.u1),
-			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+			formatVRegVec(i.rd, arr, vecIndexNone),
 			formatVRegVec(i.rn.nr(), arr, vecIndexNone),
 			i.rm.shiftImm())
 	case vecTbl:
 		arr := vecArrangement(i.u2)
 		str = fmt.Sprintf("tbl %s, { %s }, %s",
-			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+			formatVRegVec(i.rd, arr, vecIndexNone),
 			formatVRegVec(i.rn.nr(), vecArrangement16B, vecIndexNone),
 			formatVRegVec(i.rm.nr(), arr, vecIndexNone))
 	case vecTbl2:
 		arr := vecArrangement(i.u2)
-		rd, rn, rm := i.rd.nr(), i.rn.nr(), i.rm.nr()
+		rd, rn, rm := i.rd, i.rn.nr(), i.rm.nr()
 		rn1 := regalloc.FromRealReg(rn.RealReg()+1, rn.RegType())
 		str = fmt.Sprintf("tbl %s, { %s, %s }, %s",
 			formatVRegVec(rd, arr, vecIndexNone),
@@ -1407,13 +1423,13 @@ func (i *instruction) String() (str string) {
 		arr := vecArrangement(i.u2)
 		str = fmt.Sprintf("%s %s, %s, %s",
 			vecOp(i.u1),
-			formatVRegVec(i.rd.nr(), arr, vecIndexNone),
+			formatVRegVec(i.rd, arr, vecIndexNone),
 			formatVRegVec(i.rn.nr(), arr, vecIndexNone),
 			formatVRegVec(i.rm.nr(), arr, vecIndexNone))
 	case movToFPSR:
 		str = fmt.Sprintf("msr fpsr, %s", formatVRegSized(i.rn.nr(), 64))
 	case movFromFPSR:
-		str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd.nr(), 64))
+		str = fmt.Sprintf("mrs %s fpsr", formatVRegSized(i.rd, 64))
 	case call:
 		str = fmt.Sprintf("bl %s", ssa.FuncRef(i.u1))
 	case callInd:
@@ -1422,15 +1438,15 @@ func (i *instruction) String() (str string) {
 		str = "ret"
 	case br:
 		target := label(i.u1)
-		if i.u3 != 0 {
+		if i.rm.data != 0 {
 			str = fmt.Sprintf("b #%#x (%s)", i.brOffset(), target.String())
 		} else {
 			str = fmt.Sprintf("b %s", target.String())
 		}
 	case condBr:
-		size := is64SizeBitToSize(i.u3)
+		size := is64SizeBitToSize(i.u2 >> 32)
 		c := cond(i.u1)
-		target := label(i.u2)
+		target := label(i.u2 & 0xffffffff)
 		switch c.kind() {
 		case condKindRegisterZero:
 			if !i.condBrOffsetResolved() {
@@ -1456,7 +1472,7 @@ func (i *instruction) String() (str string) {
 			}
 		}
 	case adr:
-		str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd.nr(), 64), int64(i.u1))
+		str = fmt.Sprintf("adr %s, #%#x", formatVRegSized(i.rd, 64), int64(i.u1))
 	case brTableSequence:
 		targetIndex := i.u1
 		str = fmt.Sprintf("br_table_sequence %s, table_index=%d", formatVRegSized(i.rn.nr(), 64), targetIndex)
@@ -1473,7 +1489,7 @@ func (i *instruction) String() (str string) {
 		case 1:
 			m = m + "b"
 		}
-		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
+		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))
 	case atomicCas:
 		m := "casal"
 		size := byte(32)
@@ -1485,7 +1501,7 @@ func (i *instruction) String() (str string) {
 		case 1:
 			m = m + "b"
 		}
-		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
+		str = fmt.Sprintf("%s %s, %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rm.nr(), size), formatVRegSized(i.rn.nr(), 64))
 	case atomicLoad:
 		m := "ldar"
 		size := byte(32)
@@ -1497,7 +1513,7 @@ func (i *instruction) String() (str string) {
 		case 1:
 			m = m + "b"
 		}
-		str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd.nr(), size), formatVRegSized(i.rn.nr(), 64))
+		str = fmt.Sprintf("%s %s, %s", m, formatVRegSized(i.rd, size), formatVRegSized(i.rn.nr(), 64))
 	case atomicStore:
 		m := "stlr"
 		size := byte(32)
@@ -1517,9 +1533,9 @@ func (i *instruction) String() (str string) {
 	case emitSourceOffsetInfo:
 		str = fmt.Sprintf("source_offset_info %d", ssa.SourceOffset(i.u1))
 	case vecLoad1R:
-		str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd.nr(), vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
+		str = fmt.Sprintf("ld1r {%s}, [%s]", formatVRegVec(i.rd, vecArrangement(i.u1), vecIndexNone), formatVRegSized(i.rn.nr(), 64))
 	case loadConstBlockArg:
-		str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd.nr(), 64), i.u1)
+		str = fmt.Sprintf("load_const_block_arg %s, %#x", formatVRegSized(i.rd, 64), i.u1)
 	default:
 		panic(i.kind)
 	}
@@ -1528,26 +1544,26 @@ func (i *instruction) String() (str string) {
 
 func (i *instruction) asAdr(rd regalloc.VReg, offset int64) {
 	i.kind = adr
-	i.rd = operandNR(rd)
+	i.rd = rd
 	i.u1 = uint64(offset)
 }
 
-func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt operand, size uint64) {
+func (i *instruction) asAtomicRmw(op atomicRmwOp, rn, rs, rt regalloc.VReg, size uint64) {
 	i.kind = atomicRmw
-	i.rd, i.rn, i.rm = rt, rn, rs
+	i.rd, i.rn, i.rm = rt, operandNR(rn), operandNR(rs)
 	i.u1 = uint64(op)
 	i.u2 = size
 }
 
-func (i *instruction) asAtomicCas(rn, rs, rt operand, size uint64) {
+func (i *instruction) asAtomicCas(rn, rs, rt regalloc.VReg, size uint64) {
 	i.kind = atomicCas
-	i.rm, i.rn, i.rd = rt, rn, rs
+	i.rm, i.rn, i.rd = operandNR(rt), operandNR(rn), rs
 	i.u2 = size
 }
 
-func (i *instruction) asAtomicLoad(rn, rt operand, size uint64) {
+func (i *instruction) asAtomicLoad(rn, rt regalloc.VReg, size uint64) {
 	i.kind = atomicLoad
-	i.rn, i.rd = rn, rt
+	i.rn, i.rd = operandNR(rn), rt
 	i.u2 = size
 }
 
@@ -1755,12 +1771,12 @@ func (i *instruction) asLoadConstBlockArg(v uint64, typ ssa.Type, dst regalloc.V
 	i.kind = loadConstBlockArg
 	i.u1 = v
 	i.u2 = uint64(typ)
-	i.rd = operandNR(dst)
+	i.rd = dst
 	return i
 }
 
 func (i *instruction) loadConstBlockArgData() (v uint64, typ ssa.Type, dst regalloc.VReg) {
-	return i.u1, ssa.Type(i.u2), i.rd.nr()
+	return i.u1, ssa.Type(i.u2), i.rd
 }
 
 func (i *instruction) asEmitSourceOffsetInfo(l ssa.SourceOffset) *instruction {
@@ -1778,7 +1794,7 @@ func (i *instruction) asUDF() *instruction {
 	return i
 }
 
-func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bool) {
+func (i *instruction) asFpuToInt(rd regalloc.VReg, rn operand, rdSigned, src64bit, dst64bit bool) {
 	i.kind = fpuToInt
 	i.rn = rn
 	i.rd = rd
@@ -1789,11 +1805,11 @@ func (i *instruction) asFpuToInt(rd, rn operand, rdSigned, src64bit, dst64bit bo
 		i.u2 = 1
 	}
 	if dst64bit {
-		i.u3 = 1
+		i.u2 |= 2
 	}
 }
 
-func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bool) {
+func (i *instruction) asIntToFpu(rd regalloc.VReg, rn operand, rnSigned, src64bit, dst64bit bool) {
 	i.kind = intToFpu
 	i.rn = rn
 	i.rd = rd
@@ -1804,7 +1820,7 @@ func (i *instruction) asIntToFpu(rd, rn operand, rnSigned, src64bit, dst64bit bo
 		i.u2 = 1
 	}
 	if dst64bit {
-		i.u3 = 1
+		i.u2 |= 2
 	}
 }
 
@@ -1817,7 +1833,7 @@ func (i *instruction) asExitSequence(ctx regalloc.VReg) *instruction {
 // aluOp determines the type of ALU operation. Instructions whose kind is one of
 // aluRRR, aluRRRR, aluRRImm12, aluRRBitmaskImm, aluRRImmShift, aluRRRShift and aluRRRExtend
 // would use this type.
-type aluOp int
+type aluOp uint32
 
 func (a aluOp) String() string {
 	switch a {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
index 227a96474..f0ede2d6a 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/instr_encoding.go
@@ -44,12 +44,12 @@ func (i *instruction) encode(m *machine) {
 	case callInd:
 		c.Emit4Bytes(encodeUnconditionalBranchReg(regNumberInEncoding[i.rn.realReg()], true))
 	case store8, store16, store32, store64, fpuStore32, fpuStore64, fpuStore128:
-		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], i.amode))
+		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rn.realReg()], *i.getAmode()))
 	case uLoad8, uLoad16, uLoad32, uLoad64, sLoad8, sLoad16, sLoad32, fpuLoad32, fpuLoad64, fpuLoad128:
-		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.realReg()], i.amode))
+		c.Emit4Bytes(encodeLoadOrStore(i.kind, regNumberInEncoding[i.rd.RealReg()], *i.getAmode()))
 	case vecLoad1R:
 		c.Emit4Bytes(encodeVecLoad1R(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u1)))
 	case condBr:
@@ -75,22 +75,22 @@ func (i *instruction) encode(m *machine) {
 			panic("BUG")
 		}
 	case movN:
-		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+		c.Emit4Bytes(encodeMoveWideImmediate(0b00, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
 	case movZ:
-		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+		c.Emit4Bytes(encodeMoveWideImmediate(0b10, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
 	case movK:
-		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.realReg()], i.u1, i.u2, i.u3))
+		c.Emit4Bytes(encodeMoveWideImmediate(0b11, regNumberInEncoding[i.rd.RealReg()], i.u1, uint32(i.u2), uint32(i.u2>>32)))
 	case mov32:
-		to, from := i.rd.realReg(), i.rn.realReg()
+		to, from := i.rd.RealReg(), i.rn.realReg()
 		c.Emit4Bytes(encodeAsMov32(regNumberInEncoding[from], regNumberInEncoding[to]))
 	case mov64:
-		to, from := i.rd.realReg(), i.rn.realReg()
+		to, from := i.rd.RealReg(), i.rn.realReg()
 		toIsSp := to == sp
 		fromIsSp := from == sp
 		c.Emit4Bytes(encodeMov64(regNumberInEncoding[to], regNumberInEncoding[from], toIsSp, fromIsSp))
 	case loadP64, storeP64:
 		rt, rt2 := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
-		amode := i.amode
+		amode := i.getAmode()
 		rn := regNumberInEncoding[amode.rn.RealReg()]
 		var pre bool
 		switch amode.kind {
@@ -102,21 +102,21 @@ func (i *instruction) encode(m *machine) {
 		}
 		c.Emit4Bytes(encodePreOrPostIndexLoadStorePair64(pre, kind == loadP64, rn, rt, rt2, amode.imm))
 	case loadFpuConst32:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		if i.u1 == 0 {
 			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
 		} else {
 			encodeLoadFpuConst32(c, rd, i.u1)
 		}
 	case loadFpuConst64:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		if i.u1 == 0 {
 			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement8B))
 		} else {
-			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.realReg()], i.u1)
+			encodeLoadFpuConst64(c, regNumberInEncoding[i.rd.RealReg()], i.u1)
 		}
 	case loadFpuConst128:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		lo, hi := i.u1, i.u2
 		if lo == 0 && hi == 0 {
 			c.Emit4Bytes(encodeVecRRR(vecOpEOR, rd, rd, rd, vecArrangement16B))
@@ -126,35 +126,35 @@ func (i *instruction) encode(m *machine) {
 	case aluRRRR:
 		c.Emit4Bytes(encodeAluRRRR(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
-			regNumberInEncoding[i.ra.realReg()],
-			uint32(i.u3),
+			regNumberInEncoding[regalloc.VReg(i.u2).RealReg()],
+			uint32(i.u1>>32),
 		))
 	case aluRRImmShift:
 		c.Emit4Bytes(encodeAluRRImm(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.rm.shiftImm()),
-			uint32(i.u3),
+			uint32(i.u2>>32),
 		))
 	case aluRRR:
 		rn := i.rn.realReg()
 		c.Emit4Bytes(encodeAluRRR(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[rn],
 			regNumberInEncoding[i.rm.realReg()],
-			i.u3 == 1,
+			i.u2>>32 == 1,
 			rn == sp,
 		))
 	case aluRRRExtend:
 		rm, exo, to := i.rm.er()
 		c.Emit4Bytes(encodeAluRRRExtend(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[rm.RealReg()],
 			exo,
@@ -164,25 +164,25 @@ func (i *instruction) encode(m *machine) {
 		r, amt, sop := i.rm.sr()
 		c.Emit4Bytes(encodeAluRRRShift(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[r.RealReg()],
 			uint32(amt),
 			sop,
-			i.u3 == 1,
+			i.u2>>32 == 1,
 		))
 	case aluRRBitmaskImm:
 		c.Emit4Bytes(encodeAluBitmaskImmediate(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			i.u2,
-			i.u3 == 1,
+			i.u1>>32 == 1,
 		))
 	case bitRR:
 		c.Emit4Bytes(encodeBitRR(
 			bitOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.u2)),
 		)
@@ -190,22 +190,22 @@ func (i *instruction) encode(m *machine) {
 		imm12, shift := i.rm.imm12()
 		c.Emit4Bytes(encodeAluRRImm12(
 			aluOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			imm12, shift,
-			i.u3 == 1,
+			i.u2>>32 == 1,
 		))
 	case fpuRRR:
 		c.Emit4Bytes(encodeFpuRRR(
 			fpuBinOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case fpuMov64, fpuMov128:
 		// https://developer.arm.com/documentation/ddi0596/2021-12/SIMD-FP-Instructions/MOV--vector---Move-vector--an-alias-of-ORR--vector--register--
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		rn := regNumberInEncoding[i.rn.realReg()]
 		var q uint32
 		if kind == fpuMov128 {
@@ -213,7 +213,7 @@ func (i *instruction) encode(m *machine) {
 		}
 		c.Emit4Bytes(q<<30 | 0b1110101<<21 | rn<<16 | 0b000111<<10 | rn<<5 | rd)
 	case cSet:
-		rd := regNumberInEncoding[i.rd.realReg()]
+		rd := regNumberInEncoding[i.rd.RealReg()]
 		cf := condFlag(i.u1)
 		if i.u2 == 1 {
 			// https://developer.arm.com/documentation/ddi0602/2022-03/Base-Instructions/CSETM--Conditional-Set-Mask--an-alias-of-CSINV-
@@ -225,12 +225,12 @@
 			c.Emit4Bytes(0b1001101010011111<<16 |
 				uint32(cf.invert())<<12 | 0b111111<<5 | rd)
 		}
 	case extend:
-		c.Emit4Bytes(encodeExtend(i.u3 == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.realReg()], regNumberInEncoding[i.rn.realReg()]))
+		c.Emit4Bytes(encodeExtend((i.u2>>32) == 1, byte(i.u1), byte(i.u2), regNumberInEncoding[i.rd.RealReg()], regNumberInEncoding[i.rn.realReg()]))
 	case fpuCmp:
 		// https://developer.arm.com/documentation/ddi0596/2020-12/SIMD-FP-Instructions/FCMP--Floating-point-quiet-Compare--scalar--?lang=en
 		rn, rm := regNumberInEncoding[i.rn.realReg()], regNumberInEncoding[i.rm.realReg()]
 		var ftype uint32
-		if i.u3 == 1 {
+		if i.u1 == 1 {
 			ftype = 0b01 // double precision.
 		}
 		c.Emit4Bytes(0b1111<<25 | ftype<<22 | 1<<21 | rm<<16 | 0b1<<13 | rn<<5)
@@ -242,34 +242,34 @@ func (i *instruction) encode(m *machine) {
 			c.Emit4Bytes(0)
 		}
 	case adr:
-		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.realReg()], uint32(i.u1)))
+		c.Emit4Bytes(encodeAdr(regNumberInEncoding[i.rd.RealReg()], uint32(i.u1)))
 	case cSel:
 		c.Emit4Bytes(encodeConditionalSelect(
 			kind,
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			condFlag(i.u1),
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case fpuCSel:
 		c.Emit4Bytes(encodeFpuCSel(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			condFlag(i.u1),
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case movToVec:
 		c.Emit4Bytes(encodeMoveToVec(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1)),
 			vecIndex(i.u2),
 		))
 	case movFromVec, movFromVecSigned:
 		c.Emit4Bytes(encodeMoveFromVec(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1)),
 			vecIndex(i.u2),
@@ -277,18 +277,18 @@ func (i *instruction) encode(m *machine) {
 		))
 	case vecDup:
 		c.Emit4Bytes(encodeVecDup(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1))))
 	case vecDupElement:
 		c.Emit4Bytes(encodeVecDupElement(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(byte(i.u1)),
 			vecIndex(i.u2)))
 	case vecExtract:
 		c.Emit4Bytes(encodeVecExtract(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(byte(i.u1)),
@@ -296,35 +296,35 @@ func (i *instruction) encode(m *machine) {
 	case vecPermute:
 		c.Emit4Bytes(encodeVecPermute(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(byte(i.u2))))
 	case vecMovElement:
 		c.Emit4Bytes(encodeVecMovElement(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u1),
-			uint32(i.u2), uint32(i.u3),
+			uint32(i.u2), uint32(i.u2>>32),
 		))
 	case vecMisc:
 		c.Emit4Bytes(encodeAdvancedSIMDTwoMisc(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u2),
 		))
 	case vecLanes:
 		c.Emit4Bytes(encodeVecLanes(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			vecArrangement(i.u2),
 		))
 	case vecShiftImm:
 		c.Emit4Bytes(encodeVecShiftImm(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.rm.shiftImm()),
 			vecArrangement(i.u2),
@@ -332,7 +332,7 @@ func (i *instruction) encode(m *machine) {
 	case vecTbl:
 		c.Emit4Bytes(encodeVecTbl(
 			1,
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(i.u2)),
@@ -340,7 +340,7 @@ func (i *instruction) encode(m *machine) {
 	case vecTbl2:
 		c.Emit4Bytes(encodeVecTbl(
 			2,
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(i.u2)),
@@ -353,9 +353,9 @@ func (i *instruction) encode(m *machine) {
 	case fpuRR:
 		c.Emit4Bytes(encodeFloatDataOneSource(
 			fpuUniOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
-			i.u3 == 1,
+			i.u2 == 1,
 		))
 	case vecRRR:
 		if op := vecOp(i.u1); op == vecOpBsl || op == vecOpBit || op == vecOpUmlal {
@@ -365,14 +365,14 @@ func (i *instruction) encode(m *machine) {
 	case vecRRRRewrite:
 		c.Emit4Bytes(encodeVecRRR(
 			vecOp(i.u1),
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			vecArrangement(i.u2),
 		))
 	case cCmpImm:
 		// Conditional compare (immediate) in https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Register?lang=en
-		sf := uint32(i.u3 & 0b1)
+		sf := uint32((i.u2 >> 32) & 0b1)
 		nzcv := uint32(i.u2 & 0b1111)
 		cond := uint32(condFlag(i.u1))
 		imm := uint32(i.rm.data & 0b11111)
@@ -381,7 +381,7 @@ func (i *instruction) encode(m *machine) {
 			sf<<31 | 0b111101001<<22 | imm<<16 | cond<<12 | 0b1<<11 | rn<<5 | nzcv,
 		)
 	case movFromFPSR:
-		rt := regNumberInEncoding[i.rd.realReg()]
+		rt := regNumberInEncoding[i.rd.RealReg()]
 		c.Emit4Bytes(encodeSystemRegisterMove(rt, true))
 	case movToFPSR:
 		rt := regNumberInEncoding[i.rn.realReg()]
@@ -390,13 +390,13 @@ func (i *instruction) encode(m *machine) {
 		c.Emit4Bytes(encodeAtomicRmw(
 			atomicRmwOp(i.u1),
 			regNumberInEncoding[i.rm.realReg()],
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.u2),
 		))
 	case atomicCas:
 		c.Emit4Bytes(encodeAtomicCas(
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			regNumberInEncoding[i.rm.realReg()],
 			regNumberInEncoding[i.rn.realReg()],
 			uint32(i.u2),
@@ -404,7 +404,7 @@ func (i *instruction) encode(m *machine) {
 	case atomicLoad:
 		c.Emit4Bytes(encodeAtomicLoadStore(
 			regNumberInEncoding[i.rn.realReg()],
-			regNumberInEncoding[i.rd.realReg()],
+			regNumberInEncoding[i.rd.RealReg()],
 			uint32(i.u2),
 			1,
 		))
@@ -810,7 +810,7 @@ func encodeFloatDataOneSource(op fpuUniOp, rd, rn uint32, dst64bit bool) uint32
 // encodeCnvBetweenFloatInt encodes as "Conversion between floating-point and integer" in
 // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Scalar-Floating-Point-and-Advanced-SIMD?lang=en
 func encodeCnvBetweenFloatInt(i *instruction) uint32 {
-	rd := regNumberInEncoding[i.rd.realReg()]
+	rd := regNumberInEncoding[i.rd.RealReg()]
 	rn := regNumberInEncoding[i.rn.realReg()]
 
 	var opcode uint32
@@ -822,8 +822,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
 		rmode = 0b00
 
 		signed := i.u1 == 1
-		src64bit := i.u2 == 1
-		dst64bit := i.u3 == 1
+		src64bit := i.u2&1 != 0
+		dst64bit := i.u2&2 != 0
 		if signed {
 			opcode = 0b010
 		} else {
@@ -841,8 +841,8 @@ func encodeCnvBetweenFloatInt(i *instruction) uint32 {
 		rmode = 0b11
 
 		signed := i.u1 == 1
-		src64bit := i.u2 == 1
-		dst64bit := i.u3 == 1
+		src64bit := i.u2&1 != 0
+		dst64bit := i.u2&2 != 0
 
 		if signed {
 			opcode = 0b000
@@ -1787,13 +1787,13 @@ func encodeCBZCBNZ(rt uint32, nz bool, imm19 uint32, _64bit bool) (ret uint32) {
 // https://developer.arm.com/documentation/ddi0596/2020-12/Index-by-Encoding/Data-Processing----Immediate?lang=en
 //
 // "shift" must have been divided by 16 at this point.
-func encodeMoveWideImmediate(opc uint32, rd uint32, imm, shift, _64bit uint64) (ret uint32) {
+func encodeMoveWideImmediate(opc uint32, rd uint32, imm uint64, shift, _64bit uint32) (ret uint32) {
 	ret = rd
 	ret |= uint32(imm&0xffff) << 5
-	ret |= (uint32(shift)) << 21
+	ret |= (shift) << 21
 	ret |= 0b100101 << 23
 	ret |= opc << 29
-	ret |= uint32(_64bit) << 31
+	ret |= _64bit << 31
 	return
 }
 
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
index 698b382d4..6c6824fb0 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_constant.go
@@ -284,18 +284,18 @@ func (m *machine) load64bitConst(c int64, dst regalloc.VReg) {
 
 func (m *machine) insertMOVZ(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
 	instr := m.allocateInstr()
-	instr.asMOVZ(dst, v, uint64(shift), dst64)
+	instr.asMOVZ(dst, v, uint32(shift), dst64)
 	m.insert(instr)
 }
 
 func (m *machine) insertMOVK(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
 	instr := m.allocateInstr()
-	instr.asMOVK(dst, v, uint64(shift), dst64)
+	instr.asMOVK(dst, v, uint32(shift), dst64)
 	m.insert(instr)
 }
 
 func (m *machine) insertMOVN(dst regalloc.VReg, v uint64, shift int, dst64 bool) {
 	instr := m.allocateInstr()
-	instr.asMOVN(dst, v, uint64(shift), dst64)
+	instr.asMOVN(dst, v, uint32(shift), dst64)
 	m.insert(instr)
 }
 
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
index 2bb234e8c..048bf3204 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_instr.go
@@ -52,11 +52,11 @@ func (m *machine) lowerBrTable(i *ssa.Instruction) {
 	maxIndexReg := m.compiler.AllocateVReg(ssa.TypeI32)
 	m.lowerConstantI32(maxIndexReg, int32(len(targets)-1))
 	subs := m.allocateInstr()
-	subs.asALU(aluOpSubS, operandNR(xzrVReg), indexOperand, operandNR(maxIndexReg), false)
+	subs.asALU(aluOpSubS, xzrVReg, indexOperand, operandNR(maxIndexReg), false)
 	m.insert(subs)
 	csel := m.allocateInstr()
 	adjustedIndex := m.compiler.AllocateVReg(ssa.TypeI32)
-	csel.asCSel(operandNR(adjustedIndex), operandNR(maxIndexReg), indexOperand, hs, false)
+	csel.asCSel(adjustedIndex, operandNR(maxIndexReg), indexOperand, hs, false)
 	m.insert(csel)
 
 	brSequence := m.allocateInstr()
@@ -249,7 +249,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 			rc := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)
 			rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 			rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-			rd := operandNR(m.compiler.VRegOf(instr.Return()))
+			rd := m.compiler.VRegOf(instr.Return())
 			m.lowerSelectVec(rc, rn, rm, rd)
 		} else {
 			m.lowerSelect(c, x, y, instr.Return())
@@ -270,7 +270,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x, ctx := instr.Arg2()
 		result := instr.Return()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(result))
+		rd := m.compiler.VRegOf(result)
 		ctxVReg := m.compiler.VRegOf(ctx)
 		m.lowerFpuToInt(rd, rn, ctxVReg, true, x.Type() == ssa.TypeF64,
 			result.Type().Bits() == 64, op == ssa.OpcodeFcvtToSintSat)
@@ -278,7 +278,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x, ctx := instr.Arg2()
 		result := instr.Return()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(result))
+		rd := m.compiler.VRegOf(result)
 		ctxVReg := m.compiler.VRegOf(ctx)
 		m.lowerFpuToInt(rd, rn, ctxVReg, false, x.Type() == ssa.TypeF64,
 			result.Type().Bits() == 64, op == ssa.OpcodeFcvtToUintSat)
@@ -286,25 +286,25 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x := instr.Arg()
 		result := instr.Return()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(result))
+		rd := m.compiler.VRegOf(result)
 		m.lowerIntToFpu(rd, rn, true, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)
 	case ssa.OpcodeFcvtFromUint:
 		x := instr.Arg()
 		result := instr.Return()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(result))
+		rd := m.compiler.VRegOf(result)
 		m.lowerIntToFpu(rd, rn, false, x.Type() == ssa.TypeI64, result.Type().Bits() == 64)
 	case ssa.OpcodeFdemote:
 		v := instr.Arg()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		cnt := m.allocateInstr()
 		cnt.asFpuRR(fpuUniOpCvt64To32, rd, rn, false)
 		m.insert(cnt)
 	case ssa.OpcodeFpromote:
 		v := instr.Arg()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		cnt := m.allocateInstr()
 		cnt.asFpuRR(fpuUniOpCvt32To64, rd, rn, true)
 		m.insert(cnt)
@@ -343,15 +343,15 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		ctxVReg := m.compiler.VRegOf(ctx)
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		m.lowerIDiv(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSdiv)
 	case ssa.OpcodeSrem, ssa.OpcodeUrem:
 		x, y, ctx := instr.Arg3()
 		ctxVReg := m.compiler.VRegOf(ctx)
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
-		m.lowerIRem(ctxVReg, rd, rn, rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)
+		rd := m.compiler.VRegOf(instr.Return())
+		m.lowerIRem(ctxVReg, rd, rn.nr(), rm, x.Type() == ssa.TypeI64, op == ssa.OpcodeSrem)
 	case ssa.OpcodeVconst:
 		result := m.compiler.VRegOf(instr.Return())
 		lo, hi := instr.VconstData()
@@ -362,7 +362,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x := instr.Arg()
 		ins := m.allocateInstr()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		ins.asVecMisc(vecOpNot, rd, rn, vecArrangement16B)
 		m.insert(ins)
 	case ssa.OpcodeVbxor:
@@ -382,12 +382,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
 		creg := m.getOperand_NR(m.compiler.ValueDefinition(c), extModeNone)
-		tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+		tmp := m.compiler.AllocateVReg(ssa.TypeV128)
 
 		// creg is overwritten by BSL, so we need to move it to the result register before the instruction
 		// in case when it is used somewhere else.
 		mov := m.allocateInstr()
-		mov.asFpuMov128(tmp.nr(), creg.nr())
+		mov.asFpuMov128(tmp, creg.nr())
 		m.insert(mov)
 
 		ins := m.allocateInstr()
@@ -396,7 +396,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 
 		mov2 := m.allocateInstr()
 		rd := m.compiler.VRegOf(instr.Return())
-		mov2.asFpuMov128(rd, tmp.nr())
+		mov2.asFpuMov128(rd, tmp)
 		m.insert(mov2)
 	case ssa.OpcodeVanyTrue, ssa.OpcodeVallTrue:
 		x, lane := instr.ArgWithLane()
@@ -405,12 +405,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 			arr = ssaLaneToArrangement(lane)
 		}
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		m.lowerVcheckTrue(op, rm, rd, arr)
 	case ssa.OpcodeVhighBits:
 		x, lane := instr.ArgWithLane()
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		arr := ssaLaneToArrangement(lane)
 		m.lowerVhighBits(rm, rd, arr)
 	case ssa.OpcodeVIadd:
@@ -441,9 +441,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 			panic("unsupported lane " + lane.String())
 		}
 
-		widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo, vv, operandShiftImm(0), loArr)
-		widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi, vv, operandShiftImm(0), hiArr)
-		addp := m.allocateInstr().asVecRRR(vecOpAddp, operandNR(m.compiler.VRegOf(instr.Return())), tmpLo, tmpHi, dstArr)
+		widenLo := m.allocateInstr().asVecShiftImm(widen, tmpLo.nr(), vv, operandShiftImm(0), loArr)
+		widenHi := m.allocateInstr().asVecShiftImm(widen, tmpHi.nr(), vv, operandShiftImm(0), hiArr)
+		addp := m.allocateInstr().asVecRRR(vecOpAddp, m.compiler.VRegOf(instr.Return()), tmpLo, tmpHi, dstArr)
 		m.insert(widenLo)
 		m.insert(widenHi)
 		m.insert(addp)
@@ -493,7 +493,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		arr := ssaLaneToArrangement(lane)
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		m.lowerVIMul(rd, rn, rm, arr)
 	case ssa.OpcodeVIabs:
 		m.lowerVecMisc(vecOpAbs, instr)
@@ -507,7 +507,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		arr := ssaLaneToArrangement(lane)
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		m.lowerVShift(op, rd, rn, rm, arr)
 	case ssa.OpcodeVSqrt:
 		m.lowerVecMisc(vecOpFsqrt, instr)
@@ -547,18 +547,18 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x, lane := instr.ArgWithLane()
 		arr := ssaLaneToArrangement(lane)
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		m.lowerVfpuToInt(rd, rn, arr, op == ssa.OpcodeVFcvtToSintSat)
 	case ssa.OpcodeVFcvtFromSint, ssa.OpcodeVFcvtFromUint:
 		x, lane := instr.ArgWithLane()
 		arr := ssaLaneToArrangement(lane)
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		m.lowerVfpuFromInt(rd, rn, arr, op == ssa.OpcodeVFcvtFromSint)
 	case ssa.OpcodeSwidenLow, ssa.OpcodeUwidenLow:
 		x, lane := instr.ArgWithLane()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
 		var arr vecArrangement
 		switch lane {
@@ -580,7 +580,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 	case ssa.OpcodeSwidenHigh, ssa.OpcodeUwidenHigh:
 		x, lane := instr.ArgWithLane()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
 		arr := ssaLaneToArrangement(lane)
 
@@ -607,9 +607,9 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		}
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
-		tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+		tmp := m.compiler.AllocateVReg(ssa.TypeV128)
 
 		loQxtn := m.allocateInstr()
 		hiQxtn := m.allocateInstr()
@@ -628,7 +628,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		m.insert(hiQxtn)
 
 		mov := m.allocateInstr()
-		mov.asFpuMov128(rd.nr(), tmp.nr())
+		mov.asFpuMov128(rd, tmp)
 		m.insert(mov)
 	case ssa.OpcodeFvpromoteLow:
 		x, lane := instr.ArgWithLane()
@@ -637,7 +637,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		}
 		ins := m.allocateInstr()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		ins.asVecMisc(vecOpFcvtl, rd, rn, vecArrangement2S)
 		m.insert(ins)
 	case ssa.OpcodeFvdemote:
@@ -647,14 +647,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		}
 		ins := m.allocateInstr()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 		ins.asVecMisc(vecOpFcvtn, rd, rn, vecArrangement2S)
 		m.insert(ins)
 	case ssa.OpcodeExtractlane:
 		x, index, signed, lane := instr.ExtractlaneData()
 
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
 		mov := m.allocateInstr()
 		switch lane {
@@ -680,12 +680,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x, y, index, lane := instr.InsertlaneData()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
-		tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+		rd := m.compiler.VRegOf(instr.Return())
+		tmpReg := m.compiler.AllocateVReg(ssa.TypeV128)
 
 		// Initially mov rn to tmp.
 		mov1 := m.allocateInstr()
-		mov1.asFpuMov128(tmpReg.nr(), rn.nr())
+		mov1.asFpuMov128(tmpReg, rn.nr())
 		m.insert(mov1)
 
 		// movToVec and vecMovElement do not clear the remaining bits to zero,
@@ -709,14 +709,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 
 		// Finally mov tmp to rd.
 		mov3 := m.allocateInstr()
-		mov3.asFpuMov128(rd.nr(), tmpReg.nr())
+		mov3.asFpuMov128(rd, tmpReg)
 		m.insert(mov3)
 	case ssa.OpcodeSwizzle:
 		x, y, lane := instr.Arg2WithLane()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
 		arr := ssaLaneToArrangement(lane)
 
@@ -729,14 +729,14 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		x, y, lane1, lane2 := instr.ShuffleData()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 		rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
 		m.lowerShuffle(rd, rn, rm, lane1, lane2)
 	case ssa.OpcodeSplat:
 		x, lane := instr.ArgWithLane()
 		rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
+		rd := m.compiler.VRegOf(instr.Return())
 
 		dup := m.allocateInstr()
 		switch lane {
@@ -760,12 +760,12 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 		xx, yy := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone),
 			m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
 		tmp, tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128)), operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
-		m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp, xx, yy, vecArrangement8H))
-		m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2, xx, yy, vecArrangement8H))
-		m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp, tmp, tmp2, vecArrangement4S))
+		m.insert(m.allocateInstr().asVecRRR(vecOpSmull, tmp.nr(), xx, yy, vecArrangement8H))
+		m.insert(m.allocateInstr().asVecRRR(vecOpSmull2, tmp2.nr(), xx, yy, vecArrangement8H))
+		m.insert(m.allocateInstr().asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp2, vecArrangement4S))
 
-		rd := operandNR(m.compiler.VRegOf(instr.Return()))
-		m.insert(m.allocateInstr().asFpuMov128(rd.nr(), tmp.nr()))
+		rd := m.compiler.VRegOf(instr.Return())
+		m.insert(m.allocateInstr().asFpuMov128(rd, tmp.nr()))
 	case ssa.OpcodeLoadSplat:
 		ptr, offset, lane := instr.LoadSplatData()
@@ -794,7 +794,7 @@ func (m *machine) LowerInstr(instr *ssa.Instruction) {
 	m.executableContext.FlushPendingInstructions()
 }
 
-func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
+func (m *machine) lowerShuffle(rd regalloc.VReg, rn, rm operand, lane1, lane2 uint64) {
 	// `tbl2` requires 2 consecutive registers, so we arbitrarily pick v29, v30.
 	vReg, wReg := v29VReg, v30VReg
 
@@ -822,7 +822,7 @@ func (m *machine) lowerShuffle(rd, rn, rm operand, lane1, lane2 uint64) {
 	m.insert(tbl2)
 }
 
-func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangement) {
+func (m *machine) lowerVShift(op ssa.Opcode, rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
 	var modulo byte
 	switch arr {
 	case vecArrangement16B:
@@ -847,13 +847,13 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem
 	if op != ssa.OpcodeVIshl {
 		// Negate the amount to make this as right shift.
 		neg := m.allocateInstr()
-		neg.asALU(aluOpSub, rtmp, operandNR(xzrVReg), rtmp, true)
+		neg.asALU(aluOpSub, rtmp.nr(), operandNR(xzrVReg), rtmp, true)
 		m.insert(neg)
 	}

 	// Copy the shift amount into a vector register as sshl/ushl requires it to be there.
 	dup := m.allocateInstr()
-	dup.asVecDup(vtmp, rtmp, arr)
+	dup.asVecDup(vtmp.nr(), rtmp, arr)
 	m.insert(dup)

 	if op == ssa.OpcodeVIshl || op == ssa.OpcodeVSshr {
@@ -867,7 +867,7 @@ func (m *machine) lowerVShift(op ssa.Opcode, rd, rn, rm operand, arr vecArrangem
 	}
 }

-func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangement) {
+func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm operand, rd regalloc.VReg, arr vecArrangement) {
 	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))

 	// Special case VallTrue for i64x2.
@@ -878,11 +878,11 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
 		//	cset dst, eq

 		ins := m.allocateInstr()
-		ins.asVecMisc(vecOpCmeq0, tmp, rm, vecArrangement2D)
+		ins.asVecMisc(vecOpCmeq0, tmp.nr(), rm, vecArrangement2D)
 		m.insert(ins)

 		addp := m.allocateInstr()
-		addp.asVecRRR(vecOpAddp, tmp, tmp, tmp, vecArrangement2D)
+		addp.asVecRRR(vecOpAddp, tmp.nr(), tmp, tmp, vecArrangement2D)
 		m.insert(addp)

 		fcmp := m.allocateInstr()
@@ -890,7 +890,7 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
 		m.insert(fcmp)

 		cset := m.allocateInstr()
-		cset.asCSet(rd.nr(), false, eq)
+		cset.asCSet(rd, false, eq)
 		m.insert(cset)

 		return
@@ -900,10 +900,10 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
 	ins := m.allocateInstr()
 	if op == ssa.OpcodeVanyTrue {
 		// 	umaxp v4?.16b, v2?.16b, v2?.16b
-		ins.asVecRRR(vecOpUmaxp, tmp, rm, rm, vecArrangement16B)
+		ins.asVecRRR(vecOpUmaxp, tmp.nr(), rm, rm, vecArrangement16B)
 	} else {
 		// 	uminv d4?, v2?.4s
-		ins.asVecLanes(vecOpUminv, tmp, rm, arr)
+		ins.asVecLanes(vecOpUminv, tmp.nr(), rm, arr)
 	}
 	m.insert(ins)

@@ -917,15 +917,15 @@ func (m *machine) lowerVcheckTrue(op ssa.Opcode, rm, rd operand, arr vecArrangem
 	m.insert(movv)

 	fc := m.allocateInstr()
-	fc.asCCmpImm(rd, uint64(0), al, 0, true)
+	fc.asCCmpImm(operandNR(rd), uint64(0), al, 0, true)
 	m.insert(fc)

 	cset := m.allocateInstr()
-	cset.asCSet(rd.nr(), false, ne)
+	cset.asCSet(rd, false, ne)
 	m.insert(cset)
 }

-func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
+func (m *machine) lowerVhighBits(rm operand, rd regalloc.VReg, arr vecArrangement) {
 	r0 := operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
 	v0 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
 	v1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
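lowerVcheckTrue reduces a whole vector to one boolean: umaxp answers v128.any_true (is the maximum of all bytes non-zero?), while uminv answers all_true (is the minimum of all lanes non-zero?). In scalar form, the two reductions look roughly like this (illustrative only):

	// anyTrue mirrors the umaxp path: true if any byte is non-zero.
	func anyTrue(lanes []uint8) bool {
		for _, l := range lanes {
			if l != 0 {
				return true
			}
		}
		return false
	}

	// allTrue mirrors the uminv path: false if any lane is zero.
	func allTrue(lanes []uint32) bool {
		for _, l := range lanes {
			if l == 0 {
				return false
			}
		}
		return true
	}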
@@ -947,7 +947,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
 		// Right arithmetic shift on the original vector and store the result into v1. So we have:
 		// v1[i] = 0xff if vi<0, 0 otherwise.
 		sshr := m.allocateInstr()
-		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(7), vecArrangement16B)
+		sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(7), vecArrangement16B)
 		m.insert(sshr)

 		// Load the bit mask into r0.
@@ -958,7 +958,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {

 		// dup r0 to v0.
 		dup := m.allocateInstr()
-		dup.asVecDup(v0, r0, vecArrangement2D)
+		dup.asVecDup(v0.nr(), r0, vecArrangement2D)
 		m.insert(dup)

 		// Lane-wise logical AND with the bit mask, meaning that we have
@@ -967,23 +967,23 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
 		// Below, we use the following notation:
 		// wi := (1 << i) if vi<0, 0 otherwise.
 		and := m.allocateInstr()
-		and.asVecRRR(vecOpAnd, v1, v1, v0, vecArrangement16B)
+		and.asVecRRR(vecOpAnd, v1.nr(), v1, v0, vecArrangement16B)
 		m.insert(and)

 		// Swap the lower and higher 8 byte elements, and write it into v0, meaning that we have
 		// v0[i] = w(i+8) if i < 8, w(i-8) otherwise.
 		ext := m.allocateInstr()
-		ext.asVecExtract(v0, v1, v1, vecArrangement16B, uint32(8))
+		ext.asVecExtract(v0.nr(), v1, v1, vecArrangement16B, uint32(8))
 		m.insert(ext)

 		// v = [w0, w8, ..., w7, w15]
 		zip1 := m.allocateInstr()
-		zip1.asVecPermute(vecOpZip1, v0, v1, v0, vecArrangement16B)
+		zip1.asVecPermute(vecOpZip1, v0.nr(), v1, v0, vecArrangement16B)
 		m.insert(zip1)

 		// v.h[0] = w0 + ... + w15
 		addv := m.allocateInstr()
-		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
+		addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)
 		m.insert(addv)

 		// Extract the v.h[0] as the result.
@@ -1006,7 +1006,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
 		// Right arithmetic shift on the original vector and store the result into v1. So we have:
 		// v[i] = 0xffff if vi<0, 0 otherwise.
 		sshr := m.allocateInstr()
-		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(15), vecArrangement8H)
+		sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(15), vecArrangement8H)
 		m.insert(sshr)

 		// Load the bit mask into r0.
@@ -1014,26 +1014,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {

 		// dup r0 to vector v0.
 		dup := m.allocateInstr()
-		dup.asVecDup(v0, r0, vecArrangement2D)
+		dup.asVecDup(v0.nr(), r0, vecArrangement2D)
 		m.insert(dup)

 		lsl := m.allocateInstr()
-		lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(4), true)
+		lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(4), true)
 		m.insert(lsl)

 		movv := m.allocateInstr()
-		movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
+		movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))
 		m.insert(movv)

 		// Lane-wise logical AND with the bitmask, meaning that we have
 		// v[i] = (1 << i)     if vi<0, 0 otherwise for i=0..3
 		//      = (1 << (i+4)) if vi<0, 0 otherwise for i=3..7
 		and := m.allocateInstr()
-		and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
+		and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)
 		m.insert(and)

 		addv := m.allocateInstr()
-		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement8H)
+		addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement8H)
 		m.insert(addv)

 		movfv := m.allocateInstr()
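The sshr/and/ext/zip1/addv sequence above implements the high-bits (movemask-style) extraction: lane i contributes bit i of the result exactly when its sign bit is set. A scalar equivalent for the 16-byte arrangement, for orientation only:

	// highBits16x8 collects the sign bits of 16 byte lanes into a 16-bit mask,
	// the same value the NEON sequence materializes in the low half of v0.
	func highBits16x8(lanes [16]int8) (mask uint16) {
		for i, l := range lanes {
			if l < 0 { // sign bit set, i.e. sshr #7 produced 0xff
				mask |= 1 << i
			}
		}
		return
	}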
@@ -1055,7 +1055,7 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {
 		// Right arithmetic shift on the original vector and store the result into v1. So we have:
 		// v[i] = 0xffffffff if vi<0, 0 otherwise.
 		sshr := m.allocateInstr()
-		sshr.asVecShiftImm(vecOpSshr, v1, rm, operandShiftImm(31), vecArrangement4S)
+		sshr.asVecShiftImm(vecOpSshr, v1.nr(), rm, operandShiftImm(31), vecArrangement4S)
 		m.insert(sshr)

 		// Load the bit mask into r0.
@@ -1063,26 +1063,26 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {

 		// dup r0 to vector v0.
 		dup := m.allocateInstr()
-		dup.asVecDup(v0, r0, vecArrangement2D)
+		dup.asVecDup(v0.nr(), r0, vecArrangement2D)
 		m.insert(dup)

 		lsl := m.allocateInstr()
-		lsl.asALUShift(aluOpLsl, r0, r0, operandShiftImm(2), true)
+		lsl.asALUShift(aluOpLsl, r0.nr(), r0, operandShiftImm(2), true)
 		m.insert(lsl)

 		movv := m.allocateInstr()
-		movv.asMovToVec(v0, r0, vecArrangementD, vecIndex(1))
+		movv.asMovToVec(v0.nr(), r0, vecArrangementD, vecIndex(1))
 		m.insert(movv)

 		// Lane-wise logical AND with the bitmask, meaning that we have
 		// v[i] = (1 << i)     if vi<0, 0 otherwise for i in [0, 1]
 		//      = (1 << (i+4)) if vi<0, 0 otherwise for i in [2, 3]
 		and := m.allocateInstr()
-		and.asVecRRR(vecOpAnd, v0, v1, v0, vecArrangement16B)
+		and.asVecRRR(vecOpAnd, v0.nr(), v1, v0, vecArrangement16B)
 		m.insert(and)

 		addv := m.allocateInstr()
-		addv.asVecLanes(vecOpAddv, v0, v0, vecArrangement4S)
+		addv.asVecLanes(vecOpAddv, v0.nr(), v0, vecArrangement4S)
 		m.insert(addv)

 		movfv := m.allocateInstr()
@@ -1102,21 +1102,21 @@ func (m *machine) lowerVhighBits(rm, rd operand, arr vecArrangement) {

 		// Move the higher 64-bit int into r0.
 		movv1 := m.allocateInstr()
-		movv1.asMovFromVec(r0, rm, vecArrangementD, vecIndex(1), false)
+		movv1.asMovFromVec(r0.nr(), rm, vecArrangementD, vecIndex(1), false)
 		m.insert(movv1)

 		// Move the sign bit into the least significant bit.
 		lsr1 := m.allocateInstr()
-		lsr1.asALUShift(aluOpLsr, r0, r0, operandShiftImm(63), true)
+		lsr1.asALUShift(aluOpLsr, r0.nr(), r0, operandShiftImm(63), true)
 		m.insert(lsr1)

 		lsr2 := m.allocateInstr()
-		lsr2.asALUShift(aluOpLsr, rd, rd, operandShiftImm(63), true)
+		lsr2.asALUShift(aluOpLsr, rd, operandNR(rd), operandShiftImm(63), true)
 		m.insert(lsr2)

 		// rd = (r0<<1) | rd
 		lsl := m.allocateInstr()
-		lsl.asALU(aluOpAdd, rd, rd, operandSR(r0.nr(), 1, shiftOpLSL), false)
+		lsl.asALU(aluOpAdd, rd, operandNR(rd), operandSR(r0.nr(), 1, shiftOpLSL), false)
 		m.insert(lsl)
 	default:
 		panic("Unsupported " + arr.String())
@@ -1128,7 +1128,7 @@ func (m *machine) lowerVecMisc(op vecOp, instr *ssa.Instruction) {
 	arr := ssaLaneToArrangement(lane)
 	ins := m.allocateInstr()
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(instr.Return()))
+	rd := m.compiler.VRegOf(instr.Return())
 	ins.asVecMisc(op, rd, rn, arr)
 	m.insert(ins)
 }
@@ -1137,22 +1137,22 @@ func (m *machine) lowerVecRRR(op vecOp, x, y, ret ssa.Value, arr vecArrangement)
 	ins := m.allocateInstr()
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(ret))
+	rd := m.compiler.VRegOf(ret)
 	ins.asVecRRR(op, rd, rn, rm, arr)
 	m.insert(ins)
 }

-func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
+func (m *machine) lowerVIMul(rd regalloc.VReg, rn, rm operand, arr vecArrangement) {
 	if arr != vecArrangement2D {
 		mul := m.allocateInstr()
 		mul.asVecRRR(vecOpMul, rd, rn, rm, arr)
 		m.insert(mul)
 	} else {
-		tmp1 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
-		tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
-		tmp3 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+		tmp1 := m.compiler.AllocateVReg(ssa.TypeV128)
+		tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)
+		tmp3 := m.compiler.AllocateVReg(ssa.TypeV128)

-		tmpRes := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+		tmpRes := m.compiler.AllocateVReg(ssa.TypeV128)

 		// Following the algorithm in https://chromium-review.googlesource.com/c/v8/v8/+/1781696
 		rev64 := m.allocateInstr()
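The rev64/mul/xtn/addp/shll/umlal sequence that follows computes each 64-bit lane product out of 32-bit halves, since AArch64 has no 64x64 vector multiply. In scalar Go, where unsigned arithmetic is already modulo 2^64, the same decomposition reads:

	// mul64 computes x*y mod 2^64 from 32-bit halves; the xHi*yHi term
	// would be shifted past bit 63, so it drops out entirely.
	func mul64(x, y uint64) uint64 {
		xLo, xHi := x&0xffff_ffff, x>>32
		yLo, yHi := y&0xffff_ffff, y>>32
		cross := xLo*yHi + xHi*yLo // the two cross terms (the umlal/addp work)
		return xLo*yLo + cross<<32
	}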
@@ -1160,7 +1160,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
 		m.insert(rev64)

 		mul := m.allocateInstr()
-		mul.asVecRRR(vecOpMul, tmp2, tmp2, rn, vecArrangement4S)
+		mul.asVecRRR(vecOpMul, tmp2, operandNR(tmp2), rn, vecArrangement4S)
 		m.insert(mul)

 		xtn1 := m.allocateInstr()
@@ -1168,7 +1168,7 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
 		m.insert(xtn1)

 		addp := m.allocateInstr()
-		addp.asVecRRR(vecOpAddp, tmp2, tmp2, tmp2, vecArrangement4S)
+		addp.asVecRRR(vecOpAddp, tmp2, operandNR(tmp2), operandNR(tmp2), vecArrangement4S)
 		m.insert(addp)

 		xtn2 := m.allocateInstr()
@@ -1179,15 +1179,15 @@ func (m *machine) lowerVIMul(rd, rn, rm operand, arr vecArrangement) {
 		// In short, in UMLAL instruction, the result register is also one of the source register, and
 		// the value on the result register is significant.
 		shll := m.allocateInstr()
-		shll.asVecMisc(vecOpShll, tmpRes, tmp2, vecArrangement2S)
+		shll.asVecMisc(vecOpShll, tmpRes, operandNR(tmp2), vecArrangement2S)
 		m.insert(shll)

 		umlal := m.allocateInstr()
-		umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, tmp3, tmp1, vecArrangement2S)
+		umlal.asVecRRRRewrite(vecOpUmlal, tmpRes, operandNR(tmp3), operandNR(tmp1), vecArrangement2S)
 		m.insert(umlal)

 		mov := m.allocateInstr()
-		mov.asFpuMov128(rd.nr(), tmpRes.nr())
+		mov.asFpuMov128(rd, tmpRes)
 		m.insert(mov)
 	}
 }
@@ -1203,7 +1203,7 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
 	// BSL modifies the destination register, so we need to use a temporary register so that
 	// the actual definition of the destination register happens *after* the BSL instruction.
 	// That way, we can force the spill instruction to be inserted after the BSL instruction.
-	tmp := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+	tmp := m.compiler.AllocateVReg(ssa.TypeV128)

 	fcmgt := m.allocateInstr()
 	if max {
@@ -1220,17 +1220,17 @@ func (m *machine) lowerVMinMaxPseudo(instr *ssa.Instruction, max bool) {
 	res := operandNR(m.compiler.VRegOf(instr.Return()))
 	mov2 := m.allocateInstr()
-	mov2.asFpuMov128(res.nr(), tmp.nr())
+	mov2.asFpuMov128(res.nr(), tmp)
 	m.insert(mov2)
 }

-func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
+func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn regalloc.VReg, rm operand, _64bit, signed bool) {
 	div := m.allocateInstr()

 	if signed {
-		div.asALU(aluOpSDiv, rd, rn, rm, _64bit)
+		div.asALU(aluOpSDiv, rd, operandNR(rn), rm, _64bit)
 	} else {
-		div.asALU(aluOpUDiv, rd, rn, rm, _64bit)
+		div.asALU(aluOpUDiv, rd, operandNR(rn), rm, _64bit)
 	}
 	m.insert(div)

@@ -1239,11 +1239,11 @@ func (m *machine) lowerIRem(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi

 	// rd = rn-rd*rm by MSUB instruction.
 	msub := m.allocateInstr()
-	msub.asALURRRR(aluOpMSub, rd, rd, rm, rn, _64bit)
+	msub.asALURRRR(aluOpMSub, rd, operandNR(rd), rm, rn, _64bit)
 	m.insert(msub)
 }

-func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bit, signed bool) {
+func (m *machine) lowerIDiv(execCtxVReg, rd regalloc.VReg, rn, rm operand, _64bit, signed bool) {
 	div := m.allocateInstr()

 	if signed {
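The MSUB in lowerIRem encodes the usual remainder identity rem = n - (n/d)*d on top of the preceding SDIV/UDIV. Assuming a non-zero divisor (the trap checks in lowerIDiv guard that), the scalar form is simply:

	// rem is what SDIV followed by MSUB computes for the signed case;
	// it matches Go's % operator for int64.
	func rem(n, d int64) int64 {
		q := n / d    // the SDIV
		return n - q*d // the MSUB: rd = rn - rd*rm
	}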
@@ -1260,7 +1260,7 @@ func (m *machine) lowerIDiv(execCtxVReg regalloc.VReg, rd, rn, rm operand, _64bi
 		// We need to check the signed overflow which happens iff "math.MinInt{32,64} / -1"
 		minusOneCheck := m.allocateInstr()
 		// Sets eq condition if rm == -1.
-		minusOneCheck.asALU(aluOpAddS, operandNR(xzrVReg), rm, operandImm12(1, 0), _64bit)
+		minusOneCheck.asALU(aluOpAddS, xzrVReg, rm, operandImm12(1, 0), _64bit)
 		m.insert(minusOneCheck)

 		ccmp := m.allocateInstr()
@@ -1290,20 +1290,20 @@ func (m *machine) exitIfNot(execCtxVReg regalloc.VReg, c cond, cond64bit bool, c
 func (m *machine) lowerFcopysign(x, y, ret ssa.Value) {
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-	var tmpI, tmpF operand
+	var tmpI, tmpF regalloc.VReg
 	_64 := x.Type() == ssa.TypeF64
 	if _64 {
-		tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
-		tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+		tmpF = m.compiler.AllocateVReg(ssa.TypeF64)
+		tmpI = m.compiler.AllocateVReg(ssa.TypeI64)
 	} else {
-		tmpF = operandNR(m.compiler.AllocateVReg(ssa.TypeF32))
-		tmpI = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+		tmpF = m.compiler.AllocateVReg(ssa.TypeF32)
+		tmpI = m.compiler.AllocateVReg(ssa.TypeI32)
 	}
 	rd := m.compiler.VRegOf(ret)
-	m.lowerFcopysignImpl(operandNR(rd), rn, rm, tmpI, tmpF, _64)
+	m.lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF, _64)
 }

-func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool) {
+func (m *machine) lowerFcopysignImpl(rd regalloc.VReg, rn, rm operand, tmpI, tmpF regalloc.VReg, _64bit bool) {
 	// This is exactly the same code emitted by GCC for "__builtin_copysign":
 	//
 	//    mov     x0, -9223372036854775808
@@ -1313,26 +1313,26 @@ func (m *machine) lowerFcopysignImpl(rd, rn, rm, tmpI, tmpF operand, _64bit bool
 	setMSB := m.allocateInstr()
 	if _64bit {
-		m.lowerConstantI64(tmpI.nr(), math.MinInt64)
-		setMSB.asMovToVec(tmpF, tmpI, vecArrangementD, vecIndex(0))
+		m.lowerConstantI64(tmpI, math.MinInt64)
+		setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementD, vecIndex(0))
 	} else {
-		m.lowerConstantI32(tmpI.nr(), math.MinInt32)
-		setMSB.asMovToVec(tmpF, tmpI, vecArrangementS, vecIndex(0))
+		m.lowerConstantI32(tmpI, math.MinInt32)
+		setMSB.asMovToVec(tmpF, operandNR(tmpI), vecArrangementS, vecIndex(0))
 	}
 	m.insert(setMSB)

-	tmpReg := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
+	tmpReg := m.compiler.AllocateVReg(ssa.TypeF64)

 	mov := m.allocateInstr()
-	mov.asFpuMov64(tmpReg.nr(), rn.nr())
+	mov.asFpuMov64(tmpReg, rn.nr())
 	m.insert(mov)

 	vbit := m.allocateInstr()
-	vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, tmpF, vecArrangement8B)
+	vbit.asVecRRRRewrite(vecOpBit, tmpReg, rm, operandNR(tmpF), vecArrangement8B)
 	m.insert(vbit)

 	movDst := m.allocateInstr()
-	movDst.asFpuMov64(rd.nr(), tmpReg.nr())
+	movDst.asFpuMov64(rd, tmpReg)
 	m.insert(movDst)
 }

@@ -1340,7 +1340,7 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {
 	v, dstType := instr.BitcastData()
 	srcType := v.Type()
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(v), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(instr.Return()))
+	rd := m.compiler.VRegOf(instr.Return())
 	srcInt := srcType.IsInt()
 	dstInt := dstType.IsInt()
 	switch {
@@ -1371,14 +1371,14 @@ func (m *machine) lowerBitcast(instr *ssa.Instruction) {

 func (m *machine) lowerFpuUniOp(op fpuUniOp, in, out ssa.Value) {
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(in), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(out))
+	rd := m.compiler.VRegOf(out)

 	neg := m.allocateInstr()
 	neg.asFpuRR(op, rd, rn, in.Type().Bits() == 64)
 	m.insert(neg)
 }

-func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
+func (m *machine) lowerFpuToInt(rd regalloc.VReg, rn operand, ctx regalloc.VReg, signed, src64bit, dst64bit, nonTrapping bool) {
 	if !nonTrapping {
 		// First of all, we have to clear the FPU flags.
 		flagClear := m.allocateInstr()
@@ -1405,7 +1405,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
 		// Check if the conversion was undefined by comparing the status with 1.
 		// See https://developer.arm.com/documentation/ddi0595/2020-12/AArch64-Registers/FPSR--Floating-point-Status-Register
 		alu := m.allocateInstr()
-		alu.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpReg), operandImm12(1, 0), true)
+		alu.asALU(aluOpSubS, xzrVReg, operandNR(tmpReg), operandImm12(1, 0), true)
 		m.insert(alu)

 		// If it is not undefined, we can return the result.
@@ -1429,7 +1429,7 @@ func (m *machine) lowerFpuToInt(rd, rn operand, ctx regalloc.VReg, signed, src64
 	}
 }

-func (m *machine) lowerIntToFpu(rd, rn operand, signed, src64bit, dst64bit bool) {
+func (m *machine) lowerIntToFpu(rd regalloc.VReg, rn operand, signed, src64bit, dst64bit bool) {
 	cvt := m.allocateInstr()
 	cvt.asIntToFpu(rd, rn, signed, src64bit, dst64bit)
 	m.insert(cvt)
@@ -1456,7 +1456,7 @@ func (m *machine) lowerFpuBinOp(si *ssa.Instruction) {
 	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
 	rn := m.getOperand_NR(xDef, extModeNone)
 	rm := m.getOperand_NR(yDef, extModeNone)
-	rd := operandNR(m.compiler.VRegOf(si.Return()))
+	rd := m.compiler.VRegOf(si.Return())
 	instr.asFpuRRR(op, rd, rn, rm, x.Type().Bits() == 64)
 	m.insert(instr)
 }
@@ -1482,7 +1482,7 @@ func (m *machine) lowerSubOrAdd(si *ssa.Instruction, add bool) {
 	case !add && yNegated: // rn+rm = x-(-y) = x-y
 		aop = aluOpAdd
 	}
-	rd := operandNR(m.compiler.VRegOf(si.Return()))
+	rd := m.compiler.VRegOf(si.Return())
 	alu := m.allocateInstr()
 	alu.asALU(aop, rd, rn, rm, x.Type().Bits() == 64)
 	m.insert(alu)
@@ -1527,7 +1527,7 @@ func (m *machine) lowerIcmp(si *ssa.Instruction) {
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)
 	rm := m.getOperand_Imm12_ER_SR_NR(m.compiler.ValueDefinition(y), ext)
 	alu := m.allocateInstr()
-	alu.asALU(aluOpSubS, operandNR(xzrVReg), rn, rm, in64bit)
+	alu.asALU(aluOpSubS, xzrVReg, rn, rm, in64bit)
 	m.insert(alu)

 	cset := m.allocateInstr()
@@ -1542,7 +1542,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {

 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(si.Return()))
+	rd := m.compiler.VRegOf(si.Return())

 	switch flag {
 	case eq:
@@ -1554,7 +1554,7 @@ func (m *machine) lowerVIcmp(si *ssa.Instruction) {
 		cmp.asVecRRR(vecOpCmeq, rd, rn, rm, arr)
 		m.insert(cmp)
 		not := m.allocateInstr()
-		not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B)
+		not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)
 		m.insert(not)
 	case ge:
 		cmp := m.allocateInstr()
@@ -1598,7 +1598,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {

 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(si.Return()))
+	rd := m.compiler.VRegOf(si.Return())

 	switch flag {
 	case eq:
@@ -1610,7 +1610,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
 		cmp.asVecRRR(vecOpFcmeq, rd, rn, rm, arr)
 		m.insert(cmp)
 		not := m.allocateInstr()
-		not.asVecMisc(vecOpNot, rd, rd, vecArrangement16B)
+		not.asVecMisc(vecOpNot, rd, operandNR(rd), vecArrangement16B)
 		m.insert(not)
 	case ge:
 		cmp := m.allocateInstr()
@@ -1631,7 +1631,7 @@ func (m *machine) lowerVFcmp(si *ssa.Instruction) {
 	}
 }
-func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool) {
+func (m *machine) lowerVfpuToInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {
 	cvt := m.allocateInstr()
 	if signed {
 		cvt.asVecMisc(vecOpFcvtzs, rd, rn, arr)
@@ -1643,15 +1643,15 @@ func (m *machine) lowerVfpuToInt(rd, rn operand, arr vecArrangement, signed bool
 	if arr == vecArrangement2D {
 		narrow := m.allocateInstr()
 		if signed {
-			narrow.asVecMisc(vecOpSqxtn, rd, rd, vecArrangement2S)
+			narrow.asVecMisc(vecOpSqxtn, rd, operandNR(rd), vecArrangement2S)
 		} else {
-			narrow.asVecMisc(vecOpUqxtn, rd, rd, vecArrangement2S)
+			narrow.asVecMisc(vecOpUqxtn, rd, operandNR(rd), vecArrangement2S)
 		}
 		m.insert(narrow)
 	}
 }

-func (m *machine) lowerVfpuFromInt(rd, rn operand, arr vecArrangement, signed bool) {
+func (m *machine) lowerVfpuFromInt(rd regalloc.VReg, rn operand, arr vecArrangement, signed bool) {
 	cvt := m.allocateInstr()
 	if signed {
 		cvt.asVecMisc(vecOpScvtf, rd, rn, arr)
@@ -1665,7 +1665,7 @@ func (m *machine) lowerShifts(si *ssa.Instruction, ext extMode, aluOp aluOp) {
 	x, amount := si.Arg2()
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), ext)
 	rm := m.getOperand_ShiftImm_NR(m.compiler.ValueDefinition(amount), ext, x.Type().Bits())
-	rd := operandNR(m.compiler.VRegOf(si.Return()))
+	rd := m.compiler.VRegOf(si.Return())

 	alu := m.allocateInstr()
 	alu.asALUShift(aluOp, rd, rn, rm, x.Type().Bits() == 64)
@@ -1678,11 +1678,11 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult
 	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
 	rn := m.getOperand_NR(xDef, extModeNone)

-	var rd operand
+	var rd regalloc.VReg
 	if ignoreResult {
-		rd = operandNR(xzrVReg)
+		rd = xzrVReg
 	} else {
-		rd = operandNR(m.compiler.VRegOf(si.Return()))
+		rd = m.compiler.VRegOf(si.Return())
 	}

 	_64 := x.Type().Bits() == 64
@@ -1691,7 +1691,7 @@ func (m *machine) lowerBitwiseAluOp(si *ssa.Instruction, op aluOp, ignoreResult
 		c := instr.ConstantVal()
 		if isBitMaskImmediate(c, _64) {
 			// Constant bit wise operations can be lowered to a single instruction.
-			alu.asALUBitmaskImm(op, rd.nr(), rn.nr(), c, _64)
+			alu.asALUBitmaskImm(op, rd, rn.nr(), c, _64)
 			m.insert(alu)
 			return
 		}
@@ -1709,25 +1709,25 @@ func (m *machine) lowerRotl(si *ssa.Instruction) {
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)

-	var tmp operand
+	var tmp regalloc.VReg
 	if _64 {
-		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+		tmp = m.compiler.AllocateVReg(ssa.TypeI64)
 	} else {
-		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+		tmp = m.compiler.AllocateVReg(ssa.TypeI32)
 	}
-	rd := operandNR(m.compiler.VRegOf(r))
+	rd := m.compiler.VRegOf(r)

 	// Encode rotl as neg + rotr: neg is a sub against the zero-reg.
 	m.lowerRotlImpl(rd, rn, rm, tmp, _64)
 }

-func (m *machine) lowerRotlImpl(rd, rn, rm, tmp operand, is64bit bool) {
+func (m *machine) lowerRotlImpl(rd regalloc.VReg, rn, rm operand, tmp regalloc.VReg, is64bit bool) {
 	// Encode rotl as neg + rotr: neg is a sub against the zero-reg.
 	neg := m.allocateInstr()
 	neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rm, is64bit)
 	m.insert(neg)
 	alu := m.allocateInstr()
-	alu.asALU(aluOpRotR, rd, rn, tmp, is64bit)
+	alu.asALU(aluOpRotR, rd, rn, operandNR(tmp), is64bit)
 	m.insert(alu)
 }

@@ -1737,7 +1737,7 @@ func (m *machine) lowerRotr(si *ssa.Instruction) {
 	xDef, yDef := m.compiler.ValueDefinition(x), m.compiler.ValueDefinition(y)
 	rn := m.getOperand_NR(xDef, extModeNone)
 	rm := m.getOperand_NR(yDef, extModeNone)
-	rd := operandNR(m.compiler.VRegOf(si.Return()))
+	rd := m.compiler.VRegOf(si.Return())

 	alu := m.allocateInstr()
 	alu.asALU(aluOpRotR, rd, rn, rm, si.Return().Type().Bits() == 64)
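AArch64 has no rotate-left instruction, so lowerRotl negates the shift amount with a SUB against the zero register and rotates right instead. The identity, sketched in Go for the 64-bit case:

	// rotl64 rotates left by computing ROR with the negated amount,
	// exactly what the neg+rotr pair above emits.
	func rotl64(x, amount uint64) uint64 {
		n := -amount & 63           // SUB against xzr, modulo the register width
		return x>>n | x<<(64-n)     // ROR by n (shift counts >= 64 yield 0 in Go)
	}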
@@ -1797,7 +1797,7 @@ func (m *machine) lowerImul(x, y, result ssa.Value) {

 	// TODO: if this comes before Add/Sub, we could merge it by putting it into the place of xzrVReg.
 	mul := m.allocateInstr()
-	mul.asALURRRR(aluOpMAdd, operandNR(rd), rn, rm, operandNR(xzrVReg), x.Type().Bits() == 64)
+	mul.asALURRRR(aluOpMAdd, rd, rn, rm, xzrVReg, x.Type().Bits() == 64)
 	m.insert(mul)
 }

@@ -1849,22 +1849,22 @@ func (m *machine) lowerPopcnt(x, result ssa.Value) {
 	//	mov x5, v0.d[0]     ;; finally we mov the result back to a GPR
 	//

-	rd := operandNR(m.compiler.VRegOf(result))
+	rd := m.compiler.VRegOf(result)
 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)

 	rf1 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
 	ins := m.allocateInstr()
-	ins.asMovToVec(rf1, rn, vecArrangementD, vecIndex(0))
+	ins.asMovToVec(rf1.nr(), rn, vecArrangementD, vecIndex(0))
 	m.insert(ins)

 	rf2 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
 	cnt := m.allocateInstr()
-	cnt.asVecMisc(vecOpCnt, rf2, rf1, vecArrangement16B)
+	cnt.asVecMisc(vecOpCnt, rf2.nr(), rf1, vecArrangement16B)
 	m.insert(cnt)

 	rf3 := operandNR(m.compiler.AllocateVReg(ssa.TypeF64))
 	uaddlv := m.allocateInstr()
-	uaddlv.asVecLanes(vecOpUaddlv, rf3, rf2, vecArrangement8B)
+	uaddlv.asVecLanes(vecOpUaddlv, rf3.nr(), rf2, vecArrangement8B)
 	m.insert(uaddlv)

 	mov := m.allocateInstr()
@@ -1879,32 +1879,35 @@ func (m *machine) lowerExitWithCode(execCtxVReg regalloc.VReg, code wazevoapi.Ex
 	loadExitCodeConst.asMOVZ(tmpReg1, uint64(code), 0, true)

 	setExitCode := m.allocateInstr()
-	setExitCode.asStore(operandNR(tmpReg1),
-		addressMode{
-			kind: addressModeKindRegUnsignedImm12,
-			rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
-		}, 32)
+	mode := m.amodePool.Allocate()
+	*mode = addressMode{
+		kind: addressModeKindRegUnsignedImm12,
+		rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetExitCodeOffset.I64(),
+	}
+	setExitCode.asStore(operandNR(tmpReg1), mode, 32)

 	// In order to unwind the stack, we also need to push the current stack pointer:
 	tmp2 := m.compiler.AllocateVReg(ssa.TypeI64)
 	movSpToTmp := m.allocateInstr()
 	movSpToTmp.asMove64(tmp2, spVReg)
 	strSpToExecCtx := m.allocateInstr()
-	strSpToExecCtx.asStore(operandNR(tmp2),
-		addressMode{
-			kind: addressModeKindRegUnsignedImm12,
-			rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
-		}, 64)
+	mode2 := m.amodePool.Allocate()
+	*mode2 = addressMode{
+		kind: addressModeKindRegUnsignedImm12,
+		rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetStackPointerBeforeGoCall.I64(),
+	}
+	strSpToExecCtx.asStore(operandNR(tmp2), mode2, 64)

 	// Also the address of this exit.
 	tmp3 := m.compiler.AllocateVReg(ssa.TypeI64)
 	currentAddrToTmp := m.allocateInstr()
 	currentAddrToTmp.asAdr(tmp3, 0)
 	storeCurrentAddrToExecCtx := m.allocateInstr()
-	storeCurrentAddrToExecCtx.asStore(operandNR(tmp3),
-		addressMode{
-			kind: addressModeKindRegUnsignedImm12,
-			rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
-		}, 64)
+	mode3 := m.amodePool.Allocate()
+	*mode3 = addressMode{
+		kind: addressModeKindRegUnsignedImm12,
+		rn:   execCtxVReg, imm: wazevoapi.ExecutionContextOffsetGoCallReturnAddress.I64(),
+	}
+	storeCurrentAddrToExecCtx.asStore(operandNR(tmp3), mode3, 64)

 	exitSeq := m.allocateInstr()
 	exitSeq.asExitSequence(execCtxVReg)
@@ -1937,7 +1940,7 @@ func (m *machine) lowerIcmpToFlag(x, y ssa.Value, signed bool) {
 	alu.asALU(
 		aluOpSubS,
 		// We don't need the result, just need to set flags.
-		operandNR(xzrVReg),
+		xzrVReg,
 		rn,
 		rm,
 		x.Type().Bits() == 64,
@@ -2012,7 +2015,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
 		alu.asALU(
 			aluOpSubS,
 			// We don't need the result, just need to set flags.
-			operandNR(xzrVReg),
+			xzrVReg,
 			rn,
 			operandNR(xzrVReg),
 			c.Type().Bits() == 64,
@@ -2024,7 +2027,7 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {

 	rn := m.getOperand_NR(m.compiler.ValueDefinition(x), extModeNone)
 	rm := m.getOperand_NR(m.compiler.ValueDefinition(y), extModeNone)
-	rd := operandNR(m.compiler.VRegOf(result))
+	rd := m.compiler.VRegOf(result)
 	switch x.Type() {
 	case ssa.TypeI32, ssa.TypeI64:
 		// csel rd, rn, rm, cc
@@ -2041,10 +2044,10 @@ func (m *machine) lowerSelect(c, x, y, result ssa.Value) {
 	}
 }

-func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
+func (m *machine) lowerSelectVec(rc, rn, rm operand, rd regalloc.VReg) {
 	// First check if `rc` is zero or not.
 	checkZero := m.allocateInstr()
-	checkZero.asALU(aluOpSubS, operandNR(xzrVReg), rc, operandNR(xzrVReg), false)
+	checkZero.asALU(aluOpSubS, xzrVReg, rc, operandNR(xzrVReg), false)
 	m.insert(checkZero)

 	// Then use CSETM to set all bits to one if `rc` is zero.
@@ -2054,7 +2057,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {
 	m.insert(cset)

 	// Then move the bits to the result vector register.
-	tmp2 := operandNR(m.compiler.AllocateVReg(ssa.TypeV128))
+	tmp2 := m.compiler.AllocateVReg(ssa.TypeV128)
 	dup := m.allocateInstr()
 	dup.asVecDup(tmp2, operandNR(allOnesOrZero), vecArrangement2D)
 	m.insert(dup)
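The checkZero/CSETM/dup/BSL combination in lowerSelectVec implements a branchless vector select: the condition is materialized as an all-ones or all-zero mask and broadcast, and BSL then picks bits from one input under the mask and from the other outside it. A per-word sketch of that bit-select (illustrative; the mask polarity follows whichever flag condition the CSETM encodes):

	// bitSelect mirrors CSETM + BSL on one 64-bit chunk.
	func bitSelect(cond bool, rn, rm uint64) uint64 {
		var mask uint64
		if cond {
			mask = ^uint64(0) // CSETM: all ones when the condition holds
		}
		return rn&mask | rm&^mask // BSL: rn bits where mask is set, rm elsewhere
	}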
@@ -2067,7 +2070,7 @@ func (m *machine) lowerSelectVec(rc, rn, rm, rd operand) {

 	// Finally, move the result to the destination register.
 	mov2 := m.allocateInstr()
-	mov2.asFpuMov128(rd.nr(), tmp2.nr())
+	mov2.asFpuMov128(rd, tmp2)
 	m.insert(mov2)
 }

@@ -2099,28 +2102,28 @@ func (m *machine) lowerAtomicRmw(si *ssa.Instruction) {
 	addr, val := si.Arg2()
 	addrDef, valDef := m.compiler.ValueDefinition(addr), m.compiler.ValueDefinition(val)
 	rn := m.getOperand_NR(addrDef, extModeNone)
-	rt := operandNR(m.compiler.VRegOf(si.Return()))
+	rt := m.compiler.VRegOf(si.Return())
 	rs := m.getOperand_NR(valDef, extModeNone)

 	_64 := si.Return().Type().Bits() == 64
-	var tmp operand
+	var tmp regalloc.VReg
 	if _64 {
-		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI64))
+		tmp = m.compiler.AllocateVReg(ssa.TypeI64)
 	} else {
-		tmp = operandNR(m.compiler.AllocateVReg(ssa.TypeI32))
+		tmp = m.compiler.AllocateVReg(ssa.TypeI32)
 	}
-	m.lowerAtomicRmwImpl(op, rn, rs, rt, tmp, size, negateArg, flipArg, _64)
+	m.lowerAtomicRmwImpl(op, rn.nr(), rs.nr(), rt, tmp, size, negateArg, flipArg, _64)
 }

-func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp operand, size uint64, negateArg, flipArg, dst64bit bool) {
+func (m *machine) lowerAtomicRmwImpl(op atomicRmwOp, rn, rs, rt, tmp regalloc.VReg, size uint64, negateArg, flipArg, dst64bit bool) {
 	switch {
 	case negateArg:
 		neg := m.allocateInstr()
-		neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), rs, dst64bit)
+		neg.asALU(aluOpSub, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)
 		m.insert(neg)
 	case flipArg:
 		flip := m.allocateInstr()
-		flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), rs, dst64bit)
+		flip.asALU(aluOpOrn, tmp, operandNR(xzrVReg), operandNR(rs), dst64bit)
 		m.insert(flip)
 	default:
 		tmp = rs
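The negateArg branch above exists because the underlying atomic RMW primitive only adds: subtraction is performed by adding the two's complement (the SUB against xzr), and flipArg similarly pre-inverts the argument with ORN for the bit-clearing form. The same trick expressed with Go's add-only atomic (hypothetical helper name):

	import "sync/atomic"

	// atomicSub subtracts by adding the negation; returns the new value.
	func atomicSub(addr *int64, delta int64) int64 {
		return atomic.AddInt64(addr, -delta)
	}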
@@ -2139,32 +2142,32 @@ func (m *machine) lowerAtomicCas(si *ssa.Instruction) {
 	rn := m.getOperand_NR(addrDef, extModeNone)
 	rt := m.getOperand_NR(replDef, extModeNone)
 	rs := m.getOperand_NR(expDef, extModeNone)
-	tmp := operandNR(m.compiler.AllocateVReg(si.Return().Type()))
+	tmp := m.compiler.AllocateVReg(si.Return().Type())

 	_64 := si.Return().Type().Bits() == 64

 	// rs is overwritten by CAS, so we need to move it to the result register before the instruction
 	// in case when it is used somewhere else.
 	mov := m.allocateInstr()
 	if _64 {
-		mov.asMove64(tmp.nr(), rs.nr())
+		mov.asMove64(tmp, rs.nr())
 	} else {
-		mov.asMove32(tmp.nr(), rs.nr())
+		mov.asMove32(tmp, rs.nr())
 	}
 	m.insert(mov)

-	m.lowerAtomicCasImpl(rn, tmp, rt, size)
+	m.lowerAtomicCasImpl(rn.nr(), tmp, rt.nr(), size)

 	mov2 := m.allocateInstr()
 	rd := m.compiler.VRegOf(si.Return())
 	if _64 {
-		mov2.asMove64(rd, tmp.nr())
+		mov2.asMove64(rd, tmp)
 	} else {
-		mov2.asMove32(rd, tmp.nr())
+		mov2.asMove32(rd, tmp)
 	}
 	m.insert(mov2)
 }

-func (m *machine) lowerAtomicCasImpl(rn, rs, rt operand, size uint64) {
+func (m *machine) lowerAtomicCasImpl(rn, rs, rt regalloc.VReg, size uint64) {
 	cas := m.allocateInstr()
 	cas.asAtomicCas(rn, rs, rt, size)
 	m.insert(cas)
@@ -2176,12 +2179,12 @@ func (m *machine) lowerAtomicLoad(si *ssa.Instruction) {

 	addrDef := m.compiler.ValueDefinition(addr)
 	rn := m.getOperand_NR(addrDef, extModeNone)
-	rt := operandNR(m.compiler.VRegOf(si.Return()))
+	rt := m.compiler.VRegOf(si.Return())

-	m.lowerAtomicLoadImpl(rn, rt, size)
+	m.lowerAtomicLoadImpl(rn.nr(), rt, size)
 }

-func (m *machine) lowerAtomicLoadImpl(rn, rt operand, size uint64) {
+func (m *machine) lowerAtomicLoadImpl(rn, rt regalloc.VReg, size uint64) {
 	ld := m.allocateInstr()
 	ld.asAtomicLoad(rn, rt, size)
 	m.insert(ld)
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
index 4842eaa38..fd0760d72 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/lower_mem.go
@@ -24,6 +24,14 @@ type (
 	addressModeKind byte
 )

+func resetAddressMode(a *addressMode) {
+	a.kind = 0
+	a.rn = 0
+	a.rm = 0
+	a.extOp = 0
+	a.imm = 0
+}
+
 const (
 	// addressModeKindRegExtended takes a base register and an index register. The index register is sign/zero-extended,
 	// and then scaled by bits(type)/8.
@@ -140,15 +148,17 @@ func (a addressMode) format(dstSizeBits byte) (ret string) {
 	return
 }

-func addressModePreOrPostIndex(rn regalloc.VReg, imm int64, preIndex bool) addressMode {
+func addressModePreOrPostIndex(m *machine, rn regalloc.VReg, imm int64, preIndex bool) *addressMode {
 	if !offsetFitsInAddressModeKindRegSignedImm9(imm) {
 		panic(fmt.Sprintf("BUG: offset %#x does not fit in addressModeKindRegSignedImm9", imm))
 	}
+	mode := m.amodePool.Allocate()
 	if preIndex {
-		return addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
+		*mode = addressMode{kind: addressModeKindPreIndex, rn: rn, imm: imm}
 	} else {
-		return addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
+		*mode = addressMode{kind: addressModeKindPostIndex, rn: rn, imm: imm}
 	}
+	return mode
 }

 func offsetFitsInAddressModeKindRegUnsignedImm12(dstSizeInBits byte, offset int64) bool {
@@ -207,9 +217,9 @@ func (m *machine) lowerExtLoad(op ssa.Opcode, ptr ssa.Value, offset uint32, ret
 	amode := m.lowerToAddressMode(ptr, offset, size)
 	load := m.allocateInstr()
 	if signed {
-		load.asSLoad(operandNR(ret), amode, size)
+		load.asSLoad(ret, amode, size)
 	} else {
-		load.asULoad(operandNR(ret), amode, size)
+		load.asULoad(ret, amode, size)
 	}
 	m.insert(load)
 }
@@ -221,11 +231,11 @@ func (m *machine) lowerLoad(ptr ssa.Value, offset uint32, typ ssa.Type, ret ssa.
 	load := m.allocateInstr()
 	switch typ {
 	case ssa.TypeI32, ssa.TypeI64:
-		load.asULoad(operandNR(dst), amode, typ.Bits())
+		load.asULoad(dst, amode, typ.Bits())
 	case ssa.TypeF32, ssa.TypeF64:
-		load.asFpuLoad(operandNR(dst), amode, typ.Bits())
+		load.asFpuLoad(dst, amode, typ.Bits())
 	case ssa.TypeV128:
-		load.asFpuLoad(operandNR(dst), amode, 128)
+		load.asFpuLoad(dst, amode, 128)
 	default:
 		panic("TODO")
 	}
@@ -239,7 +249,7 @@ func (m *machine) lowerLoadSplat(ptr ssa.Value, offset uint32, lane ssa.VecLane,
 	m.lowerConstantI64(offsetReg, int64(offset))
 	addedBase := m.addReg64ToReg64(base, offsetReg)

-	rd := operandNR(m.compiler.VRegOf(ret))
+	rd := m.compiler.VRegOf(ret)

 	ld1r := m.allocateInstr()
 	ld1r.asVecLoad1R(rd, operandNR(addedBase), ssaLaneToArrangement(lane))
@@ -258,7 +268,7 @@ func (m *machine) lowerStore(si *ssa.Instruction) {
 }

 // lowerToAddressMode converts a pointer to an addressMode that can be used as an operand for load/store instructions.
-func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode addressMode) {
+func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte) (amode *addressMode) {
 	// TODO: currently the instruction selection logic doesn't support addressModeKindRegScaledExtended and
 	// addressModeKindRegScaled since collectAddends doesn't take ssa.OpcodeIshl into account. This should be fixed
 	// to support more efficient address resolution.
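Every `m.amodePool.Allocate()` call site in this change relies on a pool that hands out reset, pointer-stable addressMode values and reclaims them wholesale on machine.Reset. A rough sketch of such a pool (illustrative only; the real wazevoapi.Pool differs in detail):

	// pagedPool hands out *T values that stay valid while the pool grows,
	// because each fixed-size page's backing array never moves.
	type pagedPool[T any] struct {
		pages [][]T
		used  int      // entries handed out from the last page
		reset func(*T) // e.g. resetAddressMode
	}

	const poolPageSize = 128

	func (p *pagedPool[T]) Allocate() *T {
		if len(p.pages) == 0 || p.used == poolPageSize {
			p.pages = append(p.pages, make([]T, poolPageSize))
			p.used = 0
		}
		v := &p.pages[len(p.pages)-1][p.used]
		p.used++
		p.reset(v) // always hand out a cleared value
		return v
	}

	func (p *pagedPool[T]) Reset() {
		if len(p.pages) > 1 {
			p.pages = p.pages[:1] // keep one page for reuse
		}
		p.used = 0
	}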
@@ -272,32 +282,33 @@ func (m *machine) lowerToAddressMode(ptr ssa.Value, offsetBase uint32, size byte
 // During the construction, this might emit additional instructions.
 //
 // Extracted as a separate function for easy testing.
-func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode addressMode) {
+func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32], a64s *wazevoapi.Queue[regalloc.VReg], size byte, offset int64) (amode *addressMode) {
+	amode = m.amodePool.Allocate()
 	switch a64sExist, a32sExist := !a64s.Empty(), !a32s.Empty(); {
 	case a64sExist && a32sExist:
 		var base regalloc.VReg
 		base = a64s.Dequeue()
 		var a32 addend32
 		a32 = a32s.Dequeue()
-		amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
+		*amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: a32.r, extOp: a32.ext}
 	case a64sExist && offsetFitsInAddressModeKindRegUnsignedImm12(size, offset):
 		var base regalloc.VReg
 		base = a64s.Dequeue()
-		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
+		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: offset}
 		offset = 0
 	case a64sExist && offsetFitsInAddressModeKindRegSignedImm9(offset):
 		var base regalloc.VReg
 		base = a64s.Dequeue()
-		amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
+		*amode = addressMode{kind: addressModeKindRegSignedImm9, rn: base, imm: offset}
 		offset = 0
 	case a64sExist:
 		var base regalloc.VReg
 		base = a64s.Dequeue()
 		if !a64s.Empty() {
 			index := a64s.Dequeue()
-			amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
+			*amode = addressMode{kind: addressModeKindRegReg, rn: base, rm: index, extOp: extendOpUXTX /* indicates index reg is 64-bit */}
 		} else {
-			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+			*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
 		}
 	case a32sExist:
 		base32 := a32s.Dequeue()
@@ -314,14 +325,14 @@ func (m *machine) lowerToAddressModeFromAddends(a32s *wazevoapi.Queue[addend32],

 		if !a32s.Empty() {
 			index := a32s.Dequeue()
-			amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
+			*amode = addressMode{kind: addressModeKindRegExtended, rn: base, rm: index.r, extOp: index.ext}
 		} else {
-			amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
+			*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: base, imm: 0}
 		}
 	default: // Only static offsets.
 		tmpReg := m.compiler.AllocateVReg(ssa.TypeI64)
 		m.lowerConstantI64(tmpReg, offset)
-		amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
+		*amode = addressMode{kind: addressModeKindRegUnsignedImm12, rn: tmpReg, imm: 0}
 		offset = 0
 	}

@@ -411,13 +422,13 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
 	rd = m.compiler.AllocateVReg(ssa.TypeI64)
 	alu := m.allocateInstr()
 	if imm12Op, ok := asImm12Operand(uint64(c)); ok {
-		alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), imm12Op, true)
+		alu.asALU(aluOpAdd, rd, operandNR(r), imm12Op, true)
 	} else if imm12Op, ok = asImm12Operand(uint64(-c)); ok {
-		alu.asALU(aluOpSub, operandNR(rd), operandNR(r), imm12Op, true)
+		alu.asALU(aluOpSub, rd, operandNR(r), imm12Op, true)
 	} else {
 		tmp := m.compiler.AllocateVReg(ssa.TypeI64)
 		m.load64bitConst(c, tmp)
-		alu.asALU(aluOpAdd, operandNR(rd), operandNR(r), operandNR(tmp), true)
+		alu.asALU(aluOpAdd, rd, operandNR(r), operandNR(tmp), true)
 	}
 	m.insert(alu)
 	return
@@ -426,7 +437,7 @@ func (m *machine) addConstToReg64(r regalloc.VReg, c int64) (rd regalloc.VReg) {
 func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
 	rd = m.compiler.AllocateVReg(ssa.TypeI64)
 	alu := m.allocateInstr()
-	alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandNR(rm), true)
+	alu.asALU(aluOpAdd, rd, operandNR(rn), operandNR(rm), true)
 	m.insert(alu)
 	return
 }
@@ -434,7 +445,7 @@ func (m *machine) addReg64ToReg64(rn, rm regalloc.VReg) (rd regalloc.VReg) {
 func (m *machine) addRegToReg64Ext(rn, rm regalloc.VReg, ext extendOp) (rd regalloc.VReg) {
 	rd = m.compiler.AllocateVReg(ssa.TypeI64)
 	alu := m.allocateInstr()
-	alu.asALU(aluOpAdd, operandNR(rd), operandNR(rn), operandER(rm, ext, 64), true)
+	alu.asALU(aluOpAdd, rd, operandNR(rn), operandER(rm, ext, 64), true)
 	m.insert(alu)
 	return
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
index b435d9ba9..5f584f928 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine.go
@@ -21,6 +21,8 @@ type (
 		regAlloc   regalloc.Allocator
 		regAllocFn *backend.RegAllocFunction[*instruction, *machine]

+		amodePool wazevoapi.Pool[addressMode]
+
 		// addendsWorkQueue is used during address lowering, defined here for reuse.
 		addendsWorkQueue wazevoapi.Queue[ssa.Value]
 		addends32        wazevoapi.Queue[addend32]
@@ -105,6 +107,7 @@ func NewBackend() backend.Machine {
 		spillSlots:        make(map[regalloc.VRegID]int64),
 		executableContext: newExecutableContext(),
 		regAlloc:          regalloc.NewAllocator(regInfo),
+		amodePool:         wazevoapi.NewPool[addressMode](resetAddressMode),
 	}
 	return m
 }
@@ -149,6 +152,7 @@ func (m *machine) Reset() {
 	m.maxRequiredStackSizeForCalls = 0
 	m.executableContext.Reset()
 	m.jmpTableTargets = m.jmpTableTargets[:0]
+	m.amodePool.Reset()
 }

 // SetCurrentABI implements backend.Machine SetCurrentABI.
@@ -183,9 +187,8 @@ func (m *machine) allocateBrTarget() (nop *instruction, l label) {
 	l = ectx.AllocateLabel()
 	nop = m.allocateInstr()
 	nop.asNop0WithLabel(l)
-	pos := ectx.AllocateLabelPosition(l)
+	pos := ectx.GetOrAllocateLabelPosition(l)
 	pos.Begin, pos.End = nop, nop
-	ectx.LabelPositions[l] = pos
 	return
 }

@@ -209,7 +212,7 @@ func (m *machine) allocateNop() *instruction {
 }

 func (m *machine) resolveAddressingMode(arg0offset, ret0offset int64, i *instruction) {
-	amode := &i.amode
+	amode := i.getAmode()
 	switch amode.kind {
 	case addressModeKindResultStackSpace:
 		amode.imm += ret0offset
@@ -281,7 +284,7 @@ func (m *machine) resolveRelativeAddresses(ctx context.Context) {
 			switch cur.kind {
 			case nop0:
 				l := cur.nop0Label()
-				if pos, ok := ectx.LabelPositions[l]; ok {
+				if pos := ectx.LabelPositions[l]; pos != nil {
 					pos.BinaryOffset = offset + size
 				}
 			case condBr:
@@ -428,8 +431,10 @@ func (m *machine) insertConditionalJumpTrampoline(cbr *instruction, currentBlk *
 func (m *machine) Format() string {
 	ectx := m.executableContext
 	begins := map[*instruction]label{}
-	for l, pos := range ectx.LabelPositions {
-		begins[pos.Begin] = l
+	for _, pos := range ectx.LabelPositions {
+		if pos != nil {
+			begins[pos.Begin] = pos.L
+		}
 	}

 	irBlocks := map[label]ssa.BasicBlockID{}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
index 466fac464..d9032f921 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_pro_epi_logue.go
@@ -70,7 +70,7 @@ func (m *machine) setupPrologue() {
 	//	          +-----------------+ <----- SP
 	//	          (low address)
 	//
-	_amode := addressModePreOrPostIndex(spVReg,
+	_amode := addressModePreOrPostIndex(m, spVReg,
 		-16,  // stack pointer must be 16-byte aligned.
 		true, // Decrement before store.
 	)
@@ -159,7 +159,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
 		sizeOfArgRetReg = tmpRegVReg

 		subSp := m.allocateInstr()
-		subSp.asALU(aluOpSub, operandNR(spVReg), operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
+		subSp.asALU(aluOpSub, spVReg, operandNR(spVReg), operandNR(sizeOfArgRetReg), true)
 		cur = linkInstr(cur, subSp)
 	} else {
 		sizeOfArgRetReg = xzrVReg
@@ -168,7 +168,7 @@ func (m *machine) createReturnAddrAndSizeOfArgRetSlot(cur *instruction) *instruc
 	// Saves the return address (lr) and the size_of_arg_ret below the SP.
 	// size_of_arg_ret is used for stack unwinding.
 	pstr := m.allocateInstr()
-	amode := addressModePreOrPostIndex(spVReg, -16, true /* decrement before store */)
+	amode := addressModePreOrPostIndex(m, spVReg, -16, true /* decrement before store */)
 	pstr.asStorePair64(lrVReg, sizeOfArgRetReg, amode)
 	cur = linkInstr(cur, pstr)
 	return cur
@@ -182,7 +182,7 @@ func (m *machine) createFrameSizeSlot(cur *instruction, s int64) *instruction {
 	} else {
 		frameSizeReg = xzrVReg
 	}
-	_amode := addressModePreOrPostIndex(spVReg,
+	_amode := addressModePreOrPostIndex(m, spVReg,
 		-16,  // stack pointer must be 16-byte aligned.
 		true, // Decrement before store.
 	)
@@ -213,7 +213,7 @@ func (m *machine) postRegAlloc() {
 			m.executableContext.PendingInstructions = m.executableContext.PendingInstructions[:0]
 		default:
 			// Removes the redundant copy instruction.
-			if cur.IsCopy() && cur.rn.realReg() == cur.rd.realReg() {
+			if cur.IsCopy() && cur.rn.realReg() == cur.rd.RealReg() {
 				prev, next := cur.prev, cur.next
 				// Remove the copy instruction.
 				prev.next = next
@@ -286,16 +286,16 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
 		for i := range m.clobberedRegs {
 			vr := m.clobberedRegs[l-i] // reverse order to restore.
 			load := m.allocateInstr()
-			amode := addressModePreOrPostIndex(spVReg,
+			amode := addressModePreOrPostIndex(m, spVReg,
 				16,    // stack pointer must be 16-byte aligned.
 				false, // Increment after store.
 			)
 			// TODO: pair loads to reduce the number of instructions.
 			switch regTypeToRegisterSizeInBits(vr.RegType()) {
 			case 64: // save int reg.
-				load.asULoad(operandNR(vr), amode, 64)
+				load.asULoad(vr, amode, 64)
 			case 128: // save vector reg.
-				load.asFpuLoad(operandNR(vr), amode, 128)
+				load.asFpuLoad(vr, amode, 128)
 			}
 			cur = linkInstr(cur, load)
 		}
@@ -317,8 +317,8 @@ func (m *machine) setupEpilogueAfter(cur *instruction) {
 	//          SP----> +-----------------+

 	ldr := m.allocateInstr()
-	ldr.asULoad(operandNR(lrVReg),
-		addressModePreOrPostIndex(spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
+	ldr.asULoad(lrVReg,
+		addressModePreOrPostIndex(m, spVReg, 16 /* stack pointer must be 16-byte aligned. */, false /* increment after loads */), 64)
 	cur = linkInstr(cur, ldr)

 	if s := int64(m.currentABI.AlignedArgResultStackSlotSize()); s > 0 {
@@ -351,14 +351,14 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
 	if immm12op, ok := asImm12Operand(uint64(requiredStackSize)); ok {
 		// sub tmp, sp, #requiredStackSize
 		sub := m.allocateInstr()
-		sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), immm12op, true)
+		sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), immm12op, true)
 		cur = linkInstr(cur, sub)
 	} else {
 		// This case, we first load the requiredStackSize into the temporary register,
 		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
 		// Then subtract it.
 		sub := m.allocateInstr()
-		sub.asALU(aluOpSub, operandNR(tmpRegVReg), operandNR(spVReg), operandNR(tmpRegVReg), true)
+		sub.asALU(aluOpSub, tmpRegVReg, operandNR(spVReg), operandNR(tmpRegVReg), true)
 		cur = linkInstr(cur, sub)
 	}

@@ -366,16 +366,18 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi

 	// ldr tmp2, [executionContext #StackBottomPtr]
 	ldr := m.allocateInstr()
-	ldr.asULoad(operandNR(tmp2), addressMode{
+	amode := m.amodePool.Allocate()
+	*amode = addressMode{
 		kind: addressModeKindRegUnsignedImm12,
 		rn:   x0VReg, // execution context is always the first argument.
 		imm: wazevoapi.ExecutionContextOffsetStackBottomPtr.I64(),
-	}, 64)
+	}
+	ldr.asULoad(tmp2, amode, 64)
 	cur = linkInstr(cur, ldr)

 	// subs xzr, tmp, tmp2
 	subs := m.allocateInstr()
-	subs.asALU(aluOpSubS, operandNR(xzrVReg), operandNR(tmpRegVReg), operandNR(tmp2), true)
+	subs.asALU(aluOpSubS, xzrVReg, operandNR(tmpRegVReg), operandNR(tmp2), true)
 	cur = linkInstr(cur, subs)

 	// b.ge #imm
@@ -388,22 +390,25 @@ func (m *machine) insertStackBoundsCheck(requiredStackSize int64, cur *instructi
 		// First load the requiredStackSize into the temporary register,
 		cur = m.lowerConstantI64AndInsert(cur, tmpRegVReg, requiredStackSize)
 		setRequiredStackSize := m.allocateInstr()
-		setRequiredStackSize.asStore(operandNR(tmpRegVReg),
-			addressMode{
-				kind: addressModeKindRegUnsignedImm12,
-				// Execution context is always the first argument.
-				rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
-			}, 64)
+		amode := m.amodePool.Allocate()
+		*amode = addressMode{
+			kind: addressModeKindRegUnsignedImm12,
+			// Execution context is always the first argument.
+			rn: x0VReg, imm: wazevoapi.ExecutionContextOffsetStackGrowRequiredSize.I64(),
+		}
+		setRequiredStackSize.asStore(operandNR(tmpRegVReg), amode, 64)

 		cur = linkInstr(cur, setRequiredStackSize)
 	}

 	ldrAddress := m.allocateInstr()
-	ldrAddress.asULoad(operandNR(tmpRegVReg), addressMode{
+	amode2 := m.amodePool.Allocate()
+	*amode2 = addressMode{
 		kind: addressModeKindRegUnsignedImm12,
 		rn:   x0VReg, // execution context is always the first argument
 		imm:  wazevoapi.ExecutionContextOffsetStackGrowCallTrampolineAddress.I64(),
-	}, 64)
+	}
+	ldrAddress.asULoad(tmpRegVReg, amode2, 64)
 	cur = linkInstr(cur, ldrAddress)

 	// Then jumps to the stack grow call sequence's address, meaning
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
index 1c8793b73..c7eb92cc2 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/isa/arm64/machine_regalloc.go
@@ -91,7 +91,7 @@ func (m *machine) InsertStoreRegisterAt(v regalloc.VReg, instr *instruction, aft
 	}

 	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
-	var amode addressMode
+	var amode *addressMode
 	cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
 	store := m.allocateInstr()
 	store.asStore(operandNR(v), amode, typ.Bits())
@@ -116,16 +116,16 @@ func (m *machine) InsertReloadRegisterAt(v regalloc.VReg, instr *instruction, af
 	}

 	offsetFromSP := m.getVRegSpillSlotOffsetFromSP(v.ID(), typ.Size())
-	var amode addressMode
+	var amode *addressMode
 	cur, amode = m.resolveAddressModeForOffsetAndInsert(cur, offsetFromSP, typ.Bits(), spVReg, true)
 	load := m.allocateInstr()
 	switch typ {
 	case ssa.TypeI32, ssa.TypeI64:
-		load.asULoad(operandNR(v), amode, typ.Bits())
+		load.asULoad(v, amode, typ.Bits())
 	case ssa.TypeF32, ssa.TypeF64:
-		load.asFpuLoad(operandNR(v), amode, typ.Bits())
+		load.asFpuLoad(v, amode, typ.Bits())
 	case ssa.TypeV128:
-		load.asFpuLoad(operandNR(v), amode, 128)
+		load.asFpuLoad(v, amode, 128)
 	default:
 		panic("TODO")
 	}
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
index 3f36c84e5..655370786 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc.go
@@ -35,7 +35,7 @@ type (
 		iter                    int
 		reversePostOrderBlocks  []RegAllocBlock[I, m]
 		// labelToRegAllocBlockIndex maps label to the index of reversePostOrderBlocks.
-		labelToRegAllocBlockIndex map[Label]int
+		labelToRegAllocBlockIndex [] /* Label to */ int
 		loopNestingForestRoots    []ssa.BasicBlock
 	}

@@ -56,10 +56,9 @@ type (
 // NewRegAllocFunction returns a new RegAllocFunction.
 func NewRegAllocFunction[I regalloc.InstrConstraint, M RegAllocFunctionMachine[I]](m M, ssb ssa.Builder, c Compiler) *RegAllocFunction[I, M] {
 	return &RegAllocFunction[I, M]{
-		m:                         m,
-		ssb:                       ssb,
-		c:                         c,
-		labelToRegAllocBlockIndex: make(map[Label]int),
+		m:   m,
+		ssb: ssb,
+		c:   c,
 	}
 }

@@ -74,6 +73,9 @@ func (f *RegAllocFunction[I, M]) AddBlock(sb ssa.BasicBlock, l Label, begin, end
 		end:   end,
 		id:    int(sb.ID()),
 	})
+	if len(f.labelToRegAllocBlockIndex) <= int(l) {
+		f.labelToRegAllocBlockIndex = append(f.labelToRegAllocBlockIndex, make([]int, int(l)-len(f.labelToRegAllocBlockIndex)+1)...)
+	}
 	f.labelToRegAllocBlockIndex[l] = i
 }
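AddBlock above grows a plain slice on demand so it can replace the former map[Label]int: labels are small, dense integers, so slice indexing avoids hashing on a hot path. The growth idiom in isolation, with simplified integer types:

	// setBlockIndex plays the role of the labelToRegAllocBlockIndex update:
	// extend the slice with zero entries until index l exists, then assign.
	func setBlockIndex(idx []int, l, i int) []int {
		if len(idx) <= l {
			idx = append(idx, make([]int, l-len(idx)+1)...)
		}
		idx[l] = i
		return idx
	}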
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
index b4450d56f..eacb6a7ef 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regalloc.go
@@ -60,9 +60,8 @@ type (
 		phiDefInstListPool wazevoapi.Pool[phiDefInstList]

 		// Followings are re-used during various places.
-		blks             []Block
-		reals            []RealReg
-		currentOccupants regInUseSet
+		blks  []Block
+		reals []RealReg

 		// Following two fields are updated while iterating the blocks in the reverse postorder.
 		state state
@@ -755,7 +754,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 			killSet := a.reals[:0]

 			// Gather the set of registers that will be used in the current instruction.
-			for _, use := range instr.Uses(&a.vs) {
+			uses := instr.Uses(&a.vs)
+			for _, use := range uses {
 				if use.IsRealReg() {
 					r := use.RealReg()
 					currentUsedSet = currentUsedSet.add(r)
@@ -770,7 +770,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 				}
 			}

-			for i, use := range instr.Uses(&a.vs) {
+			for i, use := range uses {
 				if !use.IsRealReg() {
 					vs := s.getVRegState(use.ID())
 					killed := vs.lastUse == pc
@@ -944,8 +944,7 @@ func (a *Allocator) allocBlock(f Function, blk Block) {
 func (a *Allocator) releaseCallerSavedRegs(addrReg RealReg) {
 	s := &a.state

-	for i := 0; i < 64; i++ {
-		allocated := RealReg(i)
+	for allocated := RealReg(0); allocated < 64; allocated++ {
 		if allocated == addrReg { // If this is the call indirect, we should not touch the addr register.
 			continue
 		}
 	}
@@ -974,11 +973,10 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 	bID := blk.ID()
 	blkSt := a.getOrAllocateBlockState(bID)
 	desiredOccupants := &blkSt.startRegs
-	aliveOnRegVRegs := make(map[VReg]RealReg)
-	for i := 0; i < 64; i++ {
-		r := RealReg(i)
-		if v := blkSt.startRegs.get(r); v.Valid() {
-			aliveOnRegVRegs[v] = r
+	var desiredOccupantsSet RegSet
+	for i, v := range desiredOccupants {
+		if v != VRegInvalid {
+			desiredOccupantsSet = desiredOccupantsSet.add(RealReg(i))
 		}
 	}

@@ -987,56 +985,38 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 	}

 	s.currentBlockID = bID
-	a.updateLiveInVRState(a.getOrAllocateBlockState(bID))
+	a.updateLiveInVRState(blkSt)

-	currentOccupants := &a.currentOccupants
 	for i := 0; i < preds; i++ {
-		currentOccupants.reset()
 		if i == blkSt.startFromPredIndex {
 			continue
 		}

-		currentOccupantsRev := make(map[VReg]RealReg)
 		pred := blk.Pred(i)
 		predSt := a.getOrAllocateBlockState(pred.ID())
-		for ii := 0; ii < 64; ii++ {
-			r := RealReg(ii)
-			if v := predSt.endRegs.get(r); v.Valid() {
-				if _, ok := aliveOnRegVRegs[v]; !ok {
-					continue
-				}
-				currentOccupants.add(r, v)
-				currentOccupantsRev[v] = r
-			}
-		}

 		s.resetAt(predSt)

 		// Finds the free registers if any.
 		intTmp, floatTmp := VRegInvalid, VRegInvalid
 		if intFree := s.findAllocatable(
-			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupants.set,
+			a.regInfo.AllocatableRegisters[RegTypeInt], desiredOccupantsSet,
 		); intFree != RealRegInvalid {
 			intTmp = FromRealReg(intFree, RegTypeInt)
 		}
 		if floatFree := s.findAllocatable(
-			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupants.set,
+			a.regInfo.AllocatableRegisters[RegTypeFloat], desiredOccupantsSet,
 		); floatFree != RealRegInvalid {
 			floatTmp = FromRealReg(floatFree, RegTypeFloat)
 		}

-		if wazevoapi.RegAllocLoggingEnabled {
-			fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
-		}
-
-		for ii := 0; ii < 64; ii++ {
-			r := RealReg(ii)
+		for r := RealReg(0); r < 64; r++ {
 			desiredVReg := desiredOccupants.get(r)
 			if !desiredVReg.Valid() {
 				continue
 			}

-			currentVReg := currentOccupants.get(r)
+			currentVReg := s.regsInUse.get(r)
 			if desiredVReg.ID() == currentVReg.ID() {
 				continue
 			}
@@ -1048,86 +1028,95 @@ func (a *Allocator) fixMergeState(f Function, blk Block) {
 			} else {
 				tmpRealReg = floatTmp
 			}
-			a.reconcileEdge(f, r, pred, currentOccupants, currentOccupantsRev, currentVReg, desiredVReg, tmpRealReg, typ)
+			a.reconcileEdge(f, r, pred, currentVReg, desiredVReg, tmpRealReg, typ)
 		}
 	}
 }

+// reconcileEdge reconciles the register state between the current block and the predecessor for the real register `r`.
+//
+//   - currentVReg is the current VReg value that sits on the register `r`. This can be VRegInvalid if the register is not used at the end of the predecessor.
+//   - desiredVReg is the desired VReg value that should be on the register `r`.
+//   - freeReg is the temporary register that can be used to swap the values, which may or may not be used.
+//   - typ is the register type of the `r`.
 func (a *Allocator) reconcileEdge(f Function,
 	r RealReg,
 	pred Block,
-	currentOccupants *regInUseSet,
-	currentOccupantsRev map[VReg]RealReg,
 	currentVReg, desiredVReg VReg,
 	freeReg VReg,
 	typ RegType,
 ) {
+	// There are four cases to consider:
+	// 1. currentVReg is valid, but desiredVReg is on the stack.
+	// 2. Both currentVReg and desiredVReg are valid.
+	// 3. Desired is on a different register than `r` and currentReg is not valid.
+	// 4. Desired is on the stack and currentReg is not valid.
+ s := &a.state if currentVReg.Valid() { - // Both are on reg. - er, ok := currentOccupantsRev[desiredVReg] - if !ok { + desiredState := s.getVRegState(desiredVReg.ID()) + er := desiredState.r + if er == RealRegInvalid { + // Case 1: currentVReg is valid, but desiredVReg is on the stack. if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is desired to be on %s, but currently on the stack\n", desiredVReg.ID(), a.regInfo.RealRegName(r), ) } - // This case is that the desired value is on the stack, but currentVReg is on the target register. - // We need to move the current value to the stack, and reload the desired value. + // We need to move the current value to the stack, and reload the desired value into the register. // TODO: we can do better here. f.StoreRegisterBefore(currentVReg.SetRealReg(r), pred.LastInstrForInsertion()) - delete(currentOccupantsRev, currentVReg) + s.releaseRealReg(r) s.getVRegState(desiredVReg.ID()).recordReload(f, pred) f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion()) - currentOccupants.add(r, desiredVReg) - currentOccupantsRev[desiredVReg] = r + s.useRealReg(r, desiredVReg) return - } - - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", - desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + } else { + // Case 2: Both currentVReg and desiredVReg are valid. + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d is desired to be on %s, but currently on %s\n", + desiredVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er), + ) + } + // This case, we need to swap the values between the current and desired values. + f.SwapBefore( + currentVReg.SetRealReg(r), + desiredVReg.SetRealReg(er), + freeReg, + pred.LastInstrForInsertion(), ) - } - f.SwapBefore( - currentVReg.SetRealReg(r), - desiredVReg.SetRealReg(er), - freeReg, - pred.LastInstrForInsertion(), - ) - s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) - currentOccupantsRev[desiredVReg] = r - currentOccupantsRev[currentVReg] = er - currentOccupants.add(r, desiredVReg) - currentOccupants.add(er, currentVReg) - if wazevoapi.RegAllocLoggingEnabled { - fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + s.allocatedRegSet = s.allocatedRegSet.add(freeReg.RealReg()) + s.releaseRealReg(r) + s.releaseRealReg(er) + s.useRealReg(r, desiredVReg) + s.useRealReg(er, currentVReg) + if wazevoapi.RegAllocLoggingEnabled { + fmt.Printf("\t\tv%d previously on %s moved to %s\n", currentVReg.ID(), a.regInfo.RealRegName(r), a.regInfo.RealRegName(er)) + } } } else { - // Desired is on reg, but currently the target register is not used. if wazevoapi.RegAllocLoggingEnabled { fmt.Printf("\t\tv%d is desired to be on %s, current not used\n", desiredVReg.ID(), a.regInfo.RealRegName(r), ) } - if currentReg, ok := currentOccupantsRev[desiredVReg]; ok { + if currentReg := s.getVRegState(desiredVReg.ID()).r; currentReg != RealRegInvalid { + // Case 3: Desired is on a different register than `r` and currentReg is not valid. + // We simply need to move the desired value to the register. f.InsertMoveBefore( FromRealReg(r, typ), desiredVReg.SetRealReg(currentReg), pred.LastInstrForInsertion(), ) - currentOccupants.remove(currentReg) + s.releaseRealReg(currentReg) } else { + // Case 4: Both currentVReg and desiredVReg are not valid. + // We simply need to reload the desired value into the register. 
 			s.getVRegState(desiredVReg.ID()).recordReload(f, pred)
 			f.ReloadRegisterBefore(desiredVReg.SetRealReg(r), pred.LastInstrForInsertion())
 		}
-		currentOccupantsRev[desiredVReg] = r
-		currentOccupants.add(r, desiredVReg)
-	}
-
-	if wazevoapi.RegAllocLoggingEnabled {
-		fmt.Println("\t", pred.ID(), ":", currentOccupants.format(a.regInfo))
+		s.useRealReg(r, desiredVReg)
 	}
 }
 
@@ -1169,8 +1158,7 @@ func (a *Allocator) scheduleSpill(f Function, vs *vrState) {
 	}
 	for pos != definingBlk {
 		st := a.getOrAllocateBlockState(pos.ID())
-		for ii := 0; ii < 64; ii++ {
-			rr := RealReg(ii)
+		for rr := RealReg(0); rr < 64; rr++ {
 			if st.startRegs.get(rr) == v {
 				r = rr
 				// Already in the register, so we can place the spill at the beginning of the block.
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
index e9bf60661..04a8e8f4d 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/backend/regalloc/regset.go
@@ -46,23 +46,24 @@ func (rs RegSet) Range(f func(allocatedRealReg RealReg)) {
 	}
 }
 
-type regInUseSet struct {
-	set RegSet
-	vrs [64]VReg
+type regInUseSet [64]VReg
+
+func newRegInUseSet() regInUseSet {
+	var ret regInUseSet
+	ret.reset()
+	return ret
 }
 
 func (rs *regInUseSet) reset() {
-	rs.set = 0
-	for i := range rs.vrs {
-		rs.vrs[i] = VRegInvalid
+	for i := range rs {
+		rs[i] = VRegInvalid
 	}
 }
 
 func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
 	var ret []string
-	for i := 0; i < 64; i++ {
-		if rs.set&(1<<uint(i)) != 0 {
-			vr := rs.vrs[i]
+	for i, vr := range rs {
+		if vr != VRegInvalid {
 			ret = append(ret, fmt.Sprintf("(%s->v%d)", info.RealRegName(RealReg(i)), vr.ID()))
 		}
 	}
@@ -70,39 +71,28 @@ func (rs *regInUseSet) format(info *RegisterInfo) string { //nolint:unused
 }
 
 func (rs *regInUseSet) has(r RealReg) bool {
-	if r >= 64 {
-		return false
-	}
-	return rs.set&(1<<uint(r)) != 0
+	return r < 64 && rs[r].Valid()
 }
 
 func (rs *regInUseSet) get(r RealReg) VReg {
-	if r >= 64 {
-		return VRegInvalid
-	}
-	return rs.vrs[r]
+	return rs[r]
 }
 
 func (rs *regInUseSet) remove(r RealReg) {
-	if r >= 64 {
-		return
-	}
-	rs.set &= ^(1 << uint(r))
-	rs.vrs[r] = VRegInvalid
+	rs[r] = VRegInvalid
 }
 
 func (rs *regInUseSet) add(r RealReg, vr VReg) {
 	if r >= 64 {
 		return
 	}
-	rs.set |= 1 << uint(r)
-	rs.vrs[r] = vr
+	rs[r] = vr
 }
 
 func (rs *regInUseSet) range_(f func(allocatedRealReg RealReg, vr VReg)) {
-	for i := 0; i < 64; i++ {
-		if rs.set&(1<<uint(i)) != 0 {
-			f(RealReg(i), rs.vrs[i])
+	for i, vr := range rs {
+		if vr != VRegInvalid {
+			f(RealReg(i), vr)
 		}
 	}
 }
diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
--- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/module_engine.go
@@ ... @@
-func putLocalMemory(opaque []byte, offset wazevoapi.Offset, mem *wasm.MemoryInstance) {
-	s := uint64(len(mem.Buffer))
-	var b uint64
-	if len(mem.Buffer) > 0 {
-		b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0])))
-	}
-	binary.LittleEndian.PutUint64(opaque[offset:], b)
-	binary.LittleEndian.PutUint64(opaque[offset+8:], s)
-}
-
 func (m *moduleEngine) setupOpaque() {
 	inst := m.module
 	offsets := &m.parent.offsets
@@ -106,7 +96,7 @@
 	)
 
 	if lm := offsets.LocalMemoryBegin; lm >= 0 {
-		putLocalMemory(opaque, lm, inst.MemoryInstance)
+		m.putLocalMemory()
 	}
 
 	// Note: imported memory is resolved in ResolveImportedFunction.
@@ -227,6 +217,25 @@ func (m *moduleEngine) SetGlobalValue(i wasm.Index, lo, hi uint64) {
 // OwnsGlobals implements the same method as documented on wasm.ModuleEngine.
 func (m *moduleEngine) OwnsGlobals() bool { return true }
 
+// MemoryGrown implements wasm.ModuleEngine.
+func (m *moduleEngine) MemoryGrown() {
+	m.putLocalMemory()
+}
+
+// putLocalMemory writes the local memory buffer pointer and length to the opaque buffer.
+func (m *moduleEngine) putLocalMemory() { + mem := m.module.MemoryInstance + offset := m.parent.offsets.LocalMemoryBegin + + s := uint64(len(mem.Buffer)) + var b uint64 + if len(mem.Buffer) > 0 { + b = uint64(uintptr(unsafe.Pointer(&mem.Buffer[0]))) + } + binary.LittleEndian.PutUint64(m.opaque[offset:], b) + binary.LittleEndian.PutUint64(m.opaque[offset+8:], s) +} + // ResolveImportedFunction implements wasm.ModuleEngine. func (m *moduleEngine) ResolveImportedFunction(index, indexInImportedModule wasm.Index, importedModuleEngine wasm.ModuleEngine) { executableOffset, moduleCtxOffset, typeIDOffset := m.parent.offsets.ImportedFunctionOffset(index) diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go index 10b6b4b62..39627b989 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/basic_block.go @@ -49,21 +49,12 @@ type BasicBlock interface { // ReturnBlock returns ture if this block represents the function return. ReturnBlock() bool - // FormatHeader returns the debug string of this block, not including instruction. - FormatHeader(b Builder) string - // Valid is true if this block is still valid even after optimizations. Valid() bool // Sealed is true if this block has been sealed. Sealed() bool - // BeginPredIterator returns the first predecessor of this block. - BeginPredIterator() BasicBlock - - // NextPredIterator returns the next predecessor of this block. - NextPredIterator() BasicBlock - // Preds returns the number of predecessors of this block. Preds() int @@ -88,10 +79,11 @@ type ( basicBlock struct { id BasicBlockID rootInstr, currentInstr *Instruction - params []blockParam - predIter int - preds []basicBlockPredecessorInfo - success []*basicBlock + // params are Values that represent parameters to a basicBlock. + // Each parameter can be considered as an output of PHI instruction in traditional SSA. + params []Value + preds []basicBlockPredecessorInfo + success []*basicBlock // singlePred is the alias to preds[0] for fast lookup, and only set after Seal is called. singlePred *basicBlock // lastDefinitions maps Variable to its last definition in this block. @@ -116,11 +108,14 @@ type ( // loopNestingForestChildren holds the children of this block in the loop nesting forest. // Non-empty if and only if this block is a loop header (i.e. loopHeader=true) - loopNestingForestChildren []BasicBlock + loopNestingForestChildren wazevoapi.VarLength[BasicBlock] // reversePostOrder is used to sort all the blocks in the function in reverse post order. // This is used in builder.LayoutBlocks. - reversePostOrder int + reversePostOrder int32 + + // visited is used during various traversals. + visited int32 // child and sibling are the ones in the dominator tree. child, sibling *basicBlock @@ -128,15 +123,6 @@ type ( // BasicBlockID is the unique ID of a basicBlock. BasicBlockID uint32 - // blockParam implements Value and represents a parameter to a basicBlock. - blockParam struct { - // value is the Value that corresponds to the parameter in this block, - // and can be considered as an output of PHI instruction in traditional SSA. - value Value - // typ is the type of the parameter. - typ Type - } - unknownValue struct { // variable is the variable that this unknownValue represents. 
variable Variable @@ -145,6 +131,9 @@ type ( } ) +// basicBlockVarLengthNil is the default nil value for basicBlock.loopNestingForestChildren. +var basicBlockVarLengthNil = wazevoapi.NewNilVarLength[BasicBlock]() + const basicBlockIDReturnBlock = 0xffffffff // Name implements BasicBlock.Name. @@ -190,13 +179,13 @@ func (bb *basicBlock) ReturnBlock() bool { // AddParam implements BasicBlock.AddParam. func (bb *basicBlock) AddParam(b Builder, typ Type) Value { paramValue := b.allocateValue(typ) - bb.params = append(bb.params, blockParam{typ: typ, value: paramValue}) + bb.params = append(bb.params, paramValue) return paramValue } // addParamOn adds a parameter to this block whose value is already allocated. -func (bb *basicBlock) addParamOn(typ Type, value Value) { - bb.params = append(bb.params, blockParam{typ: typ, value: value}) +func (bb *basicBlock) addParamOn(value Value) { + bb.params = append(bb.params, value) } // Params implements BasicBlock.Params. @@ -206,8 +195,7 @@ func (bb *basicBlock) Params() int { // Param implements BasicBlock.Param. func (bb *basicBlock) Param(i int) Value { - p := &bb.params[i] - return p.value + return bb.params[i] } // Valid implements BasicBlock.Valid. @@ -248,22 +236,6 @@ func (bb *basicBlock) NumPreds() int { return len(bb.preds) } -// BeginPredIterator implements BasicBlock.BeginPredIterator. -func (bb *basicBlock) BeginPredIterator() BasicBlock { - bb.predIter = 0 - return bb.NextPredIterator() -} - -// NextPredIterator implements BasicBlock.NextPredIterator. -func (bb *basicBlock) NextPredIterator() BasicBlock { - if bb.predIter >= len(bb.preds) { - return nil - } - pred := bb.preds[bb.predIter].blk - bb.predIter++ - return pred -} - // Preds implements BasicBlock.Preds. func (bb *basicBlock) Preds() int { return len(bb.preds) @@ -305,7 +277,8 @@ func resetBasicBlock(bb *basicBlock) { bb.unknownValues = bb.unknownValues[:0] bb.lastDefinitions = wazevoapi.ResetMap(bb.lastDefinitions) bb.reversePostOrder = -1 - bb.loopNestingForestChildren = bb.loopNestingForestChildren[:0] + bb.visited = 0 + bb.loopNestingForestChildren = basicBlockVarLengthNil bb.loopHeader = false bb.sibling = nil bb.child = nil @@ -335,11 +308,11 @@ func (bb *basicBlock) addPred(blk BasicBlock, branch *Instruction) { pred.success = append(pred.success, bb) } -// FormatHeader implements BasicBlock.FormatHeader. -func (bb *basicBlock) FormatHeader(b Builder) string { +// formatHeader returns the string representation of the header of the basicBlock. +func (bb *basicBlock) formatHeader(b Builder) string { ps := make([]string, len(bb.params)) for i, p := range bb.params { - ps[i] = p.value.formatWithType(b) + ps[i] = p.formatWithType(b) } if len(bb.preds) > 0 { @@ -398,7 +371,7 @@ func (bb *basicBlock) String() string { // LoopNestingForestChildren implements BasicBlock.LoopNestingForestChildren. func (bb *basicBlock) LoopNestingForestChildren() []BasicBlock { - return bb.loopNestingForestChildren + return bb.loopNestingForestChildren.View() } // LoopHeader implements BasicBlock.LoopHeader. diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go index 1fc84d2ea..0b700c4b1 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/builder.go @@ -54,9 +54,6 @@ type Builder interface { // MustFindValue searches the latest definition of the given Variable and returns the result. 
MustFindValue(variable Variable) Value - // MustFindValueInBlk is the same as MustFindValue except it searches the latest definition from the given BasicBlock. - MustFindValueInBlk(variable Variable, blk BasicBlock) Value - // FindValueInLinearPath tries to find the latest definition of the given Variable in the linear path to the current BasicBlock. // If it cannot find the definition, or it's not sealed yet, it returns ValueInvalid. FindValueInLinearPath(variable Variable) Value @@ -127,7 +124,11 @@ type Builder interface { // Idom returns the immediate dominator of the given BasicBlock. Idom(blk BasicBlock) BasicBlock + // VarLengthPool returns the VarLengthPool of Value. VarLengthPool() *wazevoapi.VarLengthPool[Value] + + // InsertZeroValue inserts a zero value constant instruction of the given type. + InsertZeroValue(t Type) } // NewBuilder returns a new Builder implementation. @@ -135,10 +136,10 @@ func NewBuilder() Builder { return &builder{ instructionsPool: wazevoapi.NewPool[Instruction](resetInstruction), basicBlocksPool: wazevoapi.NewPool[basicBlock](resetBasicBlock), + varLengthBasicBlockPool: wazevoapi.NewVarLengthPool[BasicBlock](), varLengthPool: wazevoapi.NewVarLengthPool[Value](), valueAnnotations: make(map[ValueID]string), signatures: make(map[SignatureID]*Signature), - blkVisited: make(map[*basicBlock]int), valueIDAliases: make(map[ValueID]Value), redundantParameterIndexToValue: make(map[int]Value), returnBlk: &basicBlock{id: basicBlockIDReturnBlock}, @@ -177,12 +178,13 @@ type builder struct { dominators []*basicBlock sparseTree dominatorSparseTree + varLengthBasicBlockPool wazevoapi.VarLengthPool[BasicBlock] + // loopNestingForestRoots are the roots of the loop nesting forest. loopNestingForestRoots []BasicBlock // The followings are used for optimization passes/deterministic compilation. instStack []*Instruction - blkVisited map[*basicBlock]int valueIDToInstruction []*Instruction blkStack []*basicBlock blkStack2 []*basicBlock @@ -200,6 +202,32 @@ type builder struct { donePostBlockLayoutPasses bool currentSourceOffset SourceOffset + + // zeros are the zero value constants for each type. + zeros [typeEnd]Value +} + +// InsertZeroValue implements Builder.InsertZeroValue. 
+func (b *builder) InsertZeroValue(t Type) { + if b.zeros[t].Valid() { + return + } + zeroInst := b.AllocateInstruction() + switch t { + case TypeI32: + zeroInst.AsIconst32(0) + case TypeI64: + zeroInst.AsIconst64(0) + case TypeF32: + zeroInst.AsF32const(0) + case TypeF64: + zeroInst.AsF64const(0) + case TypeV128: + zeroInst.AsVconst(0, 0) + default: + panic("TODO: " + t.String()) + } + b.zeros[t] = zeroInst.Insert(b).Return() } func (b *builder) VarLengthPool() *wazevoapi.VarLengthPool[Value] { @@ -215,10 +243,12 @@ func (b *builder) ReturnBlock() BasicBlock { func (b *builder) Init(s *Signature) { b.nextVariable = 0 b.currentSignature = s + b.zeros = [typeEnd]Value{ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid, ValueInvalid} resetBasicBlock(b.returnBlk) b.instructionsPool.Reset() b.basicBlocksPool.Reset() b.varLengthPool.Reset() + b.varLengthBasicBlockPool.Reset() b.donePreBlockLayoutPasses = false b.doneBlockLayout = false b.donePostBlockLayoutPasses = false @@ -231,11 +261,6 @@ func (b *builder) Init(s *Signature) { b.blkStack2 = b.blkStack2[:0] b.dominators = b.dominators[:0] b.loopNestingForestRoots = b.loopNestingForestRoots[:0] - - for i := 0; i < b.basicBlocksPool.Allocated(); i++ { - blk := b.basicBlocksPool.View(i) - delete(b.blkVisited, blk) - } b.basicBlocksPool.Reset() for v := ValueID(0); v < b.nextValueID; v++ { @@ -448,11 +473,6 @@ func (b *builder) findValueInLinearPath(variable Variable, blk *basicBlock) Valu return ValueInvalid } -func (b *builder) MustFindValueInBlk(variable Variable, blk BasicBlock) Value { - typ := b.definedVariableType(variable) - return b.findValue(typ, variable, blk.(*basicBlock)) -} - // MustFindValue implements Builder.MustFindValue. func (b *builder) MustFindValue(variable Variable) Value { typ := b.definedVariableType(variable) @@ -482,6 +502,9 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value value: value, }) return value + } else if blk.EntryBlock() { + // If this is the entry block, we reach the uninitialized variable which has zero value. + return b.zeros[b.definedVariableType(variable)] } if pred := blk.singlePred; pred != nil { @@ -495,21 +518,42 @@ func (b *builder) findValue(typ Type, variable Variable, blk *basicBlock) Value // If this block has multiple predecessors, we have to gather the definitions, // and treat them as an argument to this block. // - // The first thing is to define a new parameter to this block which may or may not be redundant, but - // later we eliminate trivial params in an optimization pass. This must be done before finding the - // definitions in the predecessors so that we can break the cycle. - paramValue := blk.AddParam(b, typ) - b.DefineVariable(variable, paramValue, blk) - - // After the new param is added, we have to manipulate the original branching instructions - // in predecessors so that they would pass the definition of `variable` as the argument to - // the newly added PHI. + // But before that, we have to check if the possible definitions are the same Value. + tmpValue := b.allocateValue(typ) + // Break the cycle by defining the variable with the tmpValue. + b.DefineVariable(variable, tmpValue, blk) + // Check all the predecessors if they have the same definition. 
+ uniqueValue := ValueInvalid for i := range blk.preds { - pred := &blk.preds[i] - value := b.findValue(typ, variable, pred.blk) - pred.branch.addArgumentBranchInst(b, value) + predValue := b.findValue(typ, variable, blk.preds[i].blk) + if uniqueValue == ValueInvalid { + uniqueValue = predValue + } else if uniqueValue != predValue { + uniqueValue = ValueInvalid + break + } + } + + if uniqueValue != ValueInvalid { + // If all the predecessors have the same definition, we can use that value. + b.DefineVariable(variable, uniqueValue, blk) + b.alias(tmpValue, uniqueValue) + return uniqueValue + } else { + // Otherwise, add the tmpValue to this block as a parameter which may or may not be redundant, but + // later we eliminate trivial params in an optimization pass. This must be done before finding the + // definitions in the predecessors so that we can break the cycle. + blk.addParamOn(tmpValue) + // After the new param is added, we have to manipulate the original branching instructions + // in predecessors so that they would pass the definition of `variable` as the argument to + // the newly added PHI. + for i := range blk.preds { + pred := &blk.preds[i] + value := b.findValue(typ, variable, pred.blk) + pred.branch.addArgumentBranchInst(b, value) + } + return tmpValue } - return paramValue } // Seal implements Builder.Seal. @@ -523,7 +567,7 @@ func (b *builder) Seal(raw BasicBlock) { for _, v := range blk.unknownValues { variable, phiValue := v.variable, v.value typ := b.definedVariableType(variable) - blk.addParamOn(typ, phiValue) + blk.addParamOn(phiValue) for i := range blk.preds { pred := &blk.preds[i] predValue := b.findValue(typ, variable, pred.blk) @@ -566,7 +610,7 @@ func (b *builder) Format() string { } for bb := iterBegin(); bb != nil; bb = iterNext() { str.WriteByte('\n') - str.WriteString(bb.FormatHeader(b)) + str.WriteString(bb.formatHeader(b)) str.WriteByte('\n') for cur := bb.Root(); cur != nil; cur = cur.Next() { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go index a2e986cd1..89ec34b7e 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass.go @@ -22,9 +22,9 @@ func (b *builder) RunPasses() { func (b *builder) runPreBlockLayoutPasses() { passSortSuccessors(b) passDeadBlockEliminationOpt(b) - passRedundantPhiEliminationOpt(b) // The result of passCalculateImmediateDominators will be used by various passes below. passCalculateImmediateDominators(b) + passRedundantPhiEliminationOpt(b) passNopInstElimination(b) // TODO: implement either conversion of irreducible CFG into reducible one, or irreducible CFG detection where we panic. @@ -78,12 +78,11 @@ func (b *builder) runFinalizingPasses() { // passDeadBlockEliminationOpt searches the unreachable blocks, and sets the basicBlock.invalid flag true if so. func passDeadBlockEliminationOpt(b *builder) { entryBlk := b.entryBlk() - b.clearBlkVisited() b.blkStack = append(b.blkStack, entryBlk) for len(b.blkStack) > 0 { reachableBlk := b.blkStack[len(b.blkStack)-1] b.blkStack = b.blkStack[:len(b.blkStack)-1] - b.blkVisited[reachableBlk] = 0 // the value won't be used in this pass. 
+ reachableBlk.visited = 1 if !reachableBlk.sealed && !reachableBlk.ReturnBlock() { panic(fmt.Sprintf("%s is not sealed", reachableBlk)) @@ -94,7 +93,7 @@ func passDeadBlockEliminationOpt(b *builder) { } for _, succ := range reachableBlk.success { - if _, ok := b.blkVisited[succ]; ok { + if succ.visited == 1 { continue } b.blkStack = append(b.blkStack, succ) @@ -102,13 +101,16 @@ func passDeadBlockEliminationOpt(b *builder) { } for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { - if _, ok := b.blkVisited[blk]; !ok { + if blk.visited != 1 { blk.invalid = true } + blk.visited = 0 } } // passRedundantPhiEliminationOpt eliminates the redundant PHIs (in our terminology, parameters of a block). +// This requires the reverse post-order traversal to be calculated before calling this function, +// hence passCalculateImmediateDominators must be called before this. func passRedundantPhiEliminationOpt(b *builder) { redundantParameterIndexes := b.ints[:0] // reuse the slice from previous iterations. @@ -118,15 +120,18 @@ func passRedundantPhiEliminationOpt(b *builder) { // relatively small. For example, sqlite speedtest binary results in the large number of redundant PHIs, // the maximum number of iteration was 22, which seems to be acceptable but not that small either since the // complexity here is O(BlockNum * Iterations) at the worst case where BlockNum might be the order of thousands. + // -- Note -- + // Currently, each iteration can run in any order of blocks, but it empirically converges quickly in practice when + // running on the reverse post-order. It might be possible to optimize this further by using the dominator tree. for { changed := false - _ = b.blockIteratorBegin() // skip entry block! + _ = b.blockIteratorReversePostOrderBegin() // skip entry block! // Below, we intentionally use the named iteration variable name, as this comes with inevitable nested for loops! - for blk := b.blockIteratorNext(); blk != nil; blk = b.blockIteratorNext() { + for blk := b.blockIteratorReversePostOrderNext(); blk != nil; blk = b.blockIteratorReversePostOrderNext() { paramNum := len(blk.params) for paramIndex := 0; paramIndex < paramNum; paramIndex++ { - phiValue := blk.params[paramIndex].value + phiValue := blk.params[paramIndex] redundant := true nonSelfReferencingValue := ValueInvalid @@ -184,7 +189,7 @@ func passRedundantPhiEliminationOpt(b *builder) { // Still need to have the definition of the value of the PHI (previously as the parameter). for _, redundantParamIndex := range redundantParameterIndexes { - phiValue := blk.params[redundantParamIndex].value + phiValue := blk.params[redundantParamIndex] onlyValue := b.redundantParameterIndexToValue[redundantParamIndex] // Create an alias in this block from the only phi argument to the phi value. b.alias(phiValue, onlyValue) @@ -227,10 +232,10 @@ func passRedundantPhiEliminationOpt(b *builder) { func passDeadCodeEliminationOpt(b *builder) { nvid := int(b.nextValueID) if nvid >= len(b.valueRefCounts) { - b.valueRefCounts = append(b.valueRefCounts, make([]int, b.nextValueID)...) + b.valueRefCounts = append(b.valueRefCounts, make([]int, nvid-len(b.valueRefCounts)+1)...) } if nvid >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, nvid-len(b.valueIDToInstruction)+1)...) } // First, we gather all the instructions with side effects. 
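
The two hunks above fix how the ID-indexed slices are grown: the old code appended make([]T, b.nextValueID) extra elements whenever the slice was too short, over-allocating by the full ID count on every resize, while the new code appends exactly nvid-len(s)+1 zeroed elements so the slice ends just past the highest index it must hold. The same correction is applied to passNopInstElimination further below. A minimal, self-contained sketch of the corrected idiom, assuming nothing beyond the standard library (growToFit is an illustrative name, not a wazero API):

	package main

	import "fmt"

	// growToFit extends s with zeroed elements so that index idx becomes
	// valid, appending exactly idx-len(s)+1 elements as the hunk above does,
	// rather than a fixed idx-sized chunk as the old code did.
	func growToFit[T any](s []T, idx int) []T {
		if idx >= len(s) {
			s = append(s, make([]T, idx-len(s)+1)...)
		}
		return s
	}

	func main() {
		refCounts := growToFit(make([]int, 2), 5)
		fmt.Println(len(refCounts)) // 6: indexes 0..5 are now addressable.
	}
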
@@ -350,22 +355,10 @@ func (b *builder) incRefCount(id ValueID, from *Instruction) { b.valueRefCounts[id]++ } -// clearBlkVisited clears the b.blkVisited map so that we can reuse it for multiple places. -func (b *builder) clearBlkVisited() { - b.blkStack2 = b.blkStack2[:0] - for key := range b.blkVisited { - b.blkStack2 = append(b.blkStack2, key) - } - for _, blk := range b.blkStack2 { - delete(b.blkVisited, blk) - } - b.blkStack2 = b.blkStack2[:0] -} - // passNopInstElimination eliminates the instructions which is essentially a no-op. func passNopInstElimination(b *builder) { if int(b.nextValueID) >= len(b.valueIDToInstruction) { - b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, b.nextValueID)...) + b.valueIDToInstruction = append(b.valueIDToInstruction, make([]*Instruction, int(b.nextValueID)-len(b.valueIDToInstruction)+1)...) } for blk := b.blockIteratorBegin(); blk != nil; blk = b.blockIteratorNext() { diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go index 9068180a0..584b5eade 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_blk_layouts.go @@ -23,8 +23,6 @@ import ( // // This heuristic is done in maybeInvertBranches function. func passLayoutBlocks(b *builder) { - b.clearBlkVisited() - // We might end up splitting critical edges which adds more basic blocks, // so we store the currently existing basic blocks in nonSplitBlocks temporarily. // That way we can iterate over the original basic blocks while appending new ones into reversePostOrderedBasicBlocks. @@ -47,20 +45,20 @@ func passLayoutBlocks(b *builder) { for _, blk := range nonSplitBlocks { for i := range blk.preds { pred := blk.preds[i].blk - if _, ok := b.blkVisited[pred]; ok || !pred.Valid() { + if pred.visited == 1 || !pred.Valid() { continue } else if pred.reversePostOrder < blk.reversePostOrder { // This means the edge is critical, and this pred is the trampoline and yet to be inserted. // Split edge trampolines must come before the destination in reverse post-order. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, pred) - b.blkVisited[pred] = 0 // mark as inserted, the value is not used. + pred.visited = 1 // mark as inserted. } } // Now that we've already added all the potential trampoline blocks incoming to this block, // we can add this block itself. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, blk) - b.blkVisited[blk] = 0 // mark as inserted, the value is not used. + blk.visited = 1 // mark as inserted. if len(blk.success) < 2 { // There won't be critical edge originating from this block. @@ -116,7 +114,7 @@ func passLayoutBlocks(b *builder) { if fallthroughBranch.opcode == OpcodeJump && fallthroughBranch.blk == trampoline { // This can be lowered as fallthrough at the end of the block. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + trampoline.visited = 1 // mark as inserted. } else { uninsertedTrampolines = append(uninsertedTrampolines, trampoline) } @@ -126,7 +124,7 @@ func passLayoutBlocks(b *builder) { if trampoline.success[0].reversePostOrder <= trampoline.reversePostOrder { // "<=", not "<" because the target might be itself. 
// This means the critical edge was backward, so we insert after the current block immediately. b.reversePostOrderedBasicBlocks = append(b.reversePostOrderedBasicBlocks, trampoline) - b.blkVisited[trampoline] = 0 // mark as inserted, the value is not used. + trampoline.visited = 1 // mark as inserted. } // If the target is forward, we can wait to insert until the target is inserted. } uninsertedTrampolines = uninsertedTrampolines[:0] // Reuse the stack for the next block. @@ -142,8 +140,8 @@ func passLayoutBlocks(b *builder) { if wazevoapi.SSAValidationEnabled { for _, trampoline := range trampolines { - if _, ok := b.blkVisited[trampoline]; !ok { - panic("BUG: trampoline block not inserted: " + trampoline.FormatHeader(b)) + if trampoline.visited != 1 { + panic("BUG: trampoline block not inserted: " + trampoline.formatHeader(b)) } trampoline.validate(b) } diff --git a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go index 50cb9c475..e8288c4bd 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go +++ b/vendor/github.com/tetratelabs/wazero/internal/engine/wazevo/ssa/pass_cfg.go @@ -15,10 +15,6 @@ import ( // At the last of pass, this function also does the loop detection and sets the basicBlock.loop flag. func passCalculateImmediateDominators(b *builder) { reversePostOrder := b.reversePostOrderedBasicBlocks[:0] - exploreStack := b.blkStack[:0] - b.clearBlkVisited() - - entryBlk := b.entryBlk() // Store the reverse postorder from the entrypoint into reversePostOrder slice. // This calculation of reverse postorder is not described in the paper, @@ -28,14 +24,17 @@ func passCalculateImmediateDominators(b *builder) { // which is a reasonable assumption as long as SSA Builder is properly used. // // First we push blocks in postorder iteratively visit successors of the entry block. - exploreStack = append(exploreStack, entryBlk) + entryBlk := b.entryBlk() + exploreStack := append(b.blkStack[:0], entryBlk) + // These flags are used to track the state of the block in the DFS traversal. + // We temporarily use the reversePostOrder field to store the state. const visitStateUnseen, visitStateSeen, visitStateDone = 0, 1, 2 - b.blkVisited[entryBlk] = visitStateSeen + entryBlk.visited = visitStateSeen for len(exploreStack) > 0 { tail := len(exploreStack) - 1 blk := exploreStack[tail] exploreStack = exploreStack[:tail] - switch b.blkVisited[blk] { + switch blk.visited { case visitStateUnseen: // This is likely a bug in the frontend. panic("BUG: unsupported CFG") @@ -48,16 +47,18 @@ func passCalculateImmediateDominators(b *builder) { if succ.ReturnBlock() || succ.invalid { continue } - if b.blkVisited[succ] == visitStateUnseen { - b.blkVisited[succ] = visitStateSeen + if succ.visited == visitStateUnseen { + succ.visited = visitStateSeen exploreStack = append(exploreStack, succ) } } // Finally, we could pop this block once we pop all of its successors. - b.blkVisited[blk] = visitStateDone + blk.visited = visitStateDone case visitStateDone: // Note: at this point we push blk in postorder despite its name. reversePostOrder = append(reversePostOrder, blk) + default: + panic("BUG") } } // At this point, reversePostOrder has postorder actually, so we reverse it. 
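
The traversal above now keeps its bookkeeping in the new basicBlock.visited field instead of the old blkVisited map, driving an iterative DFS through three states (unseen, seen, done) and collecting blocks in postorder before reversing. A compact sketch of the same three-state pattern, under illustrative stand-in types (Node is not a wazero type, and the real code additionally panics on unseen pops and skips return/invalid blocks):

	package main

	import "fmt"

	// Node stands in for *basicBlock; visited uses the same three states
	// as the diff: 0=unseen, 1=seen, 2=done.
	type Node struct {
		id      int
		succs   []*Node
		visited int32
	}

	// postorder pops a seen node, re-pushes it beneath its unseen successors,
	// and emits it once it is popped again in the done state, by which point
	// every successor has already been emitted.
	func postorder(entry *Node) (order []*Node) {
		stack := []*Node{entry}
		entry.visited = 1
		for len(stack) > 0 {
			n := stack[len(stack)-1]
			stack = stack[:len(stack)-1]
			switch n.visited {
			case 1: // seen: revisit after the successors are done.
				stack = append(stack, n)
				for _, s := range n.succs {
					if s.visited == 0 {
						s.visited = 1
						stack = append(stack, s)
					}
				}
				n.visited = 2
			case 2: // done: safe to emit.
				order = append(order, n)
			}
		}
		return order
	}

	func main() {
		a, b, c := &Node{id: 0}, &Node{id: 1}, &Node{id: 2}
		a.succs = []*Node{b, c}
		b.succs = []*Node{c}
		for _, n := range postorder(a) {
			fmt.Print(n.id, " ") // prints: 2 1 0
		}
		fmt.Println()
	}
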
@@ -67,7 +68,7 @@ func passCalculateImmediateDominators(b *builder) { } for i, blk := range reversePostOrder { - blk.reversePostOrder = i + blk.reversePostOrder = int32(i) } // Reuse the dominators slice if possible from the previous computation of function. @@ -180,7 +181,7 @@ func passBuildLoopNestingForest(b *builder) { b.loopNestingForestRoots = append(b.loopNestingForestRoots, blk) } else if n == ent { } else if n.loopHeader { - n.loopNestingForestChildren = append(n.loopNestingForestChildren, blk) + n.loopNestingForestChildren = n.loopNestingForestChildren.Append(&b.varLengthBasicBlockPool, blk) } } @@ -193,7 +194,7 @@ func passBuildLoopNestingForest(b *builder) { func printLoopNestingForest(root *basicBlock, depth int) { fmt.Println(strings.Repeat("\t", depth), "loop nesting forest root:", root.ID()) - for _, child := range root.loopNestingForestChildren { + for _, child := range root.loopNestingForestChildren.View() { fmt.Println(strings.Repeat("\t", depth+1), "child:", child.ID()) if child.LoopHeader() { printLoopNestingForest(child.(*basicBlock), depth+2) @@ -202,10 +203,10 @@ func printLoopNestingForest(root *basicBlock, depth int) { } type dominatorSparseTree struct { - time int + time int32 euler []*basicBlock - first, depth []int - table [][]int + first, depth []int32 + table [][]int32 } // passBuildDominatorTree builds the dominator tree for the function, and constructs builder.sparseTree. @@ -232,11 +233,11 @@ func passBuildDominatorTree(b *builder) { n := b.basicBlocksPool.Allocated() st := &b.sparseTree st.euler = append(st.euler[:0], make([]*basicBlock, 2*n-1)...) - st.first = append(st.first[:0], make([]int, n)...) + st.first = append(st.first[:0], make([]int32, n)...) for i := range st.first { st.first[i] = -1 } - st.depth = append(st.depth[:0], make([]int, 2*n-1)...) + st.depth = append(st.depth[:0], make([]int32, 2*n-1)...) st.time = 0 // Start building the sparse tree. @@ -244,9 +245,9 @@ func passBuildDominatorTree(b *builder) { st.buildSparseTable() } -func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int) { +func (dt *dominatorSparseTree) eulerTour(node *basicBlock, height int32) { if wazevoapi.SSALoggingEnabled { - fmt.Println(strings.Repeat("\t", height), "euler tour:", node.ID()) + fmt.Println(strings.Repeat("\t", int(height)), "euler tour:", node.ID()) } dt.euler[dt.time] = node dt.depth[dt.time] = height @@ -270,13 +271,13 @@ func (dt *dominatorSparseTree) buildSparseTable() { table := dt.table if n >= len(table) { - table = append(table, make([][]int, n+1)...) + table = append(table, make([][]int32, n-len(table)+1)...) } for i := range table { if len(table[i]) < k { - table[i] = append(table[i], make([]int, k)...) + table[i] = append(table[i], make([]int32, k-len(table[i]))...) 
 		}
-		table[i][0] = i
+		table[i][0] = int32(i)
 	}
 
 	for j := 1; 1<<j <= n; j++ {
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/func_validation.go
@@ ... @@ func (m *Module) validateFunctionWithMaxStackValues(
-	nonStaticLocals := make(map[Index]struct{})
-	if len(m.NonStaticLocals) > 0 {
-		m.NonStaticLocals[idx] = nonStaticLocals
-	}
-
 	functionType := &m.TypeSection[m.FunctionSection[idx]]
 	code := &m.CodeSection[idx]
 	body := code.Body
@@ -357,7 +352,6 @@
 			return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", OpcodeLocalSetName, index, l)
 		}
-		nonStaticLocals[index] = struct{}{}
 		var expType ValueType
 		if index < inputLen {
 			expType = functionType.Params[index]
@@ -373,7 +367,6 @@
 			return fmt.Errorf("invalid local index for %s %d >= %d(=len(locals)+len(parameters))", OpcodeLocalTeeName, index, l)
 		}
-		nonStaticLocals[index] = struct{}{}
 		var expType ValueType
 		if index < inputLen {
 			expType = functionType.Params[index]
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
index 5cc5012da..947b16112 100644
--- a/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
+++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/memory.go
@@ -59,11 +59,14 @@ type MemoryInstance struct {
 	// with a fixed weight of 1 and no spurious notifications.
 	waiters sync.Map
 
+	// ownerModuleEngine is the module engine that owns this memory instance.
+	ownerModuleEngine ModuleEngine
+
 	expBuffer experimental.LinearMemory
 }
 
 // NewMemoryInstance creates a new instance based on the parameters in the SectionIDMemory.
-func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *MemoryInstance {
+func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator, moduleEngine ModuleEngine) *MemoryInstance {
 	minBytes := MemoryPagesToBytesNum(memSec.Min)
 	capBytes := MemoryPagesToBytesNum(memSec.Cap)
 	maxBytes := MemoryPagesToBytesNum(memSec.Max)
@@ -89,12 +92,13 @@ func NewMemoryInstance(memSec *Memory, allocator experimental.MemoryAllocator) *
 		buffer = make([]byte, minBytes, capBytes)
 	}
 	return &MemoryInstance{
-		Buffer:    buffer,
-		Min:       memSec.Min,
-		Cap:       memoryBytesNumToPages(uint64(cap(buffer))),
-		Max:       memSec.Max,
-		Shared:    memSec.IsShared,
-		expBuffer: expBuffer,
+		Buffer:            buffer,
+		Min:               memSec.Min,
+		Cap:               memoryBytesNumToPages(uint64(cap(buffer))),
+		Max:               memSec.Max,
+		Shared:            memSec.IsShared,
+		expBuffer:         expBuffer,
+		ownerModuleEngine: moduleEngine,
 	}
 }
 
@@ -247,14 +251,12 @@ func (m *MemoryInstance) Grow(delta uint32) (result uint32, ok bool) {
 			m.Buffer = buffer
 			m.Cap = newPages
 		}
-		return currentPages, true
 	} else if newPages > m.Cap { // grow the memory.
 		if m.Shared {
 			panic("shared memory cannot be grown, this is a bug in wazero")
 		}
 		m.Buffer = append(m.Buffer, make([]byte, MemoryPagesToBytesNum(delta))...)
 		m.Cap = newPages
-		return currentPages, true
 	} else { // We already have the capacity we need.
 		if m.Shared { // We assume grow is called under a guest lock.
@@ -264,8 +266,9 @@
 		} else {
 			m.Buffer = m.Buffer[:MemoryPagesToBytesNum(newPages)]
 		}
-		return currentPages, true
 	}
+	m.ownerModuleEngine.MemoryGrown()
+	return currentPages, true
 }
 
 // Pages implements the same method as documented on api.Memory.
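
Taken together with the module_engine.go change earlier in this diff, the memory.go hunks above give every MemoryInstance a back-reference to its owning ModuleEngine and funnel all three grow paths into a single MemoryGrown notification, so the engine can refresh the cached buffer base pointer and length it exposes to compiled code. A minimal sketch of that callback shape, assuming stand-in Engine and Memory types (wazero's real MemoryGrown takes no arguments and reads the owned instance's buffer via unsafe.Pointer; a fake base address is used here):

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	// Engine caches the (base pointer, length) pair that compiled code reads,
	// mirroring the opaque buffer written by putLocalMemory above.
	type Engine struct{ opaque [16]byte }

	// MemoryGrown refreshes the cached pair after the buffer may have moved.
	func (e *Engine) MemoryGrown(buf []byte) {
		const fakeBase = 0x1000 // stand-in for uintptr(unsafe.Pointer(&buf[0]))
		binary.LittleEndian.PutUint64(e.opaque[0:], fakeBase)
		binary.LittleEndian.PutUint64(e.opaque[8:], uint64(len(buf)))
	}

	// Memory notifies its owner once per successful grow, as the diff now
	// does at the end of MemoryInstance.Grow.
	type Memory struct {
		Buffer []byte
		owner  *Engine
	}

	func (m *Memory) Grow(deltaBytes int) {
		m.Buffer = append(m.Buffer, make([]byte, deltaBytes)...)
		m.owner.MemoryGrown(m.Buffer)
	}

	func main() {
		e := &Engine{}
		m := &Memory{owner: e}
		m.Grow(65536)
		fmt.Println(binary.LittleEndian.Uint64(e.opaque[8:])) // 65536
	}
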
diff --git a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go index 68573b918..8369ad9ed 100644 --- a/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go +++ b/vendor/github.com/tetratelabs/wazero/internal/wasm/module.go @@ -185,9 +185,6 @@ type Module struct { // as described in https://yurydelendik.github.io/webassembly-dwarf/, though it is not specified in the Wasm // specification: https://github.com/WebAssembly/debugging/issues/1 DWARFLines *wasmdebug.DWARFLines - - // NonStaticLocals collects the local indexes that will change its value through either local.get or local.tee. - NonStaticLocals []map[Index]struct{} } // ModuleID represents sha256 hash value uniquely assigned to Module. @@ -366,8 +363,6 @@ func (m *Module) validateFunctions(enabledFeatures api.CoreFeatures, functions [ br := bytes.NewReader(nil) // Also, we reuse the stacks across multiple function validations to reduce allocations. vs := &stacks{} - // Non-static locals are gathered during validation and used in the down-stream compilation. - m.NonStaticLocals = make([]map[Index]struct{}, len(m.FunctionSection)) for idx, typeIndex := range m.FunctionSection { if typeIndex >= typeCount { return fmt.Errorf("invalid %s: type section index %d out of range", m.funcDesc(SectionIDFunction, Index(idx)), typeIndex) @@ -655,7 +650,7 @@ func paramNames(localNames IndirectNameMap, funcIdx uint32, paramLen int) []stri func (m *ModuleInstance) buildMemory(module *Module, allocator experimental.MemoryAllocator) { memSec := module.MemorySection if memSec != nil { - m.MemoryInstance = NewMemoryInstance(memSec, allocator) + m.MemoryInstance = NewMemoryInstance(memSec, allocator, m.Engine) m.MemoryInstance.definition = &module.MemoryDefinitionSection[0] } } diff --git a/vendor/modules.txt b/vendor/modules.txt index d0fd99a6c..201dcdd5c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -517,7 +517,7 @@ github.com/modern-go/concurrent # github.com/modern-go/reflect2 v1.0.2 ## explicit; go 1.12 github.com/modern-go/reflect2 -# github.com/ncruces/go-sqlite3 v0.16.1 +# github.com/ncruces/go-sqlite3 v0.16.2 ## explicit; go 1.21 github.com/ncruces/go-sqlite3 github.com/ncruces/go-sqlite3/driver @@ -833,7 +833,7 @@ github.com/tdewolff/parse/v2/strconv # github.com/technologize/otel-go-contrib v1.1.1 ## explicit; go 1.17 github.com/technologize/otel-go-contrib/otelginmetrics -# github.com/tetratelabs/wazero v1.7.2 +# github.com/tetratelabs/wazero v1.7.3 ## explicit; go 1.20 github.com/tetratelabs/wazero github.com/tetratelabs/wazero/api