From 4f511e21258267d17c16eb9f2e335f83f8c06a3e Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Thu, 9 Nov 2023 09:32:10 +0800 Subject: [PATCH] =?UTF-8?q?internal/subtle=20xor=20=E7=AE=80=E5=8C=96?= =?UTF-8?q?=E4=BB=A3=E7=A0=81=EF=BC=8C=E5=8E=BB=E9=99=A4=E9=87=8D=E5=A4=8D?= =?UTF-8?q?=20#75?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- internal/subtle/xor.go | 24 ++++++++++ internal/subtle/xor_amd64.go | 17 ------- internal/subtle/xor_arm64.go | 18 ------- internal/subtle/xor_generic.go | 85 +++++++++++++++------------------- 4 files changed, 62 insertions(+), 82 deletions(-) create mode 100644 internal/subtle/xor.go diff --git a/internal/subtle/xor.go b/internal/subtle/xor.go new file mode 100644 index 0000000..cdc4529 --- /dev/null +++ b/internal/subtle/xor.go @@ -0,0 +1,24 @@ +// Copyright 2022 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package subtle + +// XORBytes sets dst[i] = x[i] ^ y[i] for all i < n = min(len(x), len(y)), +// returning n, the number of bytes written to dst. +// If dst does not have length at least n, +// XORBytes panics without writing anything to dst. +func XORBytes(dst, x, y []byte) int { + n := len(x) + if len(y) < n { + n = len(y) + } + if n == 0 { + return 0 + } + if n > len(dst) { + panic("subtle.XORBytes: dst too short") + } + xorBytes(&dst[0], &x[0], &y[0], n) // arch-specific + return n +} diff --git a/internal/subtle/xor_amd64.go b/internal/subtle/xor_amd64.go index bb4d85c..fb6bccb 100644 --- a/internal/subtle/xor_amd64.go +++ b/internal/subtle/xor_amd64.go @@ -10,22 +10,5 @@ import "golang.org/x/sys/cpu" var useAVX2 = cpu.X86.HasAVX2 -// XORBytes xors the bytes in a and b. The destination should have enough -// space, otherwise XORBytes will panic. Returns the number of bytes xor'd. -func XORBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - if n > len(dst) { - panic("subtle.XORBytes: dst too short") - } - xorBytes(&dst[0], &a[0], &b[0], n) // amd64 must have SSE2 - return n -} - //go:noescape func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_arm64.go b/internal/subtle/xor_arm64.go index 15fcb5f..912678e 100644 --- a/internal/subtle/xor_arm64.go +++ b/internal/subtle/xor_arm64.go @@ -6,23 +6,5 @@ package subtle -// XORBytes xors the bytes in a and b. The destination should have enough -// space, otherwise XORBytes will panic. Returns the number of bytes xor'd. -func XORBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - if n > len(dst) { - panic("subtle.XORBytes: dst too short") - } - - xorBytes(&dst[0], &a[0], &b[0], n) - return n -} - //go:noescape func xorBytes(dst, a, b *byte, n int) diff --git a/internal/subtle/xor_generic.go b/internal/subtle/xor_generic.go index 05033c1..2d7f1d5 100644 --- a/internal/subtle/xor_generic.go +++ b/internal/subtle/xor_generic.go @@ -11,63 +11,54 @@ import ( "unsafe" ) -const wordSize = int(unsafe.Sizeof(uintptr(0))) +const wordSize = unsafe.Sizeof(uintptr(0)) const supportsUnaligned = runtime.GOARCH == "386" || + runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x" -// XORBytes xors the bytes in a and b. The destination should have enough -// space, otherwise XORBytes will panic. Returns the number of bytes xor'd. -func XORBytes(dst, a, b []byte) int { - n := len(a) - if len(b) < n { - n = len(b) - } - if n == 0 { - return 0 - } - if n > len(dst) { - panic("subtle.XORBytes: dst too short") - } +func xorBytes(dstb, xb, yb *byte, n int) { + // xorBytes assembly is written using pointers and n. Back to slices. + dst := unsafe.Slice(dstb, n) + x := unsafe.Slice(xb, n) + y := unsafe.Slice(yb, n) - switch { - case supportsUnaligned: - fastXORBytes(dst, a, b, n) - default: - // TODO(hanwen): if (dst, a, b) have common alignment - // we could still try fastXORBytes. It is not clear - // how often this happens, and it's only worth it if - // the block encryption itself is hardware - // accelerated. - safeXORBytes(dst, a, b, n) - } - return n -} - -// fastXORBytes xors in bulk. It only works on architectures that -// support unaligned read/writes. -// n needs to be smaller or equal than the length of a and b. -func fastXORBytes(dst, a, b []byte, n int) { - w := n / wordSize - if w > 0 { - dw := *(*[]uintptr)(unsafe.Pointer(&dst)) - aw := *(*[]uintptr)(unsafe.Pointer(&a)) - bw := *(*[]uintptr)(unsafe.Pointer(&b)) - for i := 0; i < w; i++ { - dw[i] = aw[i] ^ bw[i] + if supportsUnaligned || aligned(dstb, xb, yb) { + xorLoop(words(dst), words(x), words(y)) + if uintptr(n)%wordSize == 0 { + return } + done := n &^ int(wordSize-1) + dst = dst[done:] + x = x[done:] + y = y[done:] } - - for i := (n - n%wordSize); i < n; i++ { - dst[i] = a[i] ^ b[i] - } + xorLoop(dst, x, y) } -// n needs to be smaller or equal than the length of a and b. -func safeXORBytes(dst, a, b []byte, n int) { - for i := 0; i < n; i++ { - dst[i] = a[i] ^ b[i] +// aligned reports whether dst, x, and y are all word-aligned pointers. +func aligned(dst, x, y *byte) bool { + return (uintptr(unsafe.Pointer(dst))|uintptr(unsafe.Pointer(x))|uintptr(unsafe.Pointer(y)))&(wordSize-1) == 0 +} + +// words returns a []uintptr pointing at the same data as x, +// with any trailing partial word removed. +func words(x []byte) []uintptr { + n := uintptr(len(x)) / wordSize + if n == 0 { + // Avoid creating a *uintptr that refers to data smaller than a uintptr; + // see issue 59334. + return nil + } + return unsafe.Slice((*uintptr)(unsafe.Pointer(&x[0])), n) +} + +func xorLoop[T byte | uintptr](dst, x, y []T) { + x = x[:len(dst)] // remove bounds check in loop + y = y[:len(dst)] // remove bounds check in loop + for i := range dst { + dst[i] = x[i] ^ y[i] } }