diff --git a/.github/workflows/test_qemu.yml b/.github/workflows/test_qemu.yml
index 3d2aadf..632aff1 100644
--- a/.github/workflows/test_qemu.yml
+++ b/.github/workflows/test_qemu.yml
@@ -30,7 +30,7 @@ jobs:
         uses: actions/checkout@v3
 
       - name: Test
-        run: go test -v -short ./...
+        run: go test -v -short ./cipher/...
         env:
           DISABLE_SM3NI: 1
           DISABLE_SM4NI: 1
diff --git a/cipher/xts_arm64.s b/cipher/xts_arm64.s
new file mode 100644
index 0000000..02a13e7
--- /dev/null
+++ b/cipher/xts_arm64.s
@@ -0,0 +1,150 @@
+//go:build arm64 && !purego
+// +build arm64,!purego
+
+#include "textflag.h"
+
+#define B0 V0
+#define T1 V1
+#define T2 V2
+
+#define POLY V3
+#define ZERO V4
+
+#define tweak R0
+#define GB R1
+#define I R2
+
+// func mul2(tweak *[blockSize]byte, isGB bool)
+TEXT ·mul2(SB),NOSPLIT,$0
+	MOVD tweak+0(FP), tweak
+	MOVB isGB+8(FP), GB
+
+	VLD1 (tweak), [B0.B16]
+
+	VEOR POLY.B16, POLY.B16, POLY.B16
+	VEOR ZERO.B16, ZERO.B16, ZERO.B16
+
+	CMP $1, GB
+	BEQ gb_alg
+
+	MOVD $0x87, I
+	VMOV I, POLY.D[0]
+
+	VMOV B0.D[1], I
+	ASR $63, I
+	VMOV I, T1.D[0]
+	VMOV I, T1.D[1]
+	VAND POLY.B16, T1.B16, T1.B16
+
+	VUSHR $63, B0.D2, T2.D2
+	VEXT $8, T2.B16, ZERO.B16, T2.B16
+	VSHL $1, B0.D2, B0.D2
+	VEOR T1.B16, B0.B16, B0.B16
+	VEOR T2.B16, B0.B16, B0.B16
+
+	VST1 [B0.B16], (tweak)
+	RET
+
+gb_alg:
+	MOVD $0xE1, I
+	LSL $56, I
+	VMOV I, POLY.D[1]
+
+	VREV64 B0.B16, B0.B16
+	VEXT $8, B0.B16, B0.B16, B0.B16
+
+	VMOV B0.D[0], I
+	LSL $63, I
+	ASR $63, I
+	VMOV I, T1.D[0]
+	VMOV I, T1.D[1]
+	VAND POLY.B16, T1.B16, T1.B16
+
+	VSHL $63, B0.D2, T2.D2
+	VEXT $8, ZERO.B16, T2.B16, T2.B16
+	VUSHR $1, B0.D2, B0.D2
+	VEOR T1.B16, B0.B16, B0.B16
+	VEOR T2.B16, B0.B16, B0.B16
+
+	VEXT $8, B0.B16, B0.B16, B0.B16
+	VREV64 B0.B16, B0.B16
+
+	VST1 [B0.B16], (tweak)
+	RET
+
+// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
+TEXT ·doubleTweaks(SB),NOSPLIT,$0
+	MOVD tweak+0(FP), tweak
+	MOVD tweaks+8(FP), R3
+	MOVD tweaks_len+16(FP), R4
+	MOVB isGB+32(FP), GB
+
+	LSR $4, R4
+	EOR R5, R5
+
+	VEOR POLY.B16, POLY.B16, POLY.B16
+	VEOR ZERO.B16, ZERO.B16, ZERO.B16
+
+	VLD1 (tweak), [B0.B16]
+
+	CMP $1, GB
+	BEQ dt_gb_alg
+
+	MOVD $0x87, I
+	VMOV I, POLY.D[0]
+
+loop:
+	VST1.P [B0.B16], 16(R3)
+
+	VMOV B0.D[1], I
+	ASR $63, I
+	VMOV I, T1.D[0]
+	VMOV I, T1.D[1]
+	VAND POLY.B16, T1.B16, T1.B16
+
+	VUSHR $63, B0.D2, T2.D2
+	VEXT $8, T2.B16, ZERO.B16, T2.B16
+	VSHL $1, B0.D2, B0.D2
+	VEOR T1.B16, B0.B16, B0.B16
+	VEOR T2.B16, B0.B16, B0.B16
+
+	ADD $1, R5
+	CMP R4, R5
+	BNE loop
+
+	VST1 [B0.B16], (tweak)
+	RET
+
+dt_gb_alg:
+	MOVD $0xE1, I
+	LSL $56, I
+	VMOV I, POLY.D[1]
+
+gb_loop:
+	VST1.P [B0.B16], 16(R3)
+
+	VREV64 B0.B16, B0.B16
+	VEXT $8, B0.B16, B0.B16, B0.B16
+
+	VMOV B0.D[0], I
+	LSL $63, I
+	ASR $63, I
+	VMOV I, T1.D[0]
+	VMOV I, T1.D[1]
+	VAND POLY.B16, T1.B16, T1.B16
+
+	VSHL $63, B0.D2, T2.D2
+	VEXT $8, ZERO.B16, T2.B16, T2.B16
+	VUSHR $1, B0.D2, B0.D2
+	VEOR T1.B16, B0.B16, B0.B16
+	VEOR T2.B16, B0.B16, B0.B16
+
+	VEXT $8, B0.B16, B0.B16, B0.B16
+	VREV64 B0.B16, B0.B16
+
+	ADD $1, R5
+	CMP R4, R5
+	BNE gb_loop
+
+	VST1 [B0.B16], (tweak)
+	RET
diff --git a/cipher/xts_asm.go b/cipher/xts_asm.go
index 2b824a5..3606e6a 100644
--- a/cipher/xts_asm.go
+++ b/cipher/xts_asm.go
@@ -1,5 +1,5 @@
-//go:build amd64 && !purego
-// +build amd64,!purego
+//go:build (amd64 && !purego) || (arm64 && !purego)
+// +build amd64,!purego arm64,!purego
 
 package cipher
 
diff --git a/cipher/xts_asm_test.go b/cipher/xts_asm_test.go
index d14be04..376bbdd 100644
--- a/cipher/xts_asm_test.go
+++ b/cipher/xts_asm_test.go
@@ -1,5 +1,5 @@
-//go:build amd64 && !purego
-// +build amd64,!purego
+//go:build (amd64 && !purego) || (arm64 && !purego)
+// +build amd64,!purego arm64,!purego
 
 package cipher
 
diff --git a/cipher/xts_generic.go b/cipher/xts_generic.go
index 01c812d..6c17d6a 100644
--- a/cipher/xts_generic.go
+++ b/cipher/xts_generic.go
@@ -1,5 +1,5 @@
-//go:build !amd64 || purego
-// +build !amd64 purego
+//go:build !amd64 && !arm64 || purego
+// +build !amd64,!arm64 purego
 
 package cipher
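
For reference, below is a minimal, untested Go sketch of the scalar tweak-doubling logic the new ARM64 routines appear to implement: the standard XTS path multiplies the tweak by x in GF(2^128) with the usual 0x87 reduction byte, while the GB/T path works on the bit-reversed representation and reduces with 0xe1. The names mul2Generic and doubleTweaksGeneric are illustrative only and are not taken from the repository's generic code.

// Illustrative scalar equivalents of the assembly above, under the
// assumptions stated in the note; only the algorithm is asserted.
package main

import "fmt"

const blockSize = 16

// mul2Generic multiplies the tweak by x in GF(2^128).
// Standard XTS (IEEE P1619): little-endian bit order, reduction byte 0x87.
// GB/T variant: bit-reversed representation, reduction byte 0xe1.
func mul2Generic(tweak *[blockSize]byte, isGB bool) {
	var carry byte
	if !isGB {
		for i := 0; i < blockSize; i++ {
			next := tweak[i] >> 7          // bit shifted out of this byte
			tweak[i] = tweak[i]<<1 | carry // shift left, pull in carry from previous byte
			carry = next
		}
		if carry != 0 {
			tweak[0] ^= 0x87 // reduce by x^7 + x^2 + x + 1
		}
		return
	}
	for i := 0; i < blockSize; i++ {
		next := (tweak[i] & 1) << 7    // low bit moves to the next byte's top bit
		tweak[i] = tweak[i]>>1 | carry // shift right, pull in carry from previous byte
		carry = next
	}
	if carry != 0 {
		tweak[0] ^= 0xe1 // bit-reversed form of 0x87
	}
}

// doubleTweaksGeneric writes len(tweaks)/16 consecutive tweak values into
// tweaks and leaves the following tweak in *tweak, mirroring the store-then-
// double loop in the assembly's doubleTweaks.
func doubleTweaksGeneric(tweak *[blockSize]byte, tweaks []byte, isGB bool) {
	count := len(tweaks) >> 4
	for i := 0; i < count; i++ {
		copy(tweaks[i*blockSize:(i+1)*blockSize], tweak[:])
		mul2Generic(tweak, isGB)
	}
}

func main() {
	var t [blockSize]byte
	t[0] = 1
	buf := make([]byte, 3*blockSize)
	doubleTweaksGeneric(&t, buf, false)
	fmt.Printf("tweaks=%x\nnext  =%x\n", buf, t[:])
}

A quick way to sanity-check the assembly is to compare its mul2/doubleTweaks output against this sketch for both isGB values, since the two variants differ only in shift direction and reduction constant.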