diff --git a/.travis.yml b/.travis.yml index e1a0a75..2e5f8f9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,14 +1,14 @@ language: go go: -# - "1.15.x" -# - "1.16.x" + - "1.15.x" + - "1.16.x" - "1.17.x" jobs: include: - # - arch: ppc64le - # go: 1.17.x + - arch: ppc64le + go: 1.17.x - arch: arm64-graviton2 virt: vm os: linux @@ -21,7 +21,6 @@ install: - go mod download script: - - go build ./... - go test -v ./... after_success: diff --git a/README.md b/README.md index b5c2b77..4fed5b0 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ This is a **SM2 sm2p256v1** implementation whose performance is similar like gol This is also a **SM3** implementation whose performance is similar like golang native SHA 256 with SIMD under **amd64**, for implementation detail, please refer [SM3性能优化](https://github.com/emmansun/gmsm/wiki/SM3%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). -For **SM4** implementation, SIMD & AES-NI are used under **amd64**, for detail please refer [SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96), support CBC/CFB/OFB/CTR/GCM/CCM/XTS modes. +For the **SM4** implementation, SIMD and hardware AES instructions (AES-NI on **amd64**, the ARMv8 AES instructions on **arm64**) are used; for details, please refer to [SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It supports the CBC/CFB/OFB/CTR/GCM/CCM/XTS modes. 
**SM2 encryption Benchmark** diff --git a/sm4/asm_amd64.s b/sm4/asm_amd64.s index 005f4c4..8533b00 100644 --- a/sm4/asm_amd64.s +++ b/sm4/asm_amd64.s @@ -100,7 +100,7 @@ GLOBL fk_mask<>(SB), RODATA, $16 MOVOU y, XTMP6; \ PSLLL $2, XTMP6; \ PSRLL $30, y; \ - PXOR XTMP6, y; \ //y = _mm_slli_epi32(y, 2) ^ _mm_srli_epi32(y, 30); + POR XTMP6, y; \ //y = _mm_slli_epi32(y, 2) | _mm_srli_epi32(y, 30); MOVOU x, XTMP7; \ PSHUFB r24_mask<>(SB), XTMP7; \ PXOR y, x; \ //x = x xor y diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 8fb24be..afb56a0 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -8,116 +8,97 @@ #define t3 V5 #define ZERO V16 #define FLIP_MASK V17 - +#define NIBBLE_MASK V20 +#define INVERSE_SHIFT_ROWS V21 +#define M1L V22 +#define M1H V23 +#define M2L V24 +#define M2H V25 +#define R08_MASK V26 +#define R16_MASK V27 +#define R24_MASK V28 +#define FK_MASK V29 #define XTMP6 V6 #define XTMP7 V7 // shuffle byte order from LE to BE DATA flip_mask<>+0x00(SB)/8, $0x0405060700010203 DATA flip_mask<>+0x08(SB)/8, $0x0c0d0e0f08090a0b -GLOBL flip_mask<>(SB), RODATA, $16 +GLOBL flip_mask<>(SB), (NOPTR+RODATA), $16 //nibble mask DATA nibble_mask<>+0x00(SB)/8, $0x0F0F0F0F0F0F0F0F DATA nibble_mask<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F -GLOBL nibble_mask<>(SB), RODATA, $16 +GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16 // inverse shift rows DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 -GLOBL inverse_shift_rows<>(SB), RODATA, $16 +GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16 // Affine transform 1 (low and high hibbles) DATA m1_low<>+0x00(SB)/8, $0x9197E2E474720701 DATA m1_low<>+0x08(SB)/8, $0xC7C1B4B222245157 -GLOBL m1_low<>(SB), RODATA, $16 +GLOBL m1_low<>(SB), (NOPTR+RODATA), $16 DATA m1_high<>+0x00(SB)/8, $0xE240AB09EB49A200 DATA m1_high<>+0x08(SB)/8, $0xF052B91BF95BB012 -GLOBL m1_high<>(SB), RODATA, $16 +GLOBL m1_high<>(SB), (NOPTR+RODATA), $16 // Affine transform 2 (low and high hibbles) DATA 
m2_low<>+0x00(SB)/8, $0x5B67F2CEA19D0834 DATA m2_low<>+0x08(SB)/8, $0xEDD14478172BBE82 -GLOBL m2_low<>(SB), RODATA, $16 +GLOBL m2_low<>(SB), (NOPTR+RODATA), $16 DATA m2_high<>+0x00(SB)/8, $0xAE7201DD73AFDC00 DATA m2_high<>+0x08(SB)/8, $0x11CDBE62CC1063BF -GLOBL m2_high<>(SB), RODATA, $16 +GLOBL m2_high<>(SB), (NOPTR+RODATA), $16 // left rotations of 32-bit words by 8-bit increments DATA r08_mask<>+0x00(SB)/8, $0x0605040702010003 DATA r08_mask<>+0x08(SB)/8, $0x0E0D0C0F0A09080B -GLOBL r08_mask<>(SB), RODATA, $16 +GLOBL r08_mask<>(SB), (NOPTR+RODATA), $16 DATA r16_mask<>+0x00(SB)/8, $0x0504070601000302 DATA r16_mask<>+0x08(SB)/8, $0x0D0C0F0E09080B0A -GLOBL r16_mask<>(SB), RODATA, $16 +GLOBL r16_mask<>(SB), (NOPTR+RODATA), $16 DATA r24_mask<>+0x00(SB)/8, $0x0407060500030201 DATA r24_mask<>+0x08(SB)/8, $0x0C0F0E0D080B0A09 -GLOBL r24_mask<>(SB), RODATA, $16 +GLOBL r24_mask<>(SB), (NOPTR+RODATA), $16 DATA fk_mask<>+0x00(SB)/8, $0x56aa3350a3b1bac6 DATA fk_mask<>+0x08(SB)/8, $0xb27022dc677d9197 -GLOBL fk_mask<>(SB), RODATA, $16 +GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16 #define SM4_SBOX(x, y) \ ; \ //############################# inner affine ############################// - LDP nibble_mask<>(SB), (R0, R1); \ - VMOV R0, XTMP6.D[0]; \ - VMOV R1, XTMP6.D[1]; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m1_low<>(SB), (R0, R1); \ - VMOV R0, y.D[0]; \ - VMOV R1, y.D[1]; \ - VTBL XTMP7.B16, [y.B16], y.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M1L.B16], y.B16; \ VUSHR $4, x.D2, x.D2; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m1_low<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M1H.B16], XTMP7.B16; \ VEOR y.B16, XTMP7.B16, x.B16; \ - LDP inverse_shift_rows<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL V8.B16, [x.B16], x.B16; \ + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16; \ AESE ZERO.B16, x.B16; \ - VAND 
x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m2_low<>(SB), (R0, R1); \ - VMOV R0, y.D[0]; \ - VMOV R1, y.D[1]; \ - VTBL XTMP7.B16, [y.B16], y.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M2L.B16], y.B16; \ VUSHR $4, x.D2, x.D2; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m2_high<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \ - VEOR y.B16, XTMP7.B16, x.B16; \ - + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M2H.B16], XTMP7.B16; \ + VEOR y.B16, XTMP7.B16, x.B16 #define SM4_TAO_L1(x, y) \ SM4_SBOX(x, y); \ ; \ //#################### 4 parallel L1 linear transforms ##################// - LDP r08_mask<>(SB), (R0, R1); \ - VMOV R0, XTMP7.D[0]; \ - VMOV R1, XTMP7.D[1]; \ - VTBL XTMP7.B16, [x.B16], y.B16; \ + VTBL R08_MASK.B16, [x.B16], y.B16; \ VEOR y.B16, x.B16, y.B16; \ - LDP r16_mask<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL V8.B16, [x.B16], XTMP7.B16; \ + VTBL R16_MASK.B16, [x.B16], XTMP7.B16; \ VEOR XTMP7.B16, y.B16, y.B16; \ VSHL $2, y.S4, XTMP7.S4; \ - VUSHR $32, y.S4, y.S4; \ - VEOR y.B16, XTMP7.B16, y.B16; \ - LDP r24_mask<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL V8.B16, [x.B16], XTMP7.B16; \ + VUSHR $30, y.S4, y.S4; \ + VORR y.B16, XTMP7.B16, y.B16; \ + VTBL R24_MASK.B16, [x.B16], XTMP7.B16; \ VEOR XTMP7.B16, x.B16, x.B16; \ VEOR y.B16, x.B16, x.B16 @@ -133,22 +114,56 @@ GLOBL fk_mask<>(SB), RODATA, $16 VEOR XTMP7.B16, y.B16, y.B16; \ VEOR x.B16, y.B16, x.B16 +#define load_global_data_1() \ + LDP flip_mask<>(SB), (R0, R1) \ + VMOV R0, FLIP_MASK.D[0] \ + VMOV R1, FLIP_MASK.D[1] \ + LDP nibble_mask<>(SB), (R0, R1) \ + VMOV R0, NIBBLE_MASK.D[0] \ + VMOV R1, NIBBLE_MASK.D[1] \ + LDP m1_low<>(SB), (R0, R1) \ + VMOV R0, M1L.D[0] \ + VMOV R1, M1L.D[1] \ + LDP m1_high<>(SB), (R0, R1) \ + VMOV R0, M1H.D[0] \ + VMOV R1, M1H.D[1] \ + LDP m2_low<>(SB), (R0, R1) \ + VMOV R0, M2L.D[0] \ + VMOV R1, M2L.D[1] \ + LDP m2_high<>(SB), 
(R0, R1) \ + VMOV R0, M2H.D[0] \ + VMOV R1, M2H.D[1] \ + LDP fk_mask<>(SB), (R0, R1) \ + VMOV R0, FK_MASK.D[0] \ + VMOV R1, FK_MASK.D[1] \ + LDP inverse_shift_rows<>(SB), (R0, R1) \ + VMOV R0, INVERSE_SHIFT_ROWS.D[0] \ + VMOV R1, INVERSE_SHIFT_ROWS.D[1] + +#define load_global_data_2() \ + load_global_data_1() \ + LDP r08_mask<>(SB), (R0, R1) \ + VMOV R0, R08_MASK.D[0] \ + VMOV R1, R08_MASK.D[1] \ + LDP r16_mask<>(SB), (R0, R1) \ + VMOV R0, R16_MASK.D[0] \ + VMOV R1, R16_MASK.D[1] \ + LDP r24_mask<>(SB), (R0, R1) \ + VMOV R0, R24_MASK.D[0] \ + VMOV R1, R24_MASK.D[1] + // func expandKeyAsm(key *byte, ck, enc, dec *uint32) TEXT ·expandKeyAsm(SB),NOSPLIT,$0 MOVD key+0(FP), R8 MOVD ck+8(FP), R9 MOVD enc+16(FP), R10 MOVD dec+24(FP), R11 - - VLD1 (R8), [t0.B16]; - LDP flip_mask<>(SB), (R0, R1) - VMOV R0, FLIP_MASK.D[0] - VMOV R1, FLIP_MASK.D[1] + + load_global_data_1() + + VLD1 (R8), [t0.B16] VTBL FLIP_MASK.B16, [t0.B16], t0.B16 - LDP fk_mask<>(SB), (R0, R1) - VMOV R0, XTMP7.D[0] - VMOV R1, XTMP7.D[1] - VEOR t0.B16, XTMP7.B16, t0.B16 + VEOR t0.B16, FK_MASK.B16, t0.B16 VMOV t0.S[1], t1.S[0] VMOV t0.S[2], t2.S[0] VMOV t0.S[3], t3.S[0] @@ -157,7 +172,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 ADD $124, R11 VEOR ZERO.B16, ZERO.B16, ZERO.B16 -loop: +ksLoop: MOVW.P 4(R9), R19 VMOV R19, x.S[0] VEOR t1.B16, x.B16, x.B16 @@ -203,10 +218,9 @@ loop: MOVW.P R2, -4(R11) ADD $16, R0 - CMP $4*32, R0 - BNE loop + CMP $128, R0 + BNE ksLoop -expand_end: RET // func encryptBlocksAsm(xk *uint32, dst, src *byte) @@ -243,9 +257,7 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0 VMOV R21, t2.S[3] VMOV R22, t3.S[3] - LDP flip_mask<>(SB), (R0, R1) - VMOV R0, FLIP_MASK.D[0] - VMOV R1, FLIP_MASK.D[1] + load_global_data_2() VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VTBL FLIP_MASK.B16, [t1.B16], t1.B16 @@ -253,10 +265,9 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0 VTBL FLIP_MASK.B16, [t3.B16], t3.B16 VEOR ZERO.B16, ZERO.B16, ZERO.B16 - EOR R0, R0 -loop: +encryptBlocksLoop: MOVW.P 4(R8), R19 VMOV R19, x.S[0] VMOV R19, 
x.S[1] @@ -289,7 +300,6 @@ loop: VEOR t3.B16, x.B16, x.B16 SM4_TAO_L1(x, y) VEOR x.B16, t2.B16, t2.B16 - ADD $4, R0 MOVW.P 4(R8), R19 VMOV R19, x.S[0] @@ -303,8 +313,8 @@ loop: VEOR x.B16, t3.B16, t3.B16 ADD $16, R0 - CMP $4*32, R0 - BNE loop + CMP $128, R0 + BNE encryptBlocksLoop VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VTBL FLIP_MASK.B16, [t1.B16], t1.B16 @@ -335,7 +345,6 @@ loop: VMOV t0.S[3], V8.S[3] VST1 [V8.B16], (R9) -done_sm4: RET @@ -356,13 +365,12 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0 VMOV R21, t2.S[0] VMOV R22, t3.S[0] + load_global_data_2() + VEOR ZERO.B16, ZERO.B16, ZERO.B16 - LDP flip_mask<>(SB), (R0, R1) - VMOV R0, FLIP_MASK.D[0] - VMOV R1, FLIP_MASK.D[1] EOR R0, R0 -loop: +encryptBlockLoop: MOVW.P 4(R8), R19 VMOV R19, x.S[0] VMOV R19, x.S[1] @@ -408,8 +416,8 @@ loop: VEOR x.B16, t3.B16, t3.B16 ADD $16, R0 - CMP $4*32, R0 - BNE loop + CMP $128, R0 + BNE encryptBlockLoop VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VTBL FLIP_MASK.B16, [t1.B16], t1.B16 @@ -422,5 +430,4 @@ loop: VMOV t0.S[0], V8.S[3] VST1 [V8.B16], (R9) -done_sm4: RET diff --git a/sm4/cbc_amd64.go b/sm4/cbc_cipher_asm.go similarity index 94% rename from sm4/cbc_amd64.go rename to sm4/cbc_cipher_asm.go index e7f8509..889eed4 100644 --- a/sm4/cbc_amd64.go +++ b/sm4/cbc_cipher_asm.go @@ -1,3 +1,6 @@ +//go:build amd64 || arm64 +// +build amd64 arm64 + package sm4 import ( diff --git a/sm4/cipher_asm.go b/sm4/cipher_asm.go index 2772bcf..98cb55f 100644 --- a/sm4/cipher_asm.go +++ b/sm4/cipher_asm.go @@ -12,7 +12,7 @@ import ( var supportSM4 = cpu.ARM64.HasSM4 var supportsAES = cpu.X86.HasAES || cpu.ARM64.HasAES -var supportsGFMUL = cpu.X86.HasPCLMULQDQ +var supportsGFMUL = cpu.X86.HasPCLMULQDQ || cpu.ARM64.HasPMULL //go:noescape func encryptBlocksAsm(xk *uint32, dst, src *byte) @@ -33,9 +33,9 @@ func newCipher(key []byte) (cipher.Block, error) { } c := sm4CipherAsm{sm4Cipher{make([]uint32, rounds), make([]uint32, rounds)}} expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0]) - //if supportsAES && 
supportsGFMUL { - // return &sm4CipherGCM{c}, nil - //} + if supportsAES && supportsGFMUL { + return &sm4CipherGCM{c}, nil + } return &c, nil } diff --git a/sm4/cipher_asm_fuzzy_test.go b/sm4/cipher_asm_fuzzy_test.go index d9b71e4..1e142da 100644 --- a/sm4/cipher_asm_fuzzy_test.go +++ b/sm4/cipher_asm_fuzzy_test.go @@ -5,14 +5,12 @@ package sm4 import ( "crypto/rand" - "fmt" "io" + "reflect" "testing" - - "golang.org/x/sys/cpu" + "time" ) -/* func TestExpandKey(t *testing.T) { key := make([]byte, 16) @@ -45,19 +43,3 @@ func TestExpandKey(t *testing.T) { } } } -*/ - -func TestExpandKeySimple(t *testing.T) { - fmt.Printf("cpu.ARM64.HasAES=%v\n", cpu.ARM64.HasAES) - key := make([]byte, 16) - - encRes1 := make([]uint32, 32) - decRes1 := make([]uint32, 32) - encRes2 := make([]uint32, 32) - decRes2 := make([]uint32, 32) - io.ReadFull(rand.Reader, key) - expandKeyGo(key, encRes1, decRes1) - expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0]) - fmt.Printf("expected=%v, result=%v\n", encRes1, encRes2) - fmt.Printf("expected=%v, result=%v\n", decRes1, decRes2) -} diff --git a/sm4/ctr_amd64.go b/sm4/ctr_cipher_asm.go similarity index 93% rename from sm4/ctr_amd64.go rename to sm4/ctr_cipher_asm.go index 1d5782e..57703a3 100644 --- a/sm4/ctr_amd64.go +++ b/sm4/ctr_cipher_asm.go @@ -1,3 +1,6 @@ +//go:build amd64 || arm64 +// +build amd64 arm64 + package sm4 import ( diff --git a/sm4/gcm_arm64.s b/sm4/gcm_arm64.s new file mode 100644 index 0000000..4712187 --- /dev/null +++ b/sm4/gcm_arm64.s @@ -0,0 +1,326 @@ +#include "textflag.h" + +#define B0 V0 +#define B1 V1 +#define B2 V2 +#define B3 V3 +#define B4 V4 +#define B5 V5 +#define B6 V6 +#define B7 V7 + +#define ACC0 V8 +#define ACC1 V9 +#define ACCM V10 + +#define T0 V11 +#define T1 V12 +#define T2 V13 +#define T3 V14 + +#define POLY V15 +#define ZERO V16 +#define INC V17 +#define CTR V18 + +#define K0 V19 +#define K1 V20 +#define K2 V21 +#define K3 V22 +#define K4 V23 +#define K5 V24 +#define K6 V25 +#define K7 V26 
+#define K8 V27 +#define K9 V28 +#define K10 V29 +#define K11 V30 +#define KLAST V31 + +#define reduce() \ + VEOR ACC0.B16, ACCM.B16, ACCM.B16 \ + VEOR ACC1.B16, ACCM.B16, ACCM.B16 \ + VEXT $8, ZERO.B16, ACCM.B16, T0.B16 \ + VEXT $8, ACCM.B16, ZERO.B16, ACCM.B16 \ + VEOR ACCM.B16, ACC0.B16, ACC0.B16 \ + VEOR T0.B16, ACC1.B16, ACC1.B16 \ + VPMULL POLY.D1, ACC0.D1, T0.Q1 \ + VEXT $8, ACC0.B16, ACC0.B16, ACC0.B16 \ + VEOR T0.B16, ACC0.B16, ACC0.B16 \ + VPMULL POLY.D1, ACC0.D1, T0.Q1 \ + VEOR T0.B16, ACC1.B16, ACC1.B16 \ + VEXT $8, ACC1.B16, ACC1.B16, ACC1.B16 \ + VEOR ACC1.B16, ACC0.B16, ACC0.B16 \ + +// func gcmSm4Finish(productTable *[256]byte, tagMask, T *[16]byte, pLen, dLen uint64) +TEXT ·gcmSm4Finish(SB),NOSPLIT,$0 +#define pTbl R0 +#define tMsk R1 +#define tPtr R2 +#define plen R3 +#define dlen R4 + + MOVD $0xC2, R1 + LSL $56, R1 + MOVD $1, R0 + VMOV R1, POLY.D[0] + VMOV R0, POLY.D[1] + VEOR ZERO.B16, ZERO.B16, ZERO.B16 + + MOVD productTable+0(FP), pTbl + MOVD tagMask+8(FP), tMsk + MOVD T+16(FP), tPtr + MOVD pLen+24(FP), plen + MOVD dLen+32(FP), dlen + + VLD1 (tPtr), [ACC0.B16] + VLD1 (tMsk), [B1.B16] + + LSL $3, plen + LSL $3, dlen + + VMOV dlen, B0.D[0] + VMOV plen, B0.D[1] + + ADD $14*16, pTbl + VLD1.P (pTbl), [T1.B16, T2.B16] + + VEOR ACC0.B16, B0.B16, B0.B16 + + VEXT $8, B0.B16, B0.B16, T0.B16 + VEOR B0.B16, T0.B16, T0.B16 + VPMULL B0.D1, T1.D1, ACC1.Q1 + VPMULL2 B0.D2, T1.D2, ACC0.Q1 + VPMULL T0.D1, T2.D1, ACCM.Q1 + + reduce() + + VREV64 ACC0.B16, ACC0.B16 + VEOR B1.B16, ACC0.B16, ACC0.B16 + + VST1 [ACC0.B16], (tPtr) + RET +#undef pTbl +#undef tMsk +#undef tPtr +#undef plen +#undef dlen + +// func precomputeTableAsm(productTable *[256]byte, src *[16]byte) +TEXT ·precomputeTableAsm(SB),NOSPLIT,$0 +#define pTbl R0 +#define SRC R1 +#define I R3 + + MOVD productTable+0(FP), pTbl + MOVD src+8(FP), SRC + + MOVD $0xC2, I + LSL $56, I + VMOV I, POLY.D[0] + MOVD $1, I + VMOV I, POLY.D[1] + VEOR ZERO.B16, ZERO.B16, ZERO.B16 + + VLD1 (SRC), [B0.B16] + VREV64 B0.B16, 
B0.B16 + + // Multiply by 2 modulo P + VMOV B0.D[0], I + ASR $63, I + VMOV I, T1.D[0] + VMOV I, T1.D[1] + VAND POLY.B16, T1.B16, T1.B16 + VUSHR $63, B0.D2, T2.D2 + VEXT $8, ZERO.B16, T2.B16, T2.B16 + VSHL $1, B0.D2, B0.D2 + VEOR T1.B16, B0.B16, B0.B16 + VEOR T2.B16, B0.B16, B0.B16 // Can avoid this when VSLI is available + + // Karatsuba pre-computation + VEXT $8, B0.B16, B0.B16, B1.B16 + VEOR B0.B16, B1.B16, B1.B16 + + ADD $14*16, pTbl + + VST1 [B0.B16, B1.B16], (pTbl) + SUB $2*16, pTbl + + VMOV B0.B16, B2.B16 + VMOV B1.B16, B3.B16 + + MOVD $7, I + +initLoop: + // Compute powers of H + SUBS $1, I + + VPMULL B0.D1, B2.D1, T1.Q1 + VPMULL2 B0.D2, B2.D2, T0.Q1 + VPMULL B1.D1, B3.D1, T2.Q1 + VEOR T0.B16, T2.B16, T2.B16 + VEOR T1.B16, T2.B16, T2.B16 + VEXT $8, ZERO.B16, T2.B16, T3.B16 + VEXT $8, T2.B16, ZERO.B16, T2.B16 + VEOR T2.B16, T0.B16, T0.B16 + VEOR T3.B16, T1.B16, T1.B16 + VPMULL POLY.D1, T0.D1, T2.Q1 + VEXT $8, T0.B16, T0.B16, T0.B16 + VEOR T2.B16, T0.B16, T0.B16 + VPMULL POLY.D1, T0.D1, T2.Q1 + VEXT $8, T0.B16, T0.B16, T0.B16 + VEOR T2.B16, T0.B16, T0.B16 + VEOR T1.B16, T0.B16, B2.B16 + VMOV B2.B16, B3.B16 + VEXT $8, B2.B16, B2.B16, B2.B16 + VEOR B2.B16, B3.B16, B3.B16 + + VST1 [B2.B16, B3.B16], (pTbl) + SUB $2*16, pTbl + + BNE initLoop + RET +#undef I +#undef SRC +#undef pTbl + +// func gcmSm4Data(productTable *[256]byte, data []byte, T *[16]byte) +TEXT ·gcmSm4Data(SB),NOSPLIT,$0 +#define pTbl R0 +#define aut R1 +#define tPtr R2 +#define autLen R3 +#define H0 R4 +#define pTblSave R5 + +#define mulRound(X) \ + VLD1.P 32(pTbl), [T1.B16, T2.B16] \ + VREV64 X.B16, X.B16 \ + VEXT $8, X.B16, X.B16, T0.B16 \ + VEOR X.B16, T0.B16, T0.B16 \ + VPMULL X.D1, T1.D1, T3.Q1 \ + VEOR T3.B16, ACC1.B16, ACC1.B16 \ + VPMULL2 X.D2, T1.D2, T3.Q1 \ + VEOR T3.B16, ACC0.B16, ACC0.B16 \ + VPMULL T0.D1, T2.D1, T3.Q1 \ + VEOR T3.B16, ACCM.B16, ACCM.B16 + + MOVD productTable+0(FP), pTbl + MOVD data_base+8(FP), aut + MOVD data_len+16(FP), autLen + MOVD T+32(FP), tPtr + + //VEOR ACC0.B16, 
ACC0.B16, ACC0.B16 + VLD1 (tPtr), [ACC0.B16] + CBZ autLen, dataBail + + MOVD $0xC2, H0 + LSL $56, H0 + VMOV H0, POLY.D[0] + MOVD $1, H0 + VMOV H0, POLY.D[1] + VEOR ZERO.B16, ZERO.B16, ZERO.B16 + MOVD pTbl, pTblSave + + CMP $13, autLen + BEQ dataTLS + CMP $128, autLen + BLT startSinglesLoop + B octetsLoop + +dataTLS: + ADD $14*16, pTbl + VLD1.P (pTbl), [T1.B16, T2.B16] + VEOR B0.B16, B0.B16, B0.B16 + + MOVD (aut), H0 + VMOV H0, B0.D[0] + MOVW 8(aut), H0 + VMOV H0, B0.S[2] + MOVB 12(aut), H0 + VMOV H0, B0.B[12] + + MOVD $0, autLen + B dataMul + +octetsLoop: + CMP $128, autLen + BLT startSinglesLoop + SUB $128, autLen + + VLD1.P 32(aut), [B0.B16, B1.B16] + + VLD1.P 32(pTbl), [T1.B16, T2.B16] + VREV64 B0.B16, B0.B16 + VEOR ACC0.B16, B0.B16, B0.B16 + VEXT $8, B0.B16, B0.B16, T0.B16 + VEOR B0.B16, T0.B16, T0.B16 + VPMULL B0.D1, T1.D1, ACC1.Q1 + VPMULL2 B0.D2, T1.D2, ACC0.Q1 + VPMULL T0.D1, T2.D1, ACCM.Q1 + + mulRound(B1) + VLD1.P 32(aut), [B2.B16, B3.B16] + mulRound(B2) + mulRound(B3) + VLD1.P 32(aut), [B4.B16, B5.B16] + mulRound(B4) + mulRound(B5) + VLD1.P 32(aut), [B6.B16, B7.B16] + mulRound(B6) + mulRound(B7) + + MOVD pTblSave, pTbl + reduce() + B octetsLoop + +startSinglesLoop: + + ADD $14*16, pTbl + VLD1.P (pTbl), [T1.B16, T2.B16] + +singlesLoop: + + CMP $16, autLen + BLT dataEnd + SUB $16, autLen + + VLD1.P 16(aut), [B0.B16] +dataMul: + VREV64 B0.B16, B0.B16 + VEOR ACC0.B16, B0.B16, B0.B16 + + VEXT $8, B0.B16, B0.B16, T0.B16 + VEOR B0.B16, T0.B16, T0.B16 + VPMULL B0.D1, T1.D1, ACC1.Q1 + VPMULL2 B0.D2, T1.D2, ACC0.Q1 + VPMULL T0.D1, T2.D1, ACCM.Q1 + + reduce() + + B singlesLoop + +dataEnd: + + CBZ autLen, dataBail + VEOR B0.B16, B0.B16, B0.B16 + ADD autLen, aut + +dataLoadLoop: + MOVB.W -1(aut), H0 + VEXT $15, B0.B16, ZERO.B16, B0.B16 + VMOV H0, B0.B[0] + SUBS $1, autLen + BNE dataLoadLoop + B dataMul + +dataBail: + VST1 [ACC0.B16], (tPtr) + RET + +#undef pTbl +#undef aut +#undef tPtr +#undef autLen +#undef H0 +#undef pTblSave diff --git a/sm4/gcm_amd64.go 
b/sm4/gcm_cipher_asm.go similarity index 96% rename from sm4/gcm_amd64.go rename to sm4/gcm_cipher_asm.go index 662aa18..5845243 100644 --- a/sm4/gcm_amd64.go +++ b/sm4/gcm_cipher_asm.go @@ -1,3 +1,6 @@ +//go:build amd64 || arm64 +// +build amd64 arm64 + package sm4 import ( diff --git a/sm4/sm4_gcm.go b/sm4/sm4_gcm.go index 8cf74d3..2e277ab 100644 --- a/sm4/sm4_gcm.go +++ b/sm4/sm4_gcm.go @@ -1,5 +1,5 @@ -//go:build amd64 -// +build amd64 +//go:build amd64 || arm64 +// +build amd64 arm64 package sm4 diff --git a/sm4/sm4_gcm_test.go b/sm4/sm4_gcm_test.go new file mode 100644 index 0000000..c16d7bb --- /dev/null +++ b/sm4/sm4_gcm_test.go @@ -0,0 +1,146 @@ +//go:build amd64 || arm64 +// +build amd64 arm64 + +package sm4 + +import ( + "fmt" + "testing" +) + +func genPrecomputeTable() *gcmAsm { + key := []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10} + c := sm4CipherAsm{sm4Cipher{make([]uint32, rounds), make([]uint32, rounds)}} + expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0]) + c1 := &sm4CipherGCM{c} + g := &gcmAsm{} + g.cipher = &c1.sm4CipherAsm + var key1 [gcmBlockSize]byte + c1.Encrypt(key1[:], key1[:]) + fmt.Printf("%v\n", key1) + precomputeTableAsm(&g.bytesProductTable, &key1) + return g +} + +/* +amd64 result = { + 0xEF, 0xE0, 0x28, 0x75, 0x21, 0x1F, 0x10, 0x4B, 0x6C, 0xC6, 0x39, 0x8A, 0x88, 0xE0, 0x26, 0x16, + 0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D, 0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D, + 0xD1, 0x99, 0x07, 0x39, 0xBA, 0x15, 0x68, 0xA7, 0xB8, 0x50, 0xC2, 0xB3, 0xD6, 0xFA, 0xA7, 0x02, + 0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5, 0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5, + 0xC4, 0x65, 0xCA, 0xCA, 0x55, 0x7F, 0x2B, 0x72, 0xB1, 0xA4, 0x14, 0x62, 0xDE, 0xBD, 0x1B, 0x00, + 0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72, 0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72, + 0x85, 0xF6, 0x58, 0x15, 0x09, 0x45, 0xB9, 0x72, 0x00, 0x30, 0xAB, 0x91, 0x2A, 0x73, 0xB7, 0x1C, + 0x85, 0xC6, 
0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E, 0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E, + 0x70, 0xD7, 0xD2, 0x6D, 0x60, 0xBA, 0x5E, 0x2E, 0x43, 0x4C, 0x4A, 0xCF, 0xFA, 0xE2, 0xF1, 0x5B, + 0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75, 0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75, + 0xED, 0xEB, 0x6C, 0xD4, 0x1B, 0x6C, 0x86, 0x6A, 0xA1, 0x16, 0xA5, 0xFF, 0x33, 0xDC, 0xBB, 0xC0, + 0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA, 0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA, + 0xBF, 0x7C, 0x2D, 0x4E, 0xFD, 0xDD, 0x55, 0x77, 0x1C, 0x7E, 0x73, 0xC7, 0xAA, 0x8B, 0x73, 0x2F, + 0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58, 0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58, + 0x54, 0x44, 0xA9, 0xB7, 0x20, 0x66, 0xAA, 0x2E, 0x99, 0x45, 0x82, 0x13, 0xD6, 0xE8, 0xEF, 0x4C, + 0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, 0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, } +arm64 result = { + 0x6C, 0xC6, 0x39, 0x8A, 0x88, 0xE0, 0x26, 0x16, 0xEF, 0xE0, 0x28, 0x75, 0x21, 0x1F, 0x10, 0x4B, + 0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D, 0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D, + 0xB8, 0x50, 0xC2, 0xB3, 0xD6, 0xFA, 0xA7, 0x02, 0xD1, 0x99, 0x07, 0x39, 0xBA, 0x15, 0x68, 0xA7, + 0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5, 0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5, + 0xB1, 0xA4, 0x14, 0x62, 0xDE, 0xBD, 0x1B, 0x00, 0xC4, 0x65, 0xCA, 0xCA, 0x55, 0x7F, 0x2B, 0x72, + 0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72, 0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72, + 0x00, 0x30, 0xAB, 0x91, 0x2A, 0x73, 0xB7, 0x1C, 0x85, 0xF6, 0x58, 0x15, 0x09, 0x45, 0xB9, 0x72, + 0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E, 0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E, + 0x43, 0x4C, 0x4A, 0xCF, 0xFA, 0xE2, 0xF1, 0x5B, 0x70, 0xD7, 0xD2, 0x6D, 0x60, 0xBA, 0x5E, 0x2E, + 0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75, 0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75, + 0xA1, 0x16, 0xA5, 0xFF, 0x33, 0xDC, 0xBB, 0xC0, 0xED, 0xEB, 0x6C, 0xD4, 0x1B, 0x6C, 0x86, 0x6A, + 0x4C, 0xFD, 0xC9, 0x2B, 0x28, 
0xB0, 0x3D, 0xAA, 0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA, + 0x1C, 0x7E, 0x73, 0xC7, 0xAA, 0x8B, 0x73, 0x2F, 0xBF, 0x7C, 0x2D, 0x4E, 0xFD, 0xDD, 0x55, 0x77, + 0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58, 0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58, + 0x99, 0x45, 0x82, 0x13, 0xD6, 0xE8, 0xEF, 0x4C, 0x54, 0x44, 0xA9, 0xB7, 0x20, 0x66, 0xAA, 0x2E, + 0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, 0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, +} +*/ +func TestPrecomputeTableAsm(t *testing.T) { + g := genPrecomputeTable() + for i := 0; i < 16; i++ { + for j := 0; j < 16; j++ { + fmt.Printf("0x%02X, ", g.bytesProductTable[i*16+j]) + } + fmt.Println() + } +} + +/* +amd64 result = { + 7D 13 81 A2 78 ED 2D 5E 91 3E 7F 9A 15 2C 76 DA +} + +arm64 result = { + 91 3E 7F 9A 15 2C 76 DA 7D 13 81 A2 78 ED 2D 5E +} +*/ +func TestGcmSm4Data(t *testing.T) { + g := genPrecomputeTable() + var counter [gcmBlockSize]byte + nonce := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13} + gcmSm4Data(&g.bytesProductTable, nonce, &counter) + for j := 0; j < 16; j++ { + fmt.Printf("%02X ", counter[j]) + } + fmt.Println() +} + +/* +TestGcmSm4Finish hashes a 13-byte input with gcmSm4Data, finalizes it with +gcmSm4Finish under an all-zero tag mask, and compares the tag with the value +recorded on both amd64 and arm64: + 8F F3 05 10 EA 99 A8 D7 41 D9 E3 BA 67 D6 18 EE +The intermediate gcmSm4Data output is architecture specific (see above), the +finished tag is not. +*/ +func TestGcmSm4Finish(t *testing.T) { + g := genPrecomputeTable() + var counter, tagMask [gcmBlockSize]byte + nonce := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13} + gcmSm4Data(&g.bytesProductTable, nonce, &counter) + gcmSm4Finish(&g.bytesProductTable, &tagMask, &counter, uint64(len(nonce)), uint64(0)) + want := [gcmBlockSize]byte{0x8F, 0xF3, 0x05, 0x10, 0xEA, 0x99, 0xA8, 0xD7, 0x41, 0xD9, 0xE3, 0xBA, 0x67, 0xD6, 0x18, 0xEE} + if counter != want { + t.Errorf("gcmSm4Finish tag = % X, want % X", counter[:], want[:]) + } +} + +/* +amd64 result= { +71 F0 B5 6E B6 6A 89 11 98 01 23 72 4B F6 0D 0C +5B 36 17 D5 95 7E B6 42 8C 6A C7 E1 80 76 70 B6 +16 3E 35 A0 B7 51 62 AA 1D AF C1 15 
2D C4 3B 9D +} +*/ +func TestBothDataPlaintext(t *testing.T) { + g := genPrecomputeTable() + var tagOut, tagMask [gcmBlockSize]byte + data := []byte("emmansun") + gcmSm4Data(&g.bytesProductTable, data, &tagOut) + for j := 0; j < 16; j++ { + tagMask[j] = byte(j) + } + for j := 0; j < 16; j++ { + fmt.Printf("%02X ", tagOut[j]) + } + fmt.Println() + gcmSm4Data(&g.bytesProductTable, []byte("emmansunemmansunemmansunemmansun"), &tagOut) + for j := 0; j < 16; j++ { + fmt.Printf("%02X ", tagOut[j]) + } + fmt.Println() + gcmSm4Finish(&g.bytesProductTable, &tagMask, &tagOut, uint64(32), uint64(8)) + for j := 0; j < 16; j++ { + fmt.Printf("%02X ", tagOut[j]) + } + fmt.Println() +} diff --git a/sm4_test/gcm_sm4_test.go b/sm4_test/gcm_sm4_test.go index e7c74a2..7c0fc9d 100644 --- a/sm4_test/gcm_sm4_test.go +++ b/sm4_test/gcm_sm4_test.go @@ -75,42 +75,42 @@ var sm4GCMTests = []struct { "", "97ce841f7d174d76969fb46b19e742cf28983f4439909cbb6c27662dd4fbbc73", }, - { + { //#9 "fe47fcce5fc32665d2ae399e4eec72ba", "5adb9609dbaeb58cbd6e7275", "7c0e88c88899a779228465074797cd4c2e1498d259b54390b85e3eef1c02df60e743f1b840382c4bccaf3bafb4ca8429bea063", "88319d6e1d3ffa5f987199166c8a9b56c2aeba5a", "2276da0e9a4ccaa2a5934c96ba1dc6b0a52b3430ca011b4db4bf6e298b3a58425402952806350fdda7ac20bc38838d7124ee7c333e395b9a94c508b6bf0ce6b2d10d61", }, - { + { //#10 "ec0c2ba17aa95cd6afffe949da9cc3a8", "296bce5b50b7d66096d627ef", "b85b3753535b825cbe5f632c0b843c741351f18aa484281aebec2f45bb9eea2d79d987b764b9611f6c0f8641843d5d58f3a242", "f8d00f05d22bf68599bcdeb131292ad6e2df5d14", "3175cd3cb772af34490e4f5203b6a5743cd9b3798c387b7bda2708ff82d520c35d3022767b2d0fe4addff59fb25ead69ca3dd4d73ce1b4cb53a7c4cdc6a4c1fb06c316", }, - { + { //#11 "2c1f21cf0f6fb3661943155c3e3d8492", "23cb5ff362e22426984d1907", "42f758836986954db44bf37c6ef5e4ac0adaf38f27252a1b82d02ea949c8a1a2dbc0d68b5615ba7c1220ff6510e259f06655d8", 
"5d3624879d35e46849953e45a32a624d6a6c536ed9857c613b572b0333e701557a713e3f010ecdf9a6bd6c9e3e44b065208645aff4aabee611b391528514170084ccf587177f4488f33cfb5e979e42b6e1cfc0a60238982a7aec", "9db299bb7f9d6914c4a13589cf41ab014445e4914c1571745d50508bf0f6adeaa41aa4b081a444ee82fed6769da92f5e727d004b21791f961e212a69bfe80af14e7adf", }, - { + { //#12 "d9f7d2411091f947b4d6f1e2d1f0fb2e", "e1934f5db57cc983e6b180e7", "73ed042327f70fe9c572a61545eda8b2a0c6e1d6c291ef19248e973aee6c312012f490c2c6f6166f4a59431e182663fcaea05a", "0a8a18a7150e940c3d87b38e73baee9a5c049ee21795663e264b694a949822b639092d0e67015e86363583fcf0ca645af9f43375f05fdb4ce84f411dcbca73c2220dea03a20115d2e51398344b16bee1ed7c499b353d6c597af8", "65c81d83857626a3ec94c913a9f44fa065b6cd61ca5dd6e15e15bb7f16e757202ef966ab1f1e8e6dcbc82f002d29ba6070f53cd79767b1cbcb8cdb656a6a4369f297fc", }, - { + { //#13 "fe9bb47deb3a61e423c2231841cfd1fb", "4d328eb776f500a2f7fb47aa", "f1cc3818e421876bb6b8bbd6c9", "", "d8ce306b812aa1b09299ceef804e76b1cb3f736791a5b0d93774d40c2a", }, - { + { //#14 "6703df3701a7f54911ca72e24dca046a", "12823ab601c350ea4bc2488c", "793cd125b0b84a043e3ac67717", @@ -118,49 +118,49 @@ var sm4GCMTests = []struct { "f42f741a51c02f71a99519f60a55c8dbdcc9a15549158cc1acd6754847", }, // These cases test non-standard nonce sizes. 
- { + { //#15 "1672c3537afa82004c6b8a46f6f0d026", "05", "", "", "65bde02c20351976153d5d2b49790e30", }, - { + { //#16 "9a4fea86a621a91ab371e492457796c0", "75", "ca6131faf0ff210e4e693d6c31c109fc5b6f54224eb120f37de31dc59ec669b6", "4f6e2585c161f05a9ae1f2f894e9f0ab52b45d0f", "b86d6055e7e07a664801ccce38172bf7d91dc20babf2c0662d635cc9111ffefb308ee64ce01afe544b6ee1a65b803cb9", }, - { + { //#17 "d0f1f4defa1e8c08b4b26d576392027c", "42b4f01eb9f5a1ea5b1eb73b0fb0baed54f387ecaa0393c7d7dffc6af50146ecc021abf7eb9038d4303d91f8d741a11743166c0860208bcc02c6258fd9511a2fa626f96d60b72fcff773af4e88e7a923506e4916ecbd814651e9f445adef4ad6a6b6c7290cc13b956130eef5b837c939fcac0cbbcc9656cd75b13823ee5acdac", "", "", "1edcf8ea546af4879379e7653c53dddc", }, - { + { //#18 "4a0c00a3d284dea9d4bf8b8dde86685e", "f8cbe82588e784bcacbe092cd9089b51e01527297f635bf294b3aa787d91057ef23869789698ac960707857f163ecb242135a228ad93964f5dc4a4d7f88fd7b3b07dd0a5b37f9768fb05a523639f108c34c661498a56879e501a2321c8a4a94d7e1b89db255ac1f685e185263368e99735ebe62a7f2931b47282be8eb165e4d7", "6d4bf87640a6a48a50d28797b7", "8d8c7ffc55086d539b5a8f0d1232654c", "193952a26ab455b3c16db216bb2597cba90a9946dec5b7d085ceb7408e", }, - { + { //#19 "0e18a844ac5bf38e4cd72d9b0942e506", "0870d4b28a2954489a0abcd5", "67c6697351ff4aec29cdbaabf2fbe3467cc254f81be8e78d765a2e63339fc99a66320db73158a35a255d051758e95ed4abb2cdc69bb454110e827441213ddc8770e93ea141e1fc673e017e97eadc6b968f385c2aecb03bfb32af3c54ec18db5c021afe43fbfaaa3afb29d1e6053c7c9475d8be6189f95cbba8990f95b1ebf1b3", "05eff700e9a13ae5ca0bcbd0484764bd1f231ea81c7b64c514735ac55e4b79633b706424119e09dcaad4acf21b10af3b33cde3504847155cbb6f2219ba9b7df50be11a1c7f23f829f8a41b13b5ca4ee8983238e0794d3d34bc5f4e77facb6c05ac86212baa1a55a2be70b5733b045cd33694b3afe2f0e49e4f321549fd824ea9", 
"f492d37084697e941acd69c3d8b53d91760f4bced0fdff529327fb03000b865fbf87133c5816bdafdd23013f1440a30835b7e4d57bb6660e14b438b19b5b07a03f74369f2a11a163e5fcc4fd7ea139982ccf589533011d8efab4a44f6154043099b39f19754a4f434290299c2faa838b92453a1b989f354e7b50ea558daf1f6a88ea50b481a4ffcdd634f324f27cb3f6", }, - { + { //#20 "1f6c3a3bc0542aabba4ef8f6c7169e73", "f3584606472b260e0dd2ebb2", "67c6697351ff4aec29cdbaabf2fbe3467cc254f81be8e78d765a2e63339fc99a66320db73158a35a255d051758e95ed4abb2cdc69bb454110e827441213ddc8770e93ea141e1fc673e017e97eadc6b968f385c2aecb03bfb32af3c54ec18db5c021afe43fbfaaa3afb29d1e6053c7c9475d8be6189f95cbba8990f95b1ebf1b305eff700e9a13ae5ca0bcbd0484764bd1f231ea81c7b64c514735ac55e4b79633b706424119e09dcaad4acf21b10af3b33cde3504847155cbb6f2219ba9b7df50be11a1c7f23f829f8a41b13b5ca4ee8983238e0794d3d34bc5f4e77facb6c05ac86212baa1a55a2be70b5733b045cd33694b3afe2f0e49e4f321549fd824ea90870d4b28a2954489a0abcd50e18a844ac5bf38e4cd72d9b0942e506c433afcda3847f2dadd47647de321cec4ac430f62023856cfbb20704f4ec0bb920ba86c33e05f1ecd96733b79950a3e314d3d934f75ea0f210a8f6059401beb4bc4478fa4969e623d01ada696a7e4c7e5125b34884533a94fb319990325744ee9bbce9e525cf08f5e9e25e5360aad2b2d085fa54d835e8d466826498d9a8877565705a8a3f62802944de7ca5894e5759d351adac869580ec17e485f18c0c66f17cc07cbb22fce466da610b63af62bc83b4692f3affaf271693ac071fb86d11342d8def4f89d4b66335c1c7e4248367d8ed9612ec453902d8e50af89d7709d1a596c1f41f", "95aa82ca6c49ae90cd1668baac7aa6f2b4a8ca99b2c2372acb08cf61c9c3805e6e0328da4cd76a19edd2d3994c798b0022569ad418d1fee4d9cd45a391c601ffc92ad91501432fee150287617c13629e69fc7281cd7165a63eab49cf714bce3a75a74f76ea7e64ff81eb61fdfec39b67bf0de98c7e4e32bdf97c8c6ac75ba43c02f4b2ed7216ecf3014df000108b67cf99505b179f8ed4980a6103d1bca70dbe9bbfab0ed59801d6e5f2d6f67d3ec5168e212e2daf02c6b963c98a1f7097de0c56891a2b211b01070dd8fd8b16c2a1a4e3cfd292d2984b3561d555d16c33ddc2bcf7edde13efe520c7e2abdda44d81881c531aeeeb66244c3b791ea8acfb6a68", 
"c40924873aa2ef1b1b7bf4e16576446b4d24ab529c3f526cdbf7ea1cf64a73f26e4077d1464d1af165b26138ae65281dc3ca0d0998cce7b3c4fe2de5007c5c47ae586016fb11eb1b5ee1f775005b00f2c030c22fbebffc4c7fb3f4ae5b0032e7ab79b3fa48e17bb576486ba73ada0322577efd52b79f229da7e05d00a215ab3a1d717ede7c383c2eff400c4fd13c2eb6dd9e4165f67a7f5260619e459d7d9e2d276f44839ea1ec8bcc460a94b759b12b49f49ba350dab04313953d9ac0a8ac2fdd2b5cbfc70c62cdfaea658427afdc7a8a86c6a3b85c795364077fab193e87965a2cc45cbc82656e62410f027b79276317d7a1a81ebc721af6174f34e7d524c2b333e9802d2ecebec414bbdecd4587bc15079001ef140d65f689bb8f686cd670376d1e579a23fc5d098137ef2f11ec4413fcc308e689f4fcb11bde15c657651ee82694cdb676a286b2059fdf41210eceb9f03c3add1e316495a613d85e9126f4e4ba4565a2465fe578587748476360e353c2cd0e880100be8821ddae242f54efb4e7079420312443834db98e9252456b97cd1925880fffba64b0fcf2c8c05f49e0739c78df846975d99d8072b7c3c2ed5df96cdc3ad3a5dfb9d9fa8a73154765f33ca68a64bfced57391bd54250d5681aa09c28970f1fad0627205a0ea68e02bba7edb8e4f2468d70c879a585461349637639887d41f3206da7421bba36c142947a5bfa91ed341b466f8f6c8c12af0f2", }, - { + { //#21 "0795d80bc7f40f4d41c280271a2e4f7f", "ff824c906594aff365d3cb1f", "1ad4e74d127f935beee57cff920665babe7ce56227377afe570ba786193ded3412d4812453157f42fafc418c02a746c1232c234a639d49baa8f041c12e2ef540027764568ce49886e0d913e28059a3a485c6eee96337a30b28e4cd5612c2961539fa6bc5de034cbedc5fa15db844013e0bef276e27ca7a4faf47a5c1093bd643354108144454d221b3737e6cb87faac36ed131959babe44af2890cfcc4e23ffa24470e689ce0894f5407bb0c8665cff536008ad2ac6f1c9ef8289abd0bd9b72f21c597bda5210cf928c805af2dd4a464d52e36819d521f967bba5386930ab5b4cf4c71746d7e6e964673457348e9d71d170d9eb560bd4bdb779e610ba816bf776231ebd0af5966f5cdab6815944032ab4dd060ad8dab880549e910f1ffcf6862005432afad", @@ -168,35 +168,35 @@ var sm4GCMTests = []struct { 
"598798e51e3b70677ee1cd17c25dd6a4752f42aa51b2d055df9992e46afc8e48ac0e99f645bbab4388bc22bc674ecd3bea4f59dbe77a3e33f1b66d751f2772b59eb462443d2de8f27cbf057b8e00c000e2653a597c440cdd3a87a83f7a2f26f3966ba26fc60c05de7da075e635fdd3b5fefa816398855e099ab746278fc57f65b7573f5372a676ca5a9835d0e158f16201ea16fb6685da1829cffc6cea57a9937e822dc6becd7679239c55df5b88caa91522eeb3223dd9357d374a5b3be015624ca21ff667f427d94e9c5cd6e9ec227d3fb2b8c3835dfe5cd8949da744f8d30470a5f36dc33f3f57586ff9e4f117d94b1d1a94318a7cecb61f0386b2e34d4d39e965640e2fc211f34552352ef1df24f409583f82d4b259bf0f9358c3330bea2a2cab2fd303d8cd22abce5339576d8a6736f46589d8", }, // These cases test non-standard tag sizes. - { + { //#22 "89c54b0d3bc3c397d5039058c220685f", "bc7f45c00868758d62d4bb4d", "582670b0baf5540a3775b6615605bd05", "48d16cda0337105a50e2ed76fd18e114", "6e37e818153f115f2fab4c890f3eac139a3ee8b30bf2cbcb54c39ff0651313", }, - { + { //#23 "bad6049678bf75c9087b3e3ae7e72c13", "a0a017b83a67d8f1b883e561", "a1be93012f05a1958440f74a5311f4a1", "f7c27b51d5367161dc2ff1e9e3edc6f2", "baa7c826af7983e1824558e7e31d04063543c8a5e80eb58af0e38b7a1581", }, - { + { //#24 "66a3c722ccf9709525650973ecc100a9", "1621d42d3a6d42a2d2bf9494", "61fa9dbbed2190fbc2ffabf5d2ea4ff8", "d7a9b6523b8827068a6354a6d166c6b9", "4e920aff4744aef585b81c80fe962231d13d8f7f03e56a06cb33d12491", }, - { + { //#25 "562ae8aadb8d23e0f271a99a7d1bd4d1", "f7a5e2399413b89b6ad31aff", "bbdc3504d803682aa08a773cde5f231a", "2b9680b886b3efb7c6354b38c63b5373", "716a4e0150125a51e72f95d900814fc37b0ddba2a85bda1f8819b774", }, - { + { //#26 "11754cd72aec309bf52f7687212e8957", "", "",