# Conflicts:
#	sm4/cipher_asm_fuzzy_test.go
This commit is contained in:
Emman 2022-01-04 08:44:29 +08:00
commit c78a37b66d
13 changed files with 604 additions and 135 deletions

View File

@ -1,14 +1,14 @@
language: go
go:
# - "1.15.x"
# - "1.16.x"
- "1.15.x"
- "1.16.x"
- "1.17.x"
jobs:
include:
# - arch: ppc64le
# go: 1.17.x
- arch: ppc64le
go: 1.17.x
- arch: arm64-graviton2
virt: vm
os: linux
@ -21,7 +21,6 @@ install:
- go mod download
script:
- go build ./...
- go test -v ./...
after_success:

View File

@ -7,7 +7,7 @@ This is a **SM2 sm2p256v1** implementation whose performance is similar like gol
This is also a **SM3** implementation whose performance is similar to Go's native SHA-256 with SIMD under **amd64**. For implementation details, please refer to [SM3性能优化](https://github.com/emmansun/gmsm/wiki/SM3%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96).
For **SM4** implementation, SIMD & AES-NI are used under **amd64**, for detail please refer [SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96), support CBC/CFB/OFB/CTR/GCM/CCM/XTS modes.
For the **SM4** implementation, SIMD & AES-NI are used under **amd64** and **arm64**; for details, please refer to [SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It supports the CBC/CFB/OFB/CTR/GCM/CCM/XTS modes.
**SM2 encryption Benchmark**

View File

@ -100,7 +100,7 @@ GLOBL fk_mask<>(SB), RODATA, $16
MOVOU y, XTMP6; \
PSLLL $2, XTMP6; \
PSRLL $30, y; \
PXOR XTMP6, y; \ //y = _mm_slli_epi32(y, 2) ^ _mm_srli_epi32(y, 30);
POR XTMP6, y; \ //y = _mm_slli_epi32(y, 2) ^ _mm_srli_epi32(y, 30);
MOVOU x, XTMP7; \
PSHUFB r24_mask<>(SB), XTMP7; \
PXOR y, x; \ //x = x xor y

View File

@ -8,116 +8,97 @@
#define t3 V5
#define ZERO V16
#define FLIP_MASK V17
#define NIBBLE_MASK V20
#define INVERSE_SHIFT_ROWS V21
#define M1L V22
#define M1H V23
#define M2L V24
#define M2H V25
#define R08_MASK V26
#define R16_MASK V27
#define R24_MASK V28
#define FK_MASK V29
#define XTMP6 V6
#define XTMP7 V7
// shuffle byte order from LE to BE
DATA flip_mask<>+0x00(SB)/8, $0x0405060700010203
DATA flip_mask<>+0x08(SB)/8, $0x0c0d0e0f08090a0b
GLOBL flip_mask<>(SB), RODATA, $16
GLOBL flip_mask<>(SB), (NOPTR+RODATA), $16
//nibble mask
DATA nibble_mask<>+0x00(SB)/8, $0x0F0F0F0F0F0F0F0F
DATA nibble_mask<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F
GLOBL nibble_mask<>(SB), RODATA, $16
GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16
// inverse shift rows
DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00
DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508
GLOBL inverse_shift_rows<>(SB), RODATA, $16
GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16
// Affine transform 1 (low and high nibbles)
DATA m1_low<>+0x00(SB)/8, $0x9197E2E474720701
DATA m1_low<>+0x08(SB)/8, $0xC7C1B4B222245157
GLOBL m1_low<>(SB), RODATA, $16
GLOBL m1_low<>(SB), (NOPTR+RODATA), $16
DATA m1_high<>+0x00(SB)/8, $0xE240AB09EB49A200
DATA m1_high<>+0x08(SB)/8, $0xF052B91BF95BB012
GLOBL m1_high<>(SB), RODATA, $16
GLOBL m1_high<>(SB), (NOPTR+RODATA), $16
// Affine transform 2 (low and high nibbles)
DATA m2_low<>+0x00(SB)/8, $0x5B67F2CEA19D0834
DATA m2_low<>+0x08(SB)/8, $0xEDD14478172BBE82
GLOBL m2_low<>(SB), RODATA, $16
GLOBL m2_low<>(SB), (NOPTR+RODATA), $16
DATA m2_high<>+0x00(SB)/8, $0xAE7201DD73AFDC00
DATA m2_high<>+0x08(SB)/8, $0x11CDBE62CC1063BF
GLOBL m2_high<>(SB), RODATA, $16
GLOBL m2_high<>(SB), (NOPTR+RODATA), $16
// left rotations of 32-bit words by 8-bit increments
DATA r08_mask<>+0x00(SB)/8, $0x0605040702010003
DATA r08_mask<>+0x08(SB)/8, $0x0E0D0C0F0A09080B
GLOBL r08_mask<>(SB), RODATA, $16
GLOBL r08_mask<>(SB), (NOPTR+RODATA), $16
DATA r16_mask<>+0x00(SB)/8, $0x0504070601000302
DATA r16_mask<>+0x08(SB)/8, $0x0D0C0F0E09080B0A
GLOBL r16_mask<>(SB), RODATA, $16
GLOBL r16_mask<>(SB), (NOPTR+RODATA), $16
DATA r24_mask<>+0x00(SB)/8, $0x0407060500030201
DATA r24_mask<>+0x08(SB)/8, $0x0C0F0E0D080B0A09
GLOBL r24_mask<>(SB), RODATA, $16
GLOBL r24_mask<>(SB), (NOPTR+RODATA), $16
DATA fk_mask<>+0x00(SB)/8, $0x56aa3350a3b1bac6
DATA fk_mask<>+0x08(SB)/8, $0xb27022dc677d9197
GLOBL fk_mask<>(SB), RODATA, $16
GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
#define SM4_SBOX(x, y) \
; \ //############################# inner affine ############################//
LDP nibble_mask<>(SB), (R0, R1); \
VMOV R0, XTMP6.D[0]; \
VMOV R1, XTMP6.D[1]; \
VAND x.B16, XTMP6.B16, XTMP7.B16; \
LDP m1_low<>(SB), (R0, R1); \
VMOV R0, y.D[0]; \
VMOV R1, y.D[1]; \
VTBL XTMP7.B16, [y.B16], y.B16; \
VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
VTBL XTMP7.B16, [M1L.B16], y.B16; \
VUSHR $4, x.D2, x.D2; \
VAND x.B16, XTMP6.B16, XTMP7.B16; \
LDP m1_low<>(SB), (R0, R1); \
VMOV R0, V8.D[0]; \
VMOV R1, V8.D[1]; \
VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \
VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
VTBL XTMP7.B16, [M1H.B16], XTMP7.B16; \
VEOR y.B16, XTMP7.B16, x.B16; \
LDP inverse_shift_rows<>(SB), (R0, R1); \
VMOV R0, V8.D[0]; \
VMOV R1, V8.D[1]; \
VTBL V8.B16, [x.B16], x.B16; \
VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16; \
AESE ZERO.B16, x.B16; \
VAND x.B16, XTMP6.B16, XTMP7.B16; \
LDP m2_low<>(SB), (R0, R1); \
VMOV R0, y.D[0]; \
VMOV R1, y.D[1]; \
VTBL XTMP7.B16, [y.B16], y.B16; \
VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
VTBL XTMP7.B16, [M2L.B16], y.B16; \
VUSHR $4, x.D2, x.D2; \
VAND x.B16, XTMP6.B16, XTMP7.B16; \
LDP m2_high<>(SB), (R0, R1); \
VMOV R0, V8.D[0]; \
VMOV R1, V8.D[1]; \
VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \
VEOR y.B16, XTMP7.B16, x.B16; \
VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
VTBL XTMP7.B16, [M2H.B16], XTMP7.B16; \
VEOR y.B16, XTMP7.B16, x.B16
#define SM4_TAO_L1(x, y) \
SM4_SBOX(x, y); \
; \ //#################### 4 parallel L1 linear transforms ##################//
LDP r08_mask<>(SB), (R0, R1); \
VMOV R0, XTMP7.D[0]; \
VMOV R1, XTMP7.D[1]; \
VTBL XTMP7.B16, [x.B16], y.B16; \
VTBL R08_MASK.B16, [x.B16], y.B16; \
VEOR y.B16, x.B16, y.B16; \
LDP r16_mask<>(SB), (R0, R1); \
VMOV R0, V8.D[0]; \
VMOV R1, V8.D[1]; \
VTBL V8.B16, [x.B16], XTMP7.B16; \
VTBL R16_MASK.B16, [x.B16], XTMP7.B16; \
VEOR XTMP7.B16, y.B16, y.B16; \
VSHL $2, y.S4, XTMP7.S4; \
VUSHR $32, y.S4, y.S4; \
VEOR y.B16, XTMP7.B16, y.B16; \
LDP r24_mask<>(SB), (R0, R1); \
VMOV R0, V8.D[0]; \
VMOV R1, V8.D[1]; \
VTBL V8.B16, [x.B16], XTMP7.B16; \
VUSHR $30, y.S4, y.S4; \
VORR y.B16, XTMP7.B16, y.B16; \
VTBL R24_MASK.B16, [x.B16], XTMP7.B16; \
VEOR XTMP7.B16, x.B16, x.B16; \
VEOR y.B16, x.B16, x.B16
@ -133,22 +114,56 @@ GLOBL fk_mask<>(SB), RODATA, $16
VEOR XTMP7.B16, y.B16, y.B16; \
VEOR x.B16, y.B16, x.B16
// load_global_data_1 copies eight 16-byte read-only tables into their
// dedicated vector registers so later code can use them without reloading:
// flip_mask -> FLIP_MASK, nibble_mask -> NIBBLE_MASK, m1_low -> M1L,
// m1_high -> M1H, m2_low -> M2L, m2_high -> M2H, fk_mask -> FK_MASK and
// inverse_shift_rows -> INVERSE_SHIFT_ROWS.
// Each table is fetched with LDP into R0/R1 and moved lane by lane, so the
// macro overwrites R0 and R1.
#define load_global_data_1() \
LDP flip_mask<>(SB), (R0, R1) \
VMOV R0, FLIP_MASK.D[0] \
VMOV R1, FLIP_MASK.D[1] \
LDP nibble_mask<>(SB), (R0, R1) \
VMOV R0, NIBBLE_MASK.D[0] \
VMOV R1, NIBBLE_MASK.D[1] \
LDP m1_low<>(SB), (R0, R1) \
VMOV R0, M1L.D[0] \
VMOV R1, M1L.D[1] \
LDP m1_high<>(SB), (R0, R1) \
VMOV R0, M1H.D[0] \
VMOV R1, M1H.D[1] \
LDP m2_low<>(SB), (R0, R1) \
VMOV R0, M2L.D[0] \
VMOV R1, M2L.D[1] \
LDP m2_high<>(SB), (R0, R1) \
VMOV R0, M2H.D[0] \
VMOV R1, M2H.D[1] \
LDP fk_mask<>(SB), (R0, R1) \
VMOV R0, FK_MASK.D[0] \
VMOV R1, FK_MASK.D[1] \
LDP inverse_shift_rows<>(SB), (R0, R1) \
VMOV R0, INVERSE_SHIFT_ROWS.D[0] \
VMOV R1, INVERSE_SHIFT_ROWS.D[1]
// load_global_data_2 runs load_global_data_1() and then additionally loads
// the three byte-shuffle tables r08_mask, r16_mask and r24_mask into
// R08_MASK, R16_MASK and R24_MASK.
// Like load_global_data_1 it goes through R0/R1 and therefore overwrites them.
#define load_global_data_2() \
load_global_data_1() \
LDP r08_mask<>(SB), (R0, R1) \
VMOV R0, R08_MASK.D[0] \
VMOV R1, R08_MASK.D[1] \
LDP r16_mask<>(SB), (R0, R1) \
VMOV R0, R16_MASK.D[0] \
VMOV R1, R16_MASK.D[1] \
LDP r24_mask<>(SB), (R0, R1) \
VMOV R0, R24_MASK.D[0] \
VMOV R1, R24_MASK.D[1]
// func expandKeyAsm(key *byte, ck, enc, dec *uint32)
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
MOVD key+0(FP), R8
MOVD ck+8(FP), R9
MOVD enc+16(FP), R10
MOVD dec+24(FP), R11
VLD1 (R8), [t0.B16];
LDP flip_mask<>(SB), (R0, R1)
VMOV R0, FLIP_MASK.D[0]
VMOV R1, FLIP_MASK.D[1]
load_global_data_1()
VLD1 (R8), [t0.B16]
VTBL FLIP_MASK.B16, [t0.B16], t0.B16
LDP fk_mask<>(SB), (R0, R1)
VMOV R0, XTMP7.D[0]
VMOV R1, XTMP7.D[1]
VEOR t0.B16, XTMP7.B16, t0.B16
VEOR t0.B16, FK_MASK.B16, t0.B16
VMOV t0.S[1], t1.S[0]
VMOV t0.S[2], t2.S[0]
VMOV t0.S[3], t3.S[0]
@ -157,7 +172,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0
ADD $124, R11
VEOR ZERO.B16, ZERO.B16, ZERO.B16
loop:
ksLoop:
MOVW.P 4(R9), R19
VMOV R19, x.S[0]
VEOR t1.B16, x.B16, x.B16
@ -203,10 +218,9 @@ loop:
MOVW.P R2, -4(R11)
ADD $16, R0
CMP $4*32, R0
BNE loop
CMP $128, R0
BNE ksLoop
expand_end:
RET
// func encryptBlocksAsm(xk *uint32, dst, src *byte)
@ -243,9 +257,7 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
VMOV R21, t2.S[3]
VMOV R22, t3.S[3]
LDP flip_mask<>(SB), (R0, R1)
VMOV R0, FLIP_MASK.D[0]
VMOV R1, FLIP_MASK.D[1]
load_global_data_2()
VTBL FLIP_MASK.B16, [t0.B16], t0.B16
VTBL FLIP_MASK.B16, [t1.B16], t1.B16
@ -253,10 +265,9 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
VTBL FLIP_MASK.B16, [t3.B16], t3.B16
VEOR ZERO.B16, ZERO.B16, ZERO.B16
EOR R0, R0
loop:
encryptBlocksLoop:
MOVW.P 4(R8), R19
VMOV R19, x.S[0]
VMOV R19, x.S[1]
@ -289,7 +300,6 @@ loop:
VEOR t3.B16, x.B16, x.B16
SM4_TAO_L1(x, y)
VEOR x.B16, t2.B16, t2.B16
ADD $4, R0
MOVW.P 4(R8), R19
VMOV R19, x.S[0]
@ -303,8 +313,8 @@ loop:
VEOR x.B16, t3.B16, t3.B16
ADD $16, R0
CMP $4*32, R0
BNE loop
CMP $128, R0
BNE encryptBlocksLoop
VTBL FLIP_MASK.B16, [t0.B16], t0.B16
VTBL FLIP_MASK.B16, [t1.B16], t1.B16
@ -335,7 +345,6 @@ loop:
VMOV t0.S[3], V8.S[3]
VST1 [V8.B16], (R9)
done_sm4:
RET
@ -356,13 +365,12 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
VMOV R21, t2.S[0]
VMOV R22, t3.S[0]
load_global_data_2()
VEOR ZERO.B16, ZERO.B16, ZERO.B16
LDP flip_mask<>(SB), (R0, R1)
VMOV R0, FLIP_MASK.D[0]
VMOV R1, FLIP_MASK.D[1]
EOR R0, R0
loop:
encryptBlockLoop:
MOVW.P 4(R8), R19
VMOV R19, x.S[0]
VMOV R19, x.S[1]
@ -408,8 +416,8 @@ loop:
VEOR x.B16, t3.B16, t3.B16
ADD $16, R0
CMP $4*32, R0
BNE loop
CMP $128, R0
BNE encryptBlockLoop
VTBL FLIP_MASK.B16, [t0.B16], t0.B16
VTBL FLIP_MASK.B16, [t1.B16], t1.B16
@ -422,5 +430,4 @@ loop:
VMOV t0.S[0], V8.S[3]
VST1 [V8.B16], (R9)
done_sm4:
RET

View File

@ -1,3 +1,6 @@
//go:build amd64 || arm64
// +build amd64 arm64
package sm4
import (

View File

@ -12,7 +12,7 @@ import (
var supportSM4 = cpu.ARM64.HasSM4
var supportsAES = cpu.X86.HasAES || cpu.ARM64.HasAES
var supportsGFMUL = cpu.X86.HasPCLMULQDQ
var supportsGFMUL = cpu.X86.HasPCLMULQDQ || cpu.ARM64.HasPMULL
//go:noescape
func encryptBlocksAsm(xk *uint32, dst, src *byte)
@ -33,9 +33,9 @@ func newCipher(key []byte) (cipher.Block, error) {
}
c := sm4CipherAsm{sm4Cipher{make([]uint32, rounds), make([]uint32, rounds)}}
expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0])
//if supportsAES && supportsGFMUL {
// return &sm4CipherGCM{c}, nil
//}
if supportsAES && supportsGFMUL {
return &sm4CipherGCM{c}, nil
}
return &c, nil
}

View File

@ -5,14 +5,12 @@ package sm4
import (
"crypto/rand"
"fmt"
"io"
"reflect"
"testing"
"golang.org/x/sys/cpu"
"time"
)
/*
func TestExpandKey(t *testing.T) {
key := make([]byte, 16)
@ -45,19 +43,3 @@ func TestExpandKey(t *testing.T) {
}
}
}
*/
func TestExpandKeySimple(t *testing.T) {
fmt.Printf("cpu.ARM64.HasAES=%v\n", cpu.ARM64.HasAES)
key := make([]byte, 16)
encRes1 := make([]uint32, 32)
decRes1 := make([]uint32, 32)
encRes2 := make([]uint32, 32)
decRes2 := make([]uint32, 32)
io.ReadFull(rand.Reader, key)
expandKeyGo(key, encRes1, decRes1)
expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0])
fmt.Printf("expected=%v, result=%v\n", encRes1, encRes2)
fmt.Printf("expected=%v, result=%v\n", decRes1, decRes2)
}

View File

@ -1,3 +1,6 @@
//go:build amd64 || arm64
// +build amd64 arm64
package sm4
import (

326
sm4/gcm_arm64.s Normal file
View File

@ -0,0 +1,326 @@
#include "textflag.h"
#define B0 V0
#define B1 V1
#define B2 V2
#define B3 V3
#define B4 V4
#define B5 V5
#define B6 V6
#define B7 V7
#define ACC0 V8
#define ACC1 V9
#define ACCM V10
#define T0 V11
#define T1 V12
#define T2 V13
#define T3 V14
#define POLY V15
#define ZERO V16
#define INC V17
#define CTR V18
#define K0 V19
#define K1 V20
#define K2 V21
#define K3 V22
#define K4 V23
#define K5 V24
#define K6 V25
#define K7 V26
#define K8 V27
#define K9 V28
#define K10 V29
#define K11 V30
#define KLAST V31
// reduce combines the three partial products held in ACC0, ACC1 and ACCM
// and leaves a single 128-bit result in ACC0:
//   1. ACCM is XORed with both ACC0 and ACC1;
//   2. the two 64-bit halves of ACCM are shifted apart with VEXT against
//      ZERO and XORed into ACC0 and ACC1 respectively;
//   3. two VPMULL-by-POLY steps (each followed by a half swap of the
//      accumulator) fold the value, and ACC1 is finally XORed into ACC0.
// The caller must have set ZERO to all-zero and POLY to the reduction
// constant. T0, ACC1 and ACCM are overwritten.
// NOTE(review): this appears to be the usual Karatsuba recombination plus
// GHASH polynomial reduction - confirm against the GCM specification.
#define reduce() \
VEOR ACC0.B16, ACCM.B16, ACCM.B16 \
VEOR ACC1.B16, ACCM.B16, ACCM.B16 \
VEXT $8, ZERO.B16, ACCM.B16, T0.B16 \
VEXT $8, ACCM.B16, ZERO.B16, ACCM.B16 \
VEOR ACCM.B16, ACC0.B16, ACC0.B16 \
VEOR T0.B16, ACC1.B16, ACC1.B16 \
VPMULL POLY.D1, ACC0.D1, T0.Q1 \
VEXT $8, ACC0.B16, ACC0.B16, ACC0.B16 \
VEOR T0.B16, ACC0.B16, ACC0.B16 \
VPMULL POLY.D1, ACC0.D1, T0.Q1 \
VEOR T0.B16, ACC1.B16, ACC1.B16 \
VEXT $8, ACC1.B16, ACC1.B16, ACC1.B16 \
VEOR ACC1.B16, ACC0.B16, ACC0.B16 \
// func gcmSm4Finish(productTable *[256]byte, tagMask, T *[16]byte, pLen, dLen uint64)
//
// gcmSm4Finish completes a tag computation in place at T:
// it builds a block from the two length arguments, XORs it with the running
// state read from T, multiplies by the table entry stored at offset 14*16 of
// productTable, reduces, byte-reverses each 64-bit half, XORs the result with
// tagMask and writes the 16 bytes back to T.
TEXT ·gcmSm4Finish(SB),NOSPLIT,$0
#define pTbl R0
#define tMsk R1
#define tPtr R2
#define plen R3
#define dlen R4
// POLY = { 0xC2 << 56, 1 }. R0/R1 are used as scratch here, before they are
// loaded with the pTbl/tMsk arguments below.
MOVD $0xC2, R1
LSL $56, R1
MOVD $1, R0
VMOV R1, POLY.D[0]
VMOV R0, POLY.D[1]
// ZERO = 0 (required by reduce()).
VEOR ZERO.B16, ZERO.B16, ZERO.B16
MOVD productTable+0(FP), pTbl
MOVD tagMask+8(FP), tMsk
MOVD T+16(FP), tPtr
MOVD pLen+24(FP), plen
MOVD dLen+32(FP), dlen
// ACC0 = current state at T, B1 = tag mask.
VLD1 (tPtr), [ACC0.B16]
VLD1 (tMsk), [B1.B16]
// Both lengths are multiplied by 8 (presumably byte counts -> bit counts).
LSL $3, plen
LSL $3, dlen
// B0 = { dlen, plen } as two 64-bit lanes.
VMOV dlen, B0.D[0]
VMOV plen, B0.D[1]
// T1, T2 = the 32-byte table entry at productTable + 14*16.
ADD $14*16, pTbl
VLD1.P (pTbl), [T1.B16, T2.B16]
// Fold the running state into the length block.
VEOR ACC0.B16, B0.B16, B0.B16
// T0 = B0 XOR (B0 with its 64-bit halves swapped): operand for the middle
// product.
VEXT $8, B0.B16, B0.B16, T0.B16
VEOR B0.B16, T0.B16, T0.B16
// Three carry-less multiplies: low halves, high halves, middle term.
VPMULL B0.D1, T1.D1, ACC1.Q1
VPMULL2 B0.D2, T1.D2, ACC0.Q1
VPMULL T0.D1, T2.D1, ACCM.Q1
reduce()
// Byte-reverse each 64-bit half, apply the tag mask and store the result.
VREV64 ACC0.B16, ACC0.B16
VEOR B1.B16, ACC0.B16, ACC0.B16
VST1 [ACC0.B16], (tPtr)
RET
#undef pTbl
#undef tMsk
#undef tPtr
#undef plen
#undef dlen
// func precomputeTableAsm(productTable *[256]byte, src *[16]byte)
//
// precomputeTableAsm fills the 256-byte productTable with eight 32-byte
// entries derived from the 16-byte value at src. The first entry is written
// to offset 14*16 and each of the seven loop iterations writes the next
// entry 32 bytes lower, so the last one lands at offset 0. Every entry is a
// pair of vectors: a value and the XOR of that value's two 64-bit halves
// (the "Karatsuba pre-computation").
TEXT ·precomputeTableAsm(SB),NOSPLIT,$0
#define pTbl R0
#define SRC R1
#define I R3
MOVD productTable+0(FP), pTbl
MOVD src+8(FP), SRC
// POLY = { 0xC2 << 56, 1 }, ZERO = 0.
MOVD $0xC2, I
LSL $56, I
VMOV I, POLY.D[0]
MOVD $1, I
VMOV I, POLY.D[1]
VEOR ZERO.B16, ZERO.B16, ZERO.B16
// B0 = src with the bytes of each 64-bit half reversed.
VLD1 (SRC), [B0.B16]
VREV64 B0.B16, B0.B16
// Multiply by 2 modulo P
// I = sign-extension of B0.D[0] (all ones when its top bit is set), used to
// conditionally XOR in POLY; T2 carries the bit shifted out of one 64-bit
// lane into the other.
VMOV B0.D[0], I
ASR $63, I
VMOV I, T1.D[0]
VMOV I, T1.D[1]
VAND POLY.B16, T1.B16, T1.B16
VUSHR $63, B0.D2, T2.D2
VEXT $8, ZERO.B16, T2.B16, T2.B16
VSHL $1, B0.D2, B0.D2
VEOR T1.B16, B0.B16, B0.B16
VEOR T2.B16, B0.B16, B0.B16 // Can avoid this when VSLI is available
// Karatsuba pre-computation
// B1 = B0 XOR (B0 with halves swapped); store (B0, B1) as the first entry.
VEXT $8, B0.B16, B0.B16, B1.B16
VEOR B0.B16, B1.B16, B1.B16
ADD $14*16, pTbl
VST1 [B0.B16, B1.B16], (pTbl)
SUB $2*16, pTbl
// B2/B3 hold the most recently stored entry; the loop multiplies it by
// (B0, B1) seven times.
VMOV B0.B16, B2.B16
VMOV B1.B16, B3.B16
MOVD $7, I
initLoop:
// Compute powers of H
// SUBS sets the flags consumed by the BNE at the bottom of the loop; none
// of the vector instructions in between modify them.
SUBS $1, I
VPMULL B0.D1, B2.D1, T1.Q1
VPMULL2 B0.D2, B2.D2, T0.Q1
VPMULL B1.D1, B3.D1, T2.Q1
// Recombine the three partial products and reduce with POLY (same sequence
// as the reduce() macro, but on T0/T1/T2/T3).
VEOR T0.B16, T2.B16, T2.B16
VEOR T1.B16, T2.B16, T2.B16
VEXT $8, ZERO.B16, T2.B16, T3.B16
VEXT $8, T2.B16, ZERO.B16, T2.B16
VEOR T2.B16, T0.B16, T0.B16
VEOR T3.B16, T1.B16, T1.B16
VPMULL POLY.D1, T0.D1, T2.Q1
VEXT $8, T0.B16, T0.B16, T0.B16
VEOR T2.B16, T0.B16, T0.B16
VPMULL POLY.D1, T0.D1, T2.Q1
VEXT $8, T0.B16, T0.B16, T0.B16
VEOR T2.B16, T0.B16, T0.B16
VEOR T1.B16, T0.B16, B2.B16
// B2 = new value with its halves swapped, B3 = XOR of its two halves;
// store the pair and step down to the next 32-byte slot.
VMOV B2.B16, B3.B16
VEXT $8, B2.B16, B2.B16, B2.B16
VEOR B2.B16, B3.B16, B3.B16
VST1 [B2.B16, B3.B16], (pTbl)
SUB $2*16, pTbl
BNE initLoop
RET
#undef I
#undef SRC
#undef pTbl
// func gcmSm4Data(productTable *[256]byte, data []byte, T *[16]byte)
//
// gcmSm4Data absorbs data into the 16-byte running state at T, using the
// precomputed entries in productTable. The state is read from T on entry
// (it is not reset) and written back on exit, so calls can be chained.
// Processing paths:
//   - len(data) == 0:   T is stored back unchanged;
//   - len(data) == 13:  dedicated path (label dataTLS) that loads the 13
//                       bytes with three scalar loads;
//   - len(data) >= 128: 8 blocks (128 bytes) per iteration in octetsLoop;
//   - remaining 16-byte blocks one at a time in singlesLoop;
//   - a final partial block is assembled byte by byte, zero-padded, and
//     processed like a full block.
TEXT ·gcmSm4Data(SB),NOSPLIT,$0
#define pTbl R0
#define aut R1
#define tPtr R2
#define autLen R3
#define H0 R4
#define pTblSave R5
// mulRound(X) loads the next 32-byte table entry (post-incrementing pTbl),
// byte-reverses each 64-bit half of X, and XOR-accumulates the three
// partial products (low, high, middle) into ACC1, ACC0 and ACCM.
// Overwrites X, T0, T1, T2 and T3.
#define mulRound(X) \
VLD1.P 32(pTbl), [T1.B16, T2.B16] \
VREV64 X.B16, X.B16 \
VEXT $8, X.B16, X.B16, T0.B16 \
VEOR X.B16, T0.B16, T0.B16 \
VPMULL X.D1, T1.D1, T3.Q1 \
VEOR T3.B16, ACC1.B16, ACC1.B16 \
VPMULL2 X.D2, T1.D2, T3.Q1 \
VEOR T3.B16, ACC0.B16, ACC0.B16 \
VPMULL T0.D1, T2.D1, T3.Q1 \
VEOR T3.B16, ACCM.B16, ACCM.B16
MOVD productTable+0(FP), pTbl
MOVD data_base+8(FP), aut
MOVD data_len+16(FP), autLen
MOVD T+32(FP), tPtr
// Start from the state already stored at T instead of zero.
//VEOR ACC0.B16, ACC0.B16, ACC0.B16
VLD1 (tPtr), [ACC0.B16]
// Nothing to absorb: write T back unchanged.
CBZ autLen, dataBail
// POLY = { 0xC2 << 56, 1 }, ZERO = 0.
MOVD $0xC2, H0
LSL $56, H0
VMOV H0, POLY.D[0]
MOVD $1, H0
VMOV H0, POLY.D[1]
VEOR ZERO.B16, ZERO.B16, ZERO.B16
// Remember the table base; octetsLoop advances pTbl and restores it.
MOVD pTbl, pTblSave
CMP $13, autLen
BEQ dataTLS
CMP $128, autLen
BLT startSinglesLoop
B octetsLoop
dataTLS:
// Exactly 13 bytes: load them as 8 + 4 + 1 bytes into a zeroed B0 and go
// straight to the single-block multiply.
// NOTE(review): the label suggests this targets the 13-byte TLS additional
// data - confirm.
ADD $14*16, pTbl
VLD1.P (pTbl), [T1.B16, T2.B16]
VEOR B0.B16, B0.B16, B0.B16
MOVD (aut), H0
VMOV H0, B0.D[0]
MOVW 8(aut), H0
VMOV H0, B0.S[2]
MOVB 12(aut), H0
VMOV H0, B0.B[12]
MOVD $0, autLen
B dataMul
octetsLoop:
// Process 128 bytes (8 blocks) per iteration while at least 128 remain.
CMP $128, autLen
BLT startSinglesLoop
SUB $128, autLen
VLD1.P 32(aut), [B0.B16, B1.B16]
VLD1.P 32(pTbl), [T1.B16, T2.B16]
// The first block is combined with the running state and initialises the
// three accumulators; the other seven blocks are added by mulRound.
VREV64 B0.B16, B0.B16
VEOR ACC0.B16, B0.B16, B0.B16
VEXT $8, B0.B16, B0.B16, T0.B16
VEOR B0.B16, T0.B16, T0.B16
VPMULL B0.D1, T1.D1, ACC1.Q1
VPMULL2 B0.D2, T1.D2, ACC0.Q1
VPMULL T0.D1, T2.D1, ACCM.Q1
mulRound(B1)
VLD1.P 32(aut), [B2.B16, B3.B16]
mulRound(B2)
mulRound(B3)
VLD1.P 32(aut), [B4.B16, B5.B16]
mulRound(B4)
mulRound(B5)
VLD1.P 32(aut), [B6.B16, B7.B16]
mulRound(B6)
mulRound(B7)
// Rewind the table pointer for the next iteration and reduce once for all
// eight blocks.
MOVD pTblSave, pTbl
reduce()
B octetsLoop
startSinglesLoop:
// Single-block processing always uses the table entry at offset 14*16.
ADD $14*16, pTbl
VLD1.P (pTbl), [T1.B16, T2.B16]
singlesLoop:
CMP $16, autLen
BLT dataEnd
SUB $16, autLen
VLD1.P 16(aut), [B0.B16]
dataMul:
// Absorb one block held in B0: XOR with the state, multiply, reduce.
VREV64 B0.B16, B0.B16
VEOR ACC0.B16, B0.B16, B0.B16
VEXT $8, B0.B16, B0.B16, T0.B16
VEOR B0.B16, T0.B16, T0.B16
VPMULL B0.D1, T1.D1, ACC1.Q1
VPMULL2 B0.D2, T1.D2, ACC0.Q1
VPMULL T0.D1, T2.D1, ACCM.Q1
reduce()
B singlesLoop
dataEnd:
// Fewer than 16 bytes left. If none, finish; otherwise build a zero-padded
// block by reading the tail backwards, one byte per iteration, shifting B0
// up by one byte each time.
CBZ autLen, dataBail
VEOR B0.B16, B0.B16, B0.B16
ADD autLen, aut
dataLoadLoop:
MOVB.W -1(aut), H0
VEXT $15, B0.B16, ZERO.B16, B0.B16
VMOV H0, B0.B[0]
SUBS $1, autLen
BNE dataLoadLoop
// autLen is now 0, so after this multiply singlesLoop falls through
// dataEnd to dataBail.
B dataMul
dataBail:
// Write the updated state back to T.
VST1 [ACC0.B16], (tPtr)
RET
#undef pTbl
#undef aut
#undef tPtr
#undef autLen
#undef H0
#undef pTblSave

View File

@ -1,3 +1,6 @@
//go:build amd64 || arm64
// +build amd64 arm64
package sm4
import (

View File

@ -1,5 +1,5 @@
//go:build amd64
// +build amd64
//go:build amd64 || arm64
// +build amd64 arm64
package sm4

146
sm4/sm4_gcm_test.go Normal file
View File

@ -0,0 +1,146 @@
//go:build amd64 || arm64
// +build amd64 arm64
package sm4
import (
"fmt"
"testing"
)
// genPrecomputeTable builds the gcmAsm fixture shared by the tests in this
// file. It expands a fixed 128-bit SM4 key, encrypts an all-zero block to
// obtain the hash key (which it also prints), and runs precomputeTableAsm to
// fill the fixture's product table.
func genPrecomputeTable() *gcmAsm {
	var hashKey [gcmBlockSize]byte
	key := []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}

	// Expand the round keys directly inside the GCM-capable cipher wrapper.
	block := &sm4CipherGCM{sm4CipherAsm{sm4Cipher{make([]uint32, rounds), make([]uint32, rounds)}}}
	expandKeyAsm(&key[0], &ck[0], &block.enc[0], &block.dec[0])

	// The hash key is the encryption of the zero block.
	block.Encrypt(hashKey[:], hashKey[:])
	fmt.Println(hashKey)

	g := &gcmAsm{cipher: &block.sm4CipherAsm}
	precomputeTableAsm(&g.bytesProductTable, &hashKey)
	return g
}
/*
amd64 result = {
0xEF, 0xE0, 0x28, 0x75, 0x21, 0x1F, 0x10, 0x4B, 0x6C, 0xC6, 0x39, 0x8A, 0x88, 0xE0, 0x26, 0x16,
0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D, 0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D,
0xD1, 0x99, 0x07, 0x39, 0xBA, 0x15, 0x68, 0xA7, 0xB8, 0x50, 0xC2, 0xB3, 0xD6, 0xFA, 0xA7, 0x02,
0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5, 0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5,
0xC4, 0x65, 0xCA, 0xCA, 0x55, 0x7F, 0x2B, 0x72, 0xB1, 0xA4, 0x14, 0x62, 0xDE, 0xBD, 0x1B, 0x00,
0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72, 0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72,
0x85, 0xF6, 0x58, 0x15, 0x09, 0x45, 0xB9, 0x72, 0x00, 0x30, 0xAB, 0x91, 0x2A, 0x73, 0xB7, 0x1C,
0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E, 0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E,
0x70, 0xD7, 0xD2, 0x6D, 0x60, 0xBA, 0x5E, 0x2E, 0x43, 0x4C, 0x4A, 0xCF, 0xFA, 0xE2, 0xF1, 0x5B,
0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75, 0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75,
0xED, 0xEB, 0x6C, 0xD4, 0x1B, 0x6C, 0x86, 0x6A, 0xA1, 0x16, 0xA5, 0xFF, 0x33, 0xDC, 0xBB, 0xC0,
0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA, 0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA,
0xBF, 0x7C, 0x2D, 0x4E, 0xFD, 0xDD, 0x55, 0x77, 0x1C, 0x7E, 0x73, 0xC7, 0xAA, 0x8B, 0x73, 0x2F,
0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58, 0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58,
0x54, 0x44, 0xA9, 0xB7, 0x20, 0x66, 0xAA, 0x2E, 0x99, 0x45, 0x82, 0x13, 0xD6, 0xE8, 0xEF, 0x4C,
0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, 0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, }
arm64 result = {
0x6C, 0xC6, 0x39, 0x8A, 0x88, 0xE0, 0x26, 0x16, 0xEF, 0xE0, 0x28, 0x75, 0x21, 0x1F, 0x10, 0x4B,
0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D, 0x83, 0x26, 0x11, 0xFF, 0xA9, 0xFF, 0x36, 0x5D,
0xB8, 0x50, 0xC2, 0xB3, 0xD6, 0xFA, 0xA7, 0x02, 0xD1, 0x99, 0x07, 0x39, 0xBA, 0x15, 0x68, 0xA7,
0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5, 0x69, 0xC9, 0xC5, 0x8A, 0x6C, 0xEF, 0xCF, 0xA5,
0xB1, 0xA4, 0x14, 0x62, 0xDE, 0xBD, 0x1B, 0x00, 0xC4, 0x65, 0xCA, 0xCA, 0x55, 0x7F, 0x2B, 0x72,
0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72, 0x75, 0xC1, 0xDE, 0xA8, 0x8B, 0xC2, 0x30, 0x72,
0x00, 0x30, 0xAB, 0x91, 0x2A, 0x73, 0xB7, 0x1C, 0x85, 0xF6, 0x58, 0x15, 0x09, 0x45, 0xB9, 0x72,
0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E, 0x85, 0xC6, 0xF3, 0x84, 0x23, 0x36, 0x0E, 0x6E,
0x43, 0x4C, 0x4A, 0xCF, 0xFA, 0xE2, 0xF1, 0x5B, 0x70, 0xD7, 0xD2, 0x6D, 0x60, 0xBA, 0x5E, 0x2E,
0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75, 0x33, 0x9B, 0x98, 0xA2, 0x9A, 0x58, 0xAF, 0x75,
0xA1, 0x16, 0xA5, 0xFF, 0x33, 0xDC, 0xBB, 0xC0, 0xED, 0xEB, 0x6C, 0xD4, 0x1B, 0x6C, 0x86, 0x6A,
0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA, 0x4C, 0xFD, 0xC9, 0x2B, 0x28, 0xB0, 0x3D, 0xAA,
0x1C, 0x7E, 0x73, 0xC7, 0xAA, 0x8B, 0x73, 0x2F, 0xBF, 0x7C, 0x2D, 0x4E, 0xFD, 0xDD, 0x55, 0x77,
0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58, 0xA3, 0x02, 0x5E, 0x89, 0x57, 0x56, 0x26, 0x58,
0x99, 0x45, 0x82, 0x13, 0xD6, 0xE8, 0xEF, 0x4C, 0x54, 0x44, 0xA9, 0xB7, 0x20, 0x66, 0xAA, 0x2E,
0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62, 0xCD, 0x01, 0x2B, 0xA4, 0xF6, 0x8E, 0x45, 0x62,
}
*/
// TestPrecomputeTableAsm dumps the 256-byte product table as 16 rows of 16
// hex bytes so it can be compared by eye with the reference values recorded
// in the comment above (the layout differs between amd64 and arm64).
func TestPrecomputeTableAsm(t *testing.T) {
	const rowLen = 16
	table := &genPrecomputeTable().bytesProductTable
	for start := 0; start < rowLen*rowLen; start += rowLen {
		for _, b := range table[start : start+rowLen] {
			fmt.Printf("0x%02X, ", b)
		}
		fmt.Println()
	}
}
/*
amd64 result = {
7D 13 81 A2 78 ED 2D 5E 91 3E 7F 9A 15 2C 76 DA
}
arm64 result = {
91 3E 7F 9A 15 2C 76 DA 7D 13 81 A2 78 ED 2D 5E
}
*/
func TestGcmSm4Data(t *testing.T) {
g := genPrecomputeTable()
var counter [gcmBlockSize]byte
nonce := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}
gcmSm4Data(&g.bytesProductTable, nonce, &counter)
for j := 0; j < 16; j++ {
fmt.Printf("%02X ", counter[j])
}
fmt.Println()
}
/*
amd64 result = {
8F F3 05 10 EA 99 A8 D7 41 D9 E3 BA 67 D6 18 EE
}
arm64 result = {
8F F3 05 10 EA 99 A8 D7 41 D9 E3 BA 67 D6 18 EE
}
*/
func TestGcmSm4Finish(t *testing.T) {
g := genPrecomputeTable()
var counter, tagMask [gcmBlockSize]byte
nonce := []byte{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13}
gcmSm4Data(&g.bytesProductTable, nonce, &counter)
gcmSm4Finish(&g.bytesProductTable, &tagMask, &counter, uint64(len(nonce)), uint64(0))
for j := 0; j < 16; j++ {
fmt.Printf("%02X ", counter[j])
}
fmt.Println()
}
/*
amd64 result= {
71 F0 B5 6E B6 6A 89 11 98 01 23 72 4B F6 0D 0C
5B 36 17 D5 95 7E B6 42 8C 6A C7 E1 80 76 70 B6
16 3E 35 A0 B7 51 62 AA 1D AF C1 15 2D C4 3B 9D
}
arm64 result = {
98 01 23 72 4B F6 0D 0C 71 F0 B5 6E B6 6A 89 11
8C 6A C7 E1 80 76 70 B6 5B 36 17 D5 95 7E B6 42
16 3E 35 A0 B7 51 62 AA 1D AF C1 15 2D C4 3B 9D
}
*/
func TestBothDataPlaintext(t *testing.T) {
g := genPrecomputeTable()
var tagOut, tagMask [gcmBlockSize]byte
data := []byte("emmansun")
gcmSm4Data(&g.bytesProductTable, data, &tagOut)
for j := 0; j < 16; j++ {
tagMask[j] = byte(j)
}
for j := 0; j < 16; j++ {
fmt.Printf("%02X ", tagOut[j])
}
fmt.Println()
gcmSm4Data(&g.bytesProductTable, []byte("emmansunemmansunemmansunemmansun"), &tagOut)
for j := 0; j < 16; j++ {
fmt.Printf("%02X ", tagOut[j])
}
fmt.Println()
gcmSm4Finish(&g.bytesProductTable, &tagMask, &tagOut, uint64(32), uint64(8))
for j := 0; j < 16; j++ {
fmt.Printf("%02X ", tagOut[j])
}
fmt.Println()
}

View File

@ -75,42 +75,42 @@ var sm4GCMTests = []struct {
"",
"97ce841f7d174d76969fb46b19e742cf28983f4439909cbb6c27662dd4fbbc73",
},
{
{ //#9
"fe47fcce5fc32665d2ae399e4eec72ba",
"5adb9609dbaeb58cbd6e7275",
"7c0e88c88899a779228465074797cd4c2e1498d259b54390b85e3eef1c02df60e743f1b840382c4bccaf3bafb4ca8429bea063",
"88319d6e1d3ffa5f987199166c8a9b56c2aeba5a",
"2276da0e9a4ccaa2a5934c96ba1dc6b0a52b3430ca011b4db4bf6e298b3a58425402952806350fdda7ac20bc38838d7124ee7c333e395b9a94c508b6bf0ce6b2d10d61",
},
{
{ //#10
"ec0c2ba17aa95cd6afffe949da9cc3a8",
"296bce5b50b7d66096d627ef",
"b85b3753535b825cbe5f632c0b843c741351f18aa484281aebec2f45bb9eea2d79d987b764b9611f6c0f8641843d5d58f3a242",
"f8d00f05d22bf68599bcdeb131292ad6e2df5d14",
"3175cd3cb772af34490e4f5203b6a5743cd9b3798c387b7bda2708ff82d520c35d3022767b2d0fe4addff59fb25ead69ca3dd4d73ce1b4cb53a7c4cdc6a4c1fb06c316",
},
{
{ //#11
"2c1f21cf0f6fb3661943155c3e3d8492",
"23cb5ff362e22426984d1907",
"42f758836986954db44bf37c6ef5e4ac0adaf38f27252a1b82d02ea949c8a1a2dbc0d68b5615ba7c1220ff6510e259f06655d8",
"5d3624879d35e46849953e45a32a624d6a6c536ed9857c613b572b0333e701557a713e3f010ecdf9a6bd6c9e3e44b065208645aff4aabee611b391528514170084ccf587177f4488f33cfb5e979e42b6e1cfc0a60238982a7aec",
"9db299bb7f9d6914c4a13589cf41ab014445e4914c1571745d50508bf0f6adeaa41aa4b081a444ee82fed6769da92f5e727d004b21791f961e212a69bfe80af14e7adf",
},
{
{ //#12
"d9f7d2411091f947b4d6f1e2d1f0fb2e",
"e1934f5db57cc983e6b180e7",
"73ed042327f70fe9c572a61545eda8b2a0c6e1d6c291ef19248e973aee6c312012f490c2c6f6166f4a59431e182663fcaea05a",
"0a8a18a7150e940c3d87b38e73baee9a5c049ee21795663e264b694a949822b639092d0e67015e86363583fcf0ca645af9f43375f05fdb4ce84f411dcbca73c2220dea03a20115d2e51398344b16bee1ed7c499b353d6c597af8",
"65c81d83857626a3ec94c913a9f44fa065b6cd61ca5dd6e15e15bb7f16e757202ef966ab1f1e8e6dcbc82f002d29ba6070f53cd79767b1cbcb8cdb656a6a4369f297fc",
},
{
{ //#13
"fe9bb47deb3a61e423c2231841cfd1fb",
"4d328eb776f500a2f7fb47aa",
"f1cc3818e421876bb6b8bbd6c9",
"",
"d8ce306b812aa1b09299ceef804e76b1cb3f736791a5b0d93774d40c2a",
},
{
{ //#14
"6703df3701a7f54911ca72e24dca046a",
"12823ab601c350ea4bc2488c",
"793cd125b0b84a043e3ac67717",
@ -118,49 +118,49 @@ var sm4GCMTests = []struct {
"f42f741a51c02f71a99519f60a55c8dbdcc9a15549158cc1acd6754847",
},
// These cases test non-standard nonce sizes.
{
{ //#15
"1672c3537afa82004c6b8a46f6f0d026",
"05",
"",
"",
"65bde02c20351976153d5d2b49790e30",
},
{
{ //#16
"9a4fea86a621a91ab371e492457796c0",
"75",
"ca6131faf0ff210e4e693d6c31c109fc5b6f54224eb120f37de31dc59ec669b6",
"4f6e2585c161f05a9ae1f2f894e9f0ab52b45d0f",
"b86d6055e7e07a664801ccce38172bf7d91dc20babf2c0662d635cc9111ffefb308ee64ce01afe544b6ee1a65b803cb9",
},
{
{ //#17
"d0f1f4defa1e8c08b4b26d576392027c",
"42b4f01eb9f5a1ea5b1eb73b0fb0baed54f387ecaa0393c7d7dffc6af50146ecc021abf7eb9038d4303d91f8d741a11743166c0860208bcc02c6258fd9511a2fa626f96d60b72fcff773af4e88e7a923506e4916ecbd814651e9f445adef4ad6a6b6c7290cc13b956130eef5b837c939fcac0cbbcc9656cd75b13823ee5acdac",
"",
"",
"1edcf8ea546af4879379e7653c53dddc",
},
{
{ //#18
"4a0c00a3d284dea9d4bf8b8dde86685e",
"f8cbe82588e784bcacbe092cd9089b51e01527297f635bf294b3aa787d91057ef23869789698ac960707857f163ecb242135a228ad93964f5dc4a4d7f88fd7b3b07dd0a5b37f9768fb05a523639f108c34c661498a56879e501a2321c8a4a94d7e1b89db255ac1f685e185263368e99735ebe62a7f2931b47282be8eb165e4d7",
"6d4bf87640a6a48a50d28797b7",
"8d8c7ffc55086d539b5a8f0d1232654c",
"193952a26ab455b3c16db216bb2597cba90a9946dec5b7d085ceb7408e",
},
{
{ //#19
"0e18a844ac5bf38e4cd72d9b0942e506",
"0870d4b28a2954489a0abcd5",
"67c6697351ff4aec29cdbaabf2fbe3467cc254f81be8e78d765a2e63339fc99a66320db73158a35a255d051758e95ed4abb2cdc69bb454110e827441213ddc8770e93ea141e1fc673e017e97eadc6b968f385c2aecb03bfb32af3c54ec18db5c021afe43fbfaaa3afb29d1e6053c7c9475d8be6189f95cbba8990f95b1ebf1b3",
"05eff700e9a13ae5ca0bcbd0484764bd1f231ea81c7b64c514735ac55e4b79633b706424119e09dcaad4acf21b10af3b33cde3504847155cbb6f2219ba9b7df50be11a1c7f23f829f8a41b13b5ca4ee8983238e0794d3d34bc5f4e77facb6c05ac86212baa1a55a2be70b5733b045cd33694b3afe2f0e49e4f321549fd824ea9",
"f492d37084697e941acd69c3d8b53d91760f4bced0fdff529327fb03000b865fbf87133c5816bdafdd23013f1440a30835b7e4d57bb6660e14b438b19b5b07a03f74369f2a11a163e5fcc4fd7ea139982ccf589533011d8efab4a44f6154043099b39f19754a4f434290299c2faa838b92453a1b989f354e7b50ea558daf1f6a88ea50b481a4ffcdd634f324f27cb3f6",
},
{
{ //#20
"1f6c3a3bc0542aabba4ef8f6c7169e73",
"f3584606472b260e0dd2ebb2",
"67c6697351ff4aec29cdbaabf2fbe3467cc254f81be8e78d765a2e63339fc99a66320db73158a35a255d051758e95ed4abb2cdc69bb454110e827441213ddc8770e93ea141e1fc673e017e97eadc6b968f385c2aecb03bfb32af3c54ec18db5c021afe43fbfaaa3afb29d1e6053c7c9475d8be6189f95cbba8990f95b1ebf1b305eff700e9a13ae5ca0bcbd0484764bd1f231ea81c7b64c514735ac55e4b79633b706424119e09dcaad4acf21b10af3b33cde3504847155cbb6f2219ba9b7df50be11a1c7f23f829f8a41b13b5ca4ee8983238e0794d3d34bc5f4e77facb6c05ac86212baa1a55a2be70b5733b045cd33694b3afe2f0e49e4f321549fd824ea90870d4b28a2954489a0abcd50e18a844ac5bf38e4cd72d9b0942e506c433afcda3847f2dadd47647de321cec4ac430f62023856cfbb20704f4ec0bb920ba86c33e05f1ecd96733b79950a3e314d3d934f75ea0f210a8f6059401beb4bc4478fa4969e623d01ada696a7e4c7e5125b34884533a94fb319990325744ee9bbce9e525cf08f5e9e25e5360aad2b2d085fa54d835e8d466826498d9a8877565705a8a3f62802944de7ca5894e5759d351adac869580ec17e485f18c0c66f17cc07cbb22fce466da610b63af62bc83b4692f3affaf271693ac071fb86d11342d8def4f89d4b66335c1c7e4248367d8ed9612ec453902d8e50af89d7709d1a596c1f41f",
"95aa82ca6c49ae90cd1668baac7aa6f2b4a8ca99b2c2372acb08cf61c9c3805e6e0328da4cd76a19edd2d3994c798b0022569ad418d1fee4d9cd45a391c601ffc92ad91501432fee150287617c13629e69fc7281cd7165a63eab49cf714bce3a75a74f76ea7e64ff81eb61fdfec39b67bf0de98c7e4e32bdf97c8c6ac75ba43c02f4b2ed7216ecf3014df000108b67cf99505b179f8ed4980a6103d1bca70dbe9bbfab0ed59801d6e5f2d6f67d3ec5168e212e2daf02c6b963c98a1f7097de0c56891a2b211b01070dd8fd8b16c2a1a4e3cfd292d2984b3561d555d16c33ddc2bcf7edde13efe520c7e2abdda44d81881c531aeeeb66244c3b791ea8acfb6a68",
"c40924873aa2ef1b1b7bf4e16576446b4d24ab529c3f526cdbf7ea1cf64a73f26e4077d1464d1af165b26138ae65281dc3ca0d0998cce7b3c4fe2de5007c5c47ae586016fb11eb1b5ee1f775005b00f2c030c22fbebffc4c7fb3f4ae5b0032e7ab79b3fa48e17bb576486ba73ada0322577efd52b79f229da7e05d00a215ab3a1d717ede7c383c2eff400c4fd13c2eb6dd9e4165f67a7f5260619e459d7d9e2d276f44839ea1ec8bcc460a94b759b12b49f49ba350dab04313953d9ac0a8ac2fdd2b5cbfc70c62cdfaea658427afdc7a8a86c6a3b85c795364077fab193e87965a2cc45cbc82656e62410f027b79276317d7a1a81ebc721af6174f34e7d524c2b333e9802d2ecebec414bbdecd4587bc15079001ef140d65f689bb8f686cd670376d1e579a23fc5d098137ef2f11ec4413fcc308e689f4fcb11bde15c657651ee82694cdb676a286b2059fdf41210eceb9f03c3add1e316495a613d85e9126f4e4ba4565a2465fe578587748476360e353c2cd0e880100be8821ddae242f54efb4e7079420312443834db98e9252456b97cd1925880fffba64b0fcf2c8c05f49e0739c78df846975d99d8072b7c3c2ed5df96cdc3ad3a5dfb9d9fa8a73154765f33ca68a64bfced57391bd54250d5681aa09c28970f1fad0627205a0ea68e02bba7edb8e4f2468d70c879a585461349637639887d41f3206da7421bba36c142947a5bfa91ed341b466f8f6c8c12af0f2",
},
{
{ //#21
"0795d80bc7f40f4d41c280271a2e4f7f",
"ff824c906594aff365d3cb1f",
"1ad4e74d127f935beee57cff920665babe7ce56227377afe570ba786193ded3412d4812453157f42fafc418c02a746c1232c234a639d49baa8f041c12e2ef540027764568ce49886e0d913e28059a3a485c6eee96337a30b28e4cd5612c2961539fa6bc5de034cbedc5fa15db844013e0bef276e27ca7a4faf47a5c1093bd643354108144454d221b3737e6cb87faac36ed131959babe44af2890cfcc4e23ffa24470e689ce0894f5407bb0c8665cff536008ad2ac6f1c9ef8289abd0bd9b72f21c597bda5210cf928c805af2dd4a464d52e36819d521f967bba5386930ab5b4cf4c71746d7e6e964673457348e9d71d170d9eb560bd4bdb779e610ba816bf776231ebd0af5966f5cdab6815944032ab4dd060ad8dab880549e910f1ffcf6862005432afad",
@ -168,35 +168,35 @@ var sm4GCMTests = []struct {
"598798e51e3b70677ee1cd17c25dd6a4752f42aa51b2d055df9992e46afc8e48ac0e99f645bbab4388bc22bc674ecd3bea4f59dbe77a3e33f1b66d751f2772b59eb462443d2de8f27cbf057b8e00c000e2653a597c440cdd3a87a83f7a2f26f3966ba26fc60c05de7da075e635fdd3b5fefa816398855e099ab746278fc57f65b7573f5372a676ca5a9835d0e158f16201ea16fb6685da1829cffc6cea57a9937e822dc6becd7679239c55df5b88caa91522eeb3223dd9357d374a5b3be015624ca21ff667f427d94e9c5cd6e9ec227d3fb2b8c3835dfe5cd8949da744f8d30470a5f36dc33f3f57586ff9e4f117d94b1d1a94318a7cecb61f0386b2e34d4d39e965640e2fc211f34552352ef1df24f409583f82d4b259bf0f9358c3330bea2a2cab2fd303d8cd22abce5339576d8a6736f46589d8",
},
// These cases test non-standard tag sizes.
{
{ //#22
"89c54b0d3bc3c397d5039058c220685f",
"bc7f45c00868758d62d4bb4d",
"582670b0baf5540a3775b6615605bd05",
"48d16cda0337105a50e2ed76fd18e114",
"6e37e818153f115f2fab4c890f3eac139a3ee8b30bf2cbcb54c39ff0651313",
},
{
{ //#23
"bad6049678bf75c9087b3e3ae7e72c13",
"a0a017b83a67d8f1b883e561",
"a1be93012f05a1958440f74a5311f4a1",
"f7c27b51d5367161dc2ff1e9e3edc6f2",
"baa7c826af7983e1824558e7e31d04063543c8a5e80eb58af0e38b7a1581",
},
{
{ //#24
"66a3c722ccf9709525650973ecc100a9",
"1621d42d3a6d42a2d2bf9494",
"61fa9dbbed2190fbc2ffabf5d2ea4ff8",
"d7a9b6523b8827068a6354a6d166c6b9",
"4e920aff4744aef585b81c80fe962231d13d8f7f03e56a06cb33d12491",
},
{
{ //#25
"562ae8aadb8d23e0f271a99a7d1bd4d1",
"f7a5e2399413b89b6ad31aff",
"bbdc3504d803682aa08a773cde5f231a",
"2b9680b886b3efb7c6354b38c63b5373",
"716a4e0150125a51e72f95d900814fc37b0ddba2a85bda1f8819b774",
},
{
{ //#26
"11754cd72aec309bf52f7687212e8957",
"",
"",