align format

commit 57d899613d (parent 488c0db854)

sm4/asm_arm64.s (476 lines changed)
@@ -66,179 +66,179 @@ DATA fk_mask<>+0x08(SB)/8, $0xb27022dc677d9197
GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16

#define SM4_SBOX(x, y) \
	; \ //############################# inner affine ############################//
	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
	VTBL XTMP7.B16, [M1L.B16], y.B16; \
	VUSHR $4, x.D2, x.D2; \
	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
	VTBL XTMP7.B16, [M1H.B16], XTMP7.B16; \
	VEOR y.B16, XTMP7.B16, x.B16; \
	VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16; \
	AESE ZERO.B16, x.B16; \
	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
	VTBL XTMP7.B16, [M2L.B16], y.B16; \
	VUSHR $4, x.D2, x.D2; \
	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
	VTBL XTMP7.B16, [M2H.B16], XTMP7.B16; \
	VEOR y.B16, XTMP7.B16, x.B16

#define SM4_TAO_L1(x, y) \
	SM4_SBOX(x, y); \
	; \ //#################### 4 parallel L1 linear transforms ##################//
	VTBL R08_MASK.B16, [x.B16], y.B16; \
	VEOR y.B16, x.B16, y.B16; \
	VTBL R16_MASK.B16, [x.B16], XTMP7.B16; \
	VEOR XTMP7.B16, y.B16, y.B16; \
	VSHL $2, y.S4, XTMP7.S4; \
	VUSHR $30, y.S4, y.S4; \
	VORR y.B16, XTMP7.B16, y.B16; \
	VTBL R24_MASK.B16, [x.B16], XTMP7.B16; \
	VEOR XTMP7.B16, x.B16, x.B16; \
	VEOR y.B16, x.B16, x.B16

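Per 32-bit lane, SM4_TAO_L1 applies SM4_SBOX and then the standard SM4 encryption linear transform L: the two VTBL shuffles produce the byte rotations (<<<8, <<<16, <<<24) and the VSHL/VUSHR/VORR triple rotates the accumulated value left by 2. A minimal scalar sketch of the linear part in Go (names are illustrative):

```go
package main

import (
	"fmt"
	"math/bits"
)

// l1 computes L(b) = b ^ (b<<<2) ^ (b<<<10) ^ (b<<<18) ^ (b<<<24),
// the per-word effect of the NEON sequence above:
// x ^ (x<<<24) ^ ((x ^ x<<<8 ^ x<<<16) <<< 2) expands to exactly these terms.
func l1(b uint32) uint32 {
	return b ^ bits.RotateLeft32(b, 2) ^ bits.RotateLeft32(b, 10) ^
		bits.RotateLeft32(b, 18) ^ bits.RotateLeft32(b, 24)
}

func main() {
	fmt.Printf("%#08x\n", l1(0x01234567))
}
```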
#define SM4_TAO_L2(x, y) \
	SM4_SBOX(x, y); \
	; \ //#################### 4 parallel L2 linear transforms ##################//
	VSHL $13, x.S4, XTMP6.S4; \
	VUSHR $19, x.S4, y.S4; \
	VORR XTMP6.B16, y.B16, y.B16; \
	VSHL $23, x.S4, XTMP6.S4; \
	VUSHR $9, x.S4, XTMP7.S4; \
	VORR XTMP6.B16, XTMP7.B16, XTMP7.B16; \
	VEOR XTMP7.B16, y.B16, y.B16; \
	VEOR x.B16, y.B16, x.B16

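SM4_TAO_L2 is the key-schedule variant: the same S-box, followed by the lighter linear transform L'. The two VSHL/VUSHR/VORR pairs build the <<<13 and <<<23 rotations. A scalar sketch (illustrative):

```go
package main

import (
	"fmt"
	"math/bits"
)

// l2 computes L'(b) = b ^ (b<<<13) ^ (b<<<23), the per-word effect of the
// shift/or pairs above.
func l2(b uint32) uint32 {
	return b ^ bits.RotateLeft32(b, 13) ^ bits.RotateLeft32(b, 23)
}

func main() {
	fmt.Printf("%#08x\n", l2(0x01234567))
}
```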
#define SM4_ROUND(RK, x, y, t0, t1, t2, t3) \
	MOVW.P 4(RK), R19; \
	VMOV R19, x.S4; \
	VEOR t1.B16, x.B16, x.B16; \
	VEOR t2.B16, x.B16, x.B16; \
	VEOR t3.B16, x.B16, x.B16; \
	SM4_TAO_L1(x, y); \
	VEOR x.B16, t0.B16, t0.B16

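SM4_ROUND loads the next round key with a post-incremented MOVW.P, broadcasts it, and computes t0 ^= T(t1 ^ t2 ^ t3 ^ rk) on four lanes at once, where T is SM4_SBOX followed by the L1 transform. A per-word sketch; T is a parameter here because the S-box table is not reproduced:

```go
package main

import "fmt"

// round mirrors SM4_ROUND on a single word: t0 ^= T(t1 ^ t2 ^ t3 ^ rk).
func round(T func(uint32) uint32, rk, t0, t1, t2, t3 uint32) uint32 {
	return t0 ^ T(t1^t2^t3^rk)
}

func main() {
	identity := func(x uint32) uint32 { return x } // stand-in for the real T
	fmt.Printf("%#08x\n", round(identity, 0x0f, 1, 2, 4, 8))
}
```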
#define SM4_EXPANDKEY_ROUND(x, y, t0, t1, t2, t3) \
	MOVW.P 4(R9), R19; \
	VMOV R19, x.S[0]; \
	VEOR t1.B16, x.B16, x.B16; \
	VEOR t2.B16, x.B16, x.B16; \
	VEOR t3.B16, x.B16, x.B16; \
	SM4_TAO_L2(x, y); \
	VEOR x.B16, t0.B16, t0.B16; \
	VMOV t0.S[0], R2; \
	MOVW.P R2, 4(R10); \
	MOVW.P R2, -4(R11)

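SM4_EXPANDKEY_ROUND is the same dataflow with T' (SM4_TAO_L2) and, in addition, stores each fresh round key twice: forward into the encryption schedule (post-increment through R10) and backward into the decryption schedule (through R11), since decryption consumes round keys in reverse. A sketch of one step, with T' as a parameter and hypothetical slice-based storage:

```go
package main

import "fmt"

// expandRound derives rk = k0 ^ T'(k1 ^ k2 ^ k3 ^ ck), shifts the window,
// and stores rk forward into enc and backward into dec, mirroring the
// macro's MOVW.P stores through R10 and R11.
func expandRound(Tp func(uint32) uint32, ck uint32, k *[4]uint32, enc, dec []uint32, i int) {
	rk := k[0] ^ Tp(k[1]^k[2]^k[3]^ck)
	k[0], k[1], k[2], k[3] = k[1], k[2], k[3], rk
	enc[i] = rk
	dec[31-i] = rk
}

func main() {
	identity := func(x uint32) uint32 { return x } // stand-in for the real T'
	k := [4]uint32{1, 2, 3, 4}
	enc := make([]uint32, 32)
	dec := make([]uint32, 32)
	expandRound(identity, 0, &k, enc, dec, 0)
	fmt.Println(enc[0], dec[31])
}
```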
#define load_global_data_1() \
	LDP nibble_mask<>(SB), (R0, R1) \
	VMOV R0, NIBBLE_MASK.D[0] \
	VMOV R1, NIBBLE_MASK.D[1] \
	LDP m1_low<>(SB), (R0, R1) \
	VMOV R0, M1L.D[0] \
	VMOV R1, M1L.D[1] \
	LDP m1_high<>(SB), (R0, R1) \
	VMOV R0, M1H.D[0] \
	VMOV R1, M1H.D[1] \
	LDP m2_low<>(SB), (R0, R1) \
	VMOV R0, M2L.D[0] \
	VMOV R1, M2L.D[1] \
	LDP m2_high<>(SB), (R0, R1) \
	VMOV R0, M2H.D[0] \
	VMOV R1, M2H.D[1] \
	LDP fk_mask<>(SB), (R0, R1) \
	VMOV R0, FK_MASK.D[0] \
	VMOV R1, FK_MASK.D[1] \
	LDP inverse_shift_rows<>(SB), (R0, R1) \
	VMOV R0, INVERSE_SHIFT_ROWS.D[0] \
	VMOV R1, INVERSE_SHIFT_ROWS.D[1]

#define load_global_data_2() \
	load_global_data_1() \
	LDP r08_mask<>(SB), (R0, R1) \
	VMOV R0, R08_MASK.D[0] \
	VMOV R1, R08_MASK.D[1] \
	LDP r16_mask<>(SB), (R0, R1) \
	VMOV R0, R16_MASK.D[0] \
	VMOV R1, R16_MASK.D[1] \
	LDP r24_mask<>(SB), (R0, R1) \
	VMOV R0, R24_MASK.D[0] \
	VMOV R1, R24_MASK.D[1]

#define SM4EKEY_EXPORT_KEYS() \
	VMOV V9.S[3], V10.S[0] \
	VMOV V9.S[2], V10.S[1] \
	VMOV V9.S[1], V10.S[2] \
	VMOV V9.S[0], V10.S[3] \
	VMOV V8.S[3], V11.S[0] \
	VMOV V8.S[2], V11.S[1] \
	VMOV V8.S[1], V11.S[2] \
	VMOV V8.S[0], V11.S[3] \
	VST1.P [V8.S4, V9.S4], 32(R10) \
	VST1 [V10.S4, V11.S4], (R11) \
	SUB $32, R11, R11

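SM4EKEY_EXPORT_KEYS takes the eight round keys just produced in V8/V9 and writes them twice: in order to the encryption schedule at R10, and word-reversed, walking backwards, to the decryption schedule at R11. In index terms (sketch):

```go
package main

import "fmt"

// exportKeys shows the index bookkeeping of SM4EKEY_EXPORT_KEYS: round key
// rk[j] of this batch lands at enc[i+j] and at dec[31-(i+j)].
func exportKeys(rk [8]uint32, enc, dec []uint32, i int) {
	for j, k := range rk {
		enc[i+j] = k
		dec[31-(i+j)] = k
	}
}

func main() {
	enc := make([]uint32, 32)
	dec := make([]uint32, 32)
	exportKeys([8]uint32{10, 11, 12, 13, 14, 15, 16, 17}, enc, dec, 0)
	fmt.Println(enc[:8], dec[24:32])
}
```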
#define SM4E_ROUND() \
	VLD1.P 16(R10), [V8.B16] \
	VREV32 V8.B16, V8.B16 \
	WORD $0x0884c0ce \ //SM4E V8.4S, V0.4S
	WORD $0x2884c0ce \ //SM4E V8.4S, V1.4S
	WORD $0x4884c0ce \ //SM4E V8.4S, V2.4S
	WORD $0x6884c0ce \ //SM4E V8.4S, V3.4S
	WORD $0x8884c0ce \ //SM4E V8.4S, V4.4S
	WORD $0xa884c0ce \ //SM4E V8.4S, V5.4S
	WORD $0xc884c0ce \ //SM4E V8.4S, V6.4S
	WORD $0xe884c0ce \ //SM4E V8.4S, V7.4S
	VREV32 V8.B16, V8.B16 \
	VST1.P [V8.B16], 16(R9)

// func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
	MOVD key+0(FP), R8
	MOVD ck+8(FP), R9
	MOVD enc+16(FP), R10
	MOVD dec+24(FP), R11
	MOVD inst+32(FP), R12

	CMP $1, R12
	BEQ sm4ekey

	load_global_data_1()

	VLD1 (R8), [t0.B16]
	VREV32 t0.B16, t0.B16
	VEOR t0.B16, FK_MASK.B16, t0.B16
	VMOV t0.S[1], t1.S[0]
	VMOV t0.S[2], t2.S[0]
	VMOV t0.S[3], t3.S[0]

	EOR R0, R0
	ADD $124, R11
	VEOR ZERO.B16, ZERO.B16, ZERO.B16

ksLoop:
	SM4_EXPANDKEY_ROUND(x, y, t0, t1, t2, t3)
	SM4_EXPANDKEY_ROUND(x, y, t1, t2, t3, t0)
	SM4_EXPANDKEY_ROUND(x, y, t2, t3, t0, t1)
	SM4_EXPANDKEY_ROUND(x, y, t3, t0, t1, t2)

	ADD $16, R0
	CMP $128, R0
	BNE ksLoop
	RET

sm4ekey:
	LDP fk_mask<>(SB), (R0, R1)
	VMOV R0, FK_MASK.D[0]
	VMOV R1, FK_MASK.D[1]
	VLD1 (R8), [V9.B16]
	VREV32 V9.B16, V9.B16
	VEOR FK_MASK.B16, V9.B16, V9.B16
	ADD $96, R11

	VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4]
	WORD $0x28c960ce //SM4EKEY V8.4S, V9.4S, V0.4S
	WORD $0x09c961ce //SM4EKEY V9.4S, V8.4S, V1.4S
	SM4EKEY_EXPORT_KEYS()

	WORD $0x28c962ce //SM4EKEY V8.4S, V9.4S, V2.4S
	WORD $0x09c963ce //SM4EKEY V9.4S, V8.4S, V3.4S
@@ -252,141 +252,141 @@ sm4ekey:
	SM4EKEY_EXPORT_KEYS()
	RET

// func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
	MOVD xk+0(FP), R8
	MOVD dst+8(FP), R9
	MOVD src+32(FP), R10
	MOVD src_len+40(FP), R12
	MOVD inst+56(FP), R11

	CMP $1, R11
	BEQ sm4niblocks

	VLD1 (R10), [V5.S4, V6.S4, V7.S4, V8.S4]
	VMOV V5.S[0], t0.S[0]
	VMOV V5.S[1], t1.S[0]
	VMOV V5.S[2], t2.S[0]
	VMOV V5.S[3], t3.S[0]

	VMOV V6.S[0], t0.S[1]
	VMOV V6.S[1], t1.S[1]
	VMOV V6.S[2], t2.S[1]
	VMOV V6.S[3], t3.S[1]

	VMOV V7.S[0], t0.S[2]
	VMOV V7.S[1], t1.S[2]
	VMOV V7.S[2], t2.S[2]
	VMOV V7.S[3], t3.S[2]

	VMOV V8.S[0], t0.S[3]
	VMOV V8.S[1], t1.S[3]
	VMOV V8.S[2], t2.S[3]
	VMOV V8.S[3], t3.S[3]
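The VMOV ladder above is a 4x4 word transpose: after it, t0..t3 hold words 0..3 of all four input blocks, one block per lane, so each SM4_ROUND below advances four blocks at once. Equivalent index shuffling (sketch):

```go
package main

import "fmt"

// transpose4x4 mirrors the VMOV ladder: word j of block i moves to lane i
// of vector t[j].
func transpose4x4(b [4][4]uint32) (t [4][4]uint32) {
	for i := 0; i < 4; i++ {
		for j := 0; j < 4; j++ {
			t[j][i] = b[i][j]
		}
	}
	return t
}

func main() {
	b := [4][4]uint32{{0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}}
	fmt.Println(transpose4x4(b))
}
```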
	load_global_data_2()

	VREV32 t0.B16, t0.B16
	VREV32 t1.B16, t1.B16
	VREV32 t2.B16, t2.B16
	VREV32 t3.B16, t3.B16

	VEOR ZERO.B16, ZERO.B16, ZERO.B16
	EOR R0, R0

encryptBlocksLoop:
	SM4_ROUND(R8, x, y, t0, t1, t2, t3)
	SM4_ROUND(R8, x, y, t1, t2, t3, t0)
	SM4_ROUND(R8, x, y, t2, t3, t0, t1)
	SM4_ROUND(R8, x, y, t3, t0, t1, t2)

	ADD $16, R0
	CMP $128, R0
	BNE encryptBlocksLoop

	VREV32 t0.B16, t0.B16
	VREV32 t1.B16, t1.B16
	VREV32 t2.B16, t2.B16
	VREV32 t3.B16, t3.B16

	VMOV t3.S[0], V8.S[0]
	VMOV t2.S[0], V8.S[1]
	VMOV t1.S[0], V8.S[2]
	VMOV t0.S[0], V8.S[3]
	VST1.P [V8.B16], 16(R9)

	VMOV t3.S[1], V8.S[0]
	VMOV t2.S[1], V8.S[1]
	VMOV t1.S[1], V8.S[2]
	VMOV t0.S[1], V8.S[3]
	VST1.P [V8.B16], 16(R9)

	VMOV t3.S[2], V8.S[0]
	VMOV t2.S[2], V8.S[1]
	VMOV t1.S[2], V8.S[2]
	VMOV t0.S[2], V8.S[3]
	VST1.P [V8.B16], 16(R9)

	VMOV t3.S[3], V8.S[0]
	VMOV t2.S[3], V8.S[1]
	VMOV t1.S[3], V8.S[2]
	VMOV t0.S[3], V8.S[3]
	VST1 [V8.B16], (R9)
	RET

sm4niblocks:
	VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
	VLD1.P 64(R8), [V4.S4, V5.S4, V6.S4, V7.S4]
sm4niblockloop:
	SM4E_ROUND()
	SUB $16, R12, R12 // message length -= 16 bytes; loop while bytes remain
	CBNZ R12, sm4niblockloop
	RET

// func encryptBlockAsm(xk *uint32, dst, src *byte, inst int)
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
	MOVD xk+0(FP), R8
	MOVD dst+8(FP), R9
	MOVD src+16(FP), R10
	MOVD inst+24(FP), R11

	CMP $1, R11
	BEQ sm4niblock

	VLD1 (R10), [t0.S4]
	VREV32 t0.B16, t0.B16
	VMOV t0.S[1], t1.S[0]
	VMOV t0.S[2], t2.S[0]
	VMOV t0.S[3], t3.S[0]

	load_global_data_2()

	VEOR ZERO.B16, ZERO.B16, ZERO.B16
	EOR R0, R0

encryptBlockLoop:
	SM4_ROUND(R8, x, y, t0, t1, t2, t3)
	SM4_ROUND(R8, x, y, t1, t2, t3, t0)
	SM4_ROUND(R8, x, y, t2, t3, t0, t1)
	SM4_ROUND(R8, x, y, t3, t0, t1, t2)

	ADD $16, R0
	CMP $128, R0
	BNE encryptBlockLoop

	VREV32 t0.B16, t0.B16
	VREV32 t1.B16, t1.B16
	VREV32 t2.B16, t2.B16
	VREV32 t3.B16, t3.B16

	VMOV t3.S[0], V8.S[0]
	VMOV t2.S[0], V8.S[1]
	VMOV t1.S[0], V8.S[2]
	VMOV t0.S[0], V8.S[3]
	VST1 [V8.B16], (R9)
	RET

sm4niblock:
	VLD1 (R10), [V8.B16]
@@ -403,4 +403,4 @@ sm4niblock:
	WORD $0x6884c0ce //SM4E V8.4S, V3.4S
	VREV32 V8.B16, V8.B16
	VST1 [V8.B16], (R9)
	RET

@@ -60,73 +60,73 @@
	VEOR T3.B16, ACCM.B16, ACCM.B16

#define sm4eEnc1block() \
	WORD $0x6086c0ce \ //SM4E V0.4S, V19.4S
	WORD $0x8086c0ce \ //SM4E V0.4S, V20.4S
	WORD $0xa086c0ce \ //SM4E V0.4S, V21.4S
	WORD $0xc086c0ce \ //SM4E V0.4S, V22.4S
	WORD $0xe086c0ce \ //SM4E V0.4S, V23.4S
	WORD $0x0087c0ce \ //SM4E V0.4S, V24.4S
	WORD $0x2087c0ce \ //SM4E V0.4S, V25.4S
	WORD $0x4087c0ce //SM4E V0.4S, V26.4S

#define sm4eEnc8blocks() \
	sm4eEnc1block() \
	WORD $0x6186c0ce \ //SM4E V1.4S, V19.4S
	WORD $0x8186c0ce \ //SM4E V1.4S, V20.4S
	WORD $0xa186c0ce \ //SM4E V1.4S, V21.4S
	WORD $0xc186c0ce \ //SM4E V1.4S, V22.4S
	WORD $0xe186c0ce \ //SM4E V1.4S, V23.4S
	WORD $0x0187c0ce \ //SM4E V1.4S, V24.4S
	WORD $0x2187c0ce \ //SM4E V1.4S, V25.4S
	WORD $0x4187c0ce \ //SM4E V1.4S, V26.4S
	WORD $0x6286c0ce \ //SM4E V2.4S, V19.4S
	WORD $0x8286c0ce \ //SM4E V2.4S, V20.4S
	WORD $0xa286c0ce \ //SM4E V2.4S, V21.4S
	WORD $0xc286c0ce \ //SM4E V2.4S, V22.4S
	WORD $0xe286c0ce \ //SM4E V2.4S, V23.4S
	WORD $0x0287c0ce \ //SM4E V2.4S, V24.4S
	WORD $0x2287c0ce \ //SM4E V2.4S, V25.4S
	WORD $0x4287c0ce \ //SM4E V2.4S, V26.4S
	WORD $0x6386c0ce \ //SM4E V3.4S, V19.4S
	WORD $0x8386c0ce \ //SM4E V3.4S, V20.4S
	WORD $0xa386c0ce \ //SM4E V3.4S, V21.4S
	WORD $0xc386c0ce \ //SM4E V3.4S, V22.4S
	WORD $0xe386c0ce \ //SM4E V3.4S, V23.4S
	WORD $0x0387c0ce \ //SM4E V3.4S, V24.4S
	WORD $0x2387c0ce \ //SM4E V3.4S, V25.4S
	WORD $0x4387c0ce \ //SM4E V3.4S, V26.4S
	WORD $0x6486c0ce \ //SM4E V4.4S, V19.4S
	WORD $0x8486c0ce \ //SM4E V4.4S, V20.4S
	WORD $0xa486c0ce \ //SM4E V4.4S, V21.4S
	WORD $0xc486c0ce \ //SM4E V4.4S, V22.4S
	WORD $0xe486c0ce \ //SM4E V4.4S, V23.4S
	WORD $0x0487c0ce \ //SM4E V4.4S, V24.4S
	WORD $0x2487c0ce \ //SM4E V4.4S, V25.4S
	WORD $0x4487c0ce \ //SM4E V4.4S, V26.4S
	WORD $0x6586c0ce \ //SM4E V5.4S, V19.4S
	WORD $0x8586c0ce \ //SM4E V5.4S, V20.4S
	WORD $0xa586c0ce \ //SM4E V5.4S, V21.4S
	WORD $0xc586c0ce \ //SM4E V5.4S, V22.4S
	WORD $0xe586c0ce \ //SM4E V5.4S, V23.4S
	WORD $0x0587c0ce \ //SM4E V5.4S, V24.4S
	WORD $0x2587c0ce \ //SM4E V5.4S, V25.4S
	WORD $0x4587c0ce \ //SM4E V5.4S, V26.4S
	WORD $0x6686c0ce \ //SM4E V6.4S, V19.4S
	WORD $0x8686c0ce \ //SM4E V6.4S, V20.4S
	WORD $0xa686c0ce \ //SM4E V6.4S, V21.4S
	WORD $0xc686c0ce \ //SM4E V6.4S, V22.4S
	WORD $0xe686c0ce \ //SM4E V6.4S, V23.4S
	WORD $0x0687c0ce \ //SM4E V6.4S, V24.4S
	WORD $0x2687c0ce \ //SM4E V6.4S, V25.4S
	WORD $0x4687c0ce \ //SM4E V6.4S, V26.4S
	WORD $0x6786c0ce \ //SM4E V7.4S, V19.4S
	WORD $0x8786c0ce \ //SM4E V7.4S, V20.4S
	WORD $0xa786c0ce \ //SM4E V7.4S, V21.4S
	WORD $0xc786c0ce \ //SM4E V7.4S, V22.4S
	WORD $0xe786c0ce \ //SM4E V7.4S, V23.4S
	WORD $0x0787c0ce \ //SM4E V7.4S, V24.4S
	WORD $0x2787c0ce \ //SM4E V7.4S, V25.4S
	WORD $0x4787c0ce //SM4E V7.4S, V26.4S

// func gcmSm4niEnc(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0
@@ -193,15 +193,15 @@ octetsLoop:
	VADD B6.S4, INC.S4, B7.S4
	VADD B7.S4, INC.S4, CTR.S4

	sm4eEnc8blocks()
	VREV32 B0.B16, B0.B16
	VREV32 B1.B16, B1.B16
	VREV32 B2.B16, B2.B16
	VREV32 B3.B16, B3.B16
	VREV32 B4.B16, B4.B16
	VREV32 B5.B16, B5.B16
	VREV32 B6.B16, B6.B16
	VREV32 B7.B16, B7.B16
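Each octetsLoop iteration is plain CTR mode at eight-block width: derive eight counter blocks, encrypt them with the SM4E path (round keys preloaded in V19-V26), byte-swap back, then XOR the keystream into the plaintext below. The shape of one iteration, with the block cipher and counter increment as stand-in parameters:

```go
package main

import "fmt"

// ctrXor8 sketches one eight-wide CTR iteration: encrypt eight successive
// counter values and XOR the keystream into 128 bytes of input.
func ctrXor8(encrypt, inc func(*[16]byte), ctr *[16]byte, dst, src []byte) {
	for i := 0; i < 8; i++ {
		ks := *ctr // copy the counter; encrypt the copy in place
		encrypt(&ks)
		inc(ctr)
		for j := 0; j < 16; j++ {
			dst[16*i+j] = src[16*i+j] ^ ks[j]
		}
	}
}

func main() {
	var ctr [16]byte
	encrypt := func(b *[16]byte) { b[0] ^= 0xff } // stand-in block cipher
	inc := func(b *[16]byte) { b[15]++ }          // simplified increment
	src := make([]byte, 128)
	dst := make([]byte, 128)
	ctrXor8(encrypt, inc, &ctr, dst, src)
	fmt.Printf("%x\n", dst[:4])
}
```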
	// XOR plaintext and store ciphertext
	VLD1.P 32(srcPtr), [T1.B16, T2.B16]
@@ -254,14 +254,14 @@ singlesLoop:
	BLT tail
	SUB $16, srcPtrLen

	VMOV CTR.B16, B0.B16
	VADD CTR.S4, INC.S4, CTR.S4
	sm4eEnc1block()
	VREV32 B0.B16, B0.B16

singlesLast:
	VLD1.P 16(srcPtr), [T0.B16]
	VEOR T0.B16, B0.B16, B0.B16

encReduce:
	VST1.P [B0.B16], 16(dstPtr)
@@ -315,9 +315,9 @@ ld1:
	VMOV H1, T3.B[0]
ld0:
	MOVD ZR, srcPtrLen
	VMOV CTR.B16, B0.B16
	sm4eEnc1block()
	VREV32 B0.B16, B0.B16

tailLast:
	VEOR T0.B16, B0.B16, B0.B16
@@ -326,7 +326,7 @@ tailLast:
done:
	VST1 [ACC0.B16], (tPtr)
	RET

// func gcmSm4niDec(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
TEXT ·gcmSm4niDec(SB),NOSPLIT,$0
@@ -381,15 +381,15 @@ octetsLoop:
	VADD B6.S4, INC.S4, B7.S4
	VADD B7.S4, INC.S4, CTR.S4

	sm4eEnc8blocks()
	VREV32 B0.B16, T1.B16
	VREV32 B1.B16, T2.B16
	VREV32 B2.B16, B2.B16
	VREV32 B3.B16, B3.B16
	VREV32 B4.B16, B4.B16
	VREV32 B5.B16, B5.B16
	VREV32 B6.B16, B6.B16
	VREV32 B7.B16, B7.B16

	VLD1.P 32(srcPtr), [B0.B16, B1.B16]
	VEOR B0.B16, T1.B16, T1.B16
@@ -443,17 +443,17 @@ singlesLoop:
	CMP $16, srcPtrLen
	BLT tail
	SUB $16, srcPtrLen

	VLD1.P 16(srcPtr), [T0.B16]
	VREV64 T0.B16, B5.B16

	VMOV CTR.B16, B0.B16
	VADD CTR.S4, INC.S4, CTR.S4
	sm4eEnc1block()
	VREV32 B0.B16, B0.B16

singlesLast:
	VEOR T0.B16, B0.B16, B0.B16
	VST1.P [B0.B16], 16(dstPtr)

	VEOR ACC0.B16, B5.B16, B5.B16
@@ -467,13 +467,13 @@ singlesLast:
	B singlesLoop
tail:
	CBZ srcPtrLen, done
	VMOV CTR.B16, B0.B16
	VADD CTR.S4, INC.S4, CTR.S4
	sm4eEnc1block()
	VREV32 B0.B16, B0.B16
tailLast:
	// Assuming it is safe to load past dstPtr due to the presence of the tag
	// B5 holds the last ciphertext block
	VLD1 (srcPtr), [B5.B16]

	VEOR B5.B16, B0.B16, B0.B16
@@ -522,4 +522,4 @@ ld0:
done:
	VST1 [ACC0.B16], (tPtr)

	RET

@@ -64,6 +64,22 @@ var cbcSM4Tests = []struct {
		0x62, 0xb5, 0xe7, 0x50, 0x44, 0xea, 0x24, 0xcc, 0x9b, 0x5e, 0x07, 0x48, 0x04, 0x89, 0xa2, 0x74,
	},
},
{
	"7 blocks",
	[]byte("0123456789ABCDEF"),
	[]byte("0123456789ABCDEF"),
	[]byte("Hello World Hello World Hello World Hello World Hello World Hello World Hello World Hello World Hello World Hell"),
	[]byte{
		0xd3, 0x1e, 0x36, 0x83, 0xe4, 0xfc, 0x9b, 0x51, 0x6a, 0x2c, 0x0f, 0x98, 0x36, 0x76, 0xa9, 0xeb,
		0x1f, 0xdc, 0xc3, 0x2a, 0xf3, 0x84, 0x08, 0x97, 0x81, 0x57, 0xa2, 0x06, 0x5d, 0xe3, 0x4c, 0x6a,
		0xe0, 0x02, 0xd6, 0xe4, 0xf5, 0x66, 0x87, 0xc4, 0xcc, 0x54, 0x1d, 0x1f, 0x1c, 0xc4, 0x2f, 0xe6,
		0xe5, 0x1d, 0xea, 0x52, 0xb8, 0x0c, 0xc8, 0xbe, 0xae, 0xcc, 0x44, 0xa8, 0x51, 0x81, 0x08, 0x60,
		0xb6, 0x09, 0x7b, 0xb8, 0x7e, 0xdb, 0x53, 0x4b, 0xea, 0x2a, 0xc6, 0xa1, 0xe5, 0xa0, 0x2a, 0xe9,
		0x22, 0x65, 0x5b, 0xa3, 0xb9, 0xcc, 0x63, 0x92, 0x16, 0x0e, 0x2f, 0xf4, 0x3b, 0x93, 0x06, 0x82,
		0xb3, 0x8c, 0x26, 0x2e, 0x06, 0x51, 0x34, 0x2c, 0xe4, 0x3d, 0xd0, 0xc7, 0x2b, 0x8f, 0x31, 0x15,
		0xb7, 0x8f, 0xd0, 0x47, 0x45, 0x40, 0xec, 0x02, 0x1b, 0xef, 0xc1, 0xd2, 0xe5, 0xa2, 0x35, 0xd2,
	},
},
{
	"9 blocks",
	[]byte("0123456789ABCDEF"),
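Vectors like the ones above can be regenerated from the package's generic code path. A hedged sketch, assuming sm4.NewCipher and sm4.BlockSize as exported by this repo, and PKCS#7 padding (an assumption consistent with the extra padding block implied by the vector sizes):

```go
package main

import (
	"crypto/cipher"
	"fmt"

	"github.com/emmansun/gmsm/sm4" // import path assumed from the repo name
)

func main() {
	key := []byte("0123456789ABCDEF")
	iv := []byte("0123456789ABCDEF")
	plaintext := []byte("Hello World Hello World Hello World Hello World Hello World Hello World Hello World Hello World Hello World Hell")

	block, err := sm4.NewCipher(key) // returns a cipher.Block
	if err != nil {
		panic(err)
	}

	// PKCS#7-style padding: 112 plaintext bytes -> 128 ciphertext bytes in
	// the "7 blocks" vector implies one full padding block.
	pad := sm4.BlockSize - len(plaintext)%sm4.BlockSize
	for i := 0; i < pad; i++ {
		plaintext = append(plaintext, byte(pad))
	}

	ciphertext := make([]byte, len(plaintext))
	cipher.NewCBCEncrypter(block, iv).CryptBlocks(ciphertext, plaintext)
	fmt.Printf("%x\n", ciphertext)
}
```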