sm4: arm64 matrix transpose using VZIP1/VZIP2

This commit is contained in:
Sun Yimin 2022-07-25 16:37:23 +08:00 committed by GitHub
parent 9b364dca8b
commit fd34c2eff2
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 51 deletions

View File

@ -82,22 +82,15 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
// t1 = t3.S1, t2.S1, t1.S1, t0.S1
// t2 = t3.S2, t2.S2, t1.S2, t0.S2
// t3 = t3.S3, t2.S3, t1.S3, t0.S3
// Pre-VZIP implementation (the side this commit removes): transpose the
// 4x4 matrix of 32-bit words in t0..t3 via 15 scalar VMOV lane moves,
// using the single vector register K as scratch (K's previous contents
// are destroyed). Statement order is significant: each move must read a
// lane before a later move overwrites it. Continuation lines cannot
// carry trailing comments (the `\` must end the line).
#define PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, K) \
VMOV t0.B16, K.B16 \
VMOV t1.S[0], t0.S[1] \
VMOV t2.S[0], t0.S[2] \
VMOV t3.S[0], t0.S[3] \
VMOV K.S[1], t1.S[0] \
VMOV K.S[2], t2.S[0] \
VMOV K.S[3], t3.S[0] \
VMOV t1.D[1], K.D[1] \
VMOV t2.S[1], t1.S[2] \
VMOV t3.S[1], t1.S[3] \
VMOV K.S[2], t2.S[1] \
VMOV K.S[3], t3.S[1] \
VMOV t2.S[3], K.S[3] \
VMOV t3.S[2], t2.S[3] \
VMOV K.S[3], t3.S[2]
// Transpose the 4x4 matrix of 32-bit words in t0..t3 with two rounds of
// NEON interleaves (8 instructions vs. 15 VMOVs before this commit).
// RTMP0..RTMP3 are scratch vector registers; their contents are destroyed.
// Go asm operand order is src2, src1, dst: "VZIP1 a.S4, b.S4, c.S4" is
// ARM "ZIP1 c.4S, b.4S, a.4S" (c = interleave of b's and a's low lanes,
// b's lanes in the even positions).
// Round 1 (.S4): RTMP0 = {t0.S0,t1.S0,t0.S1,t1.S1},
//                RTMP1 = {t2.S0,t3.S0,t2.S1,t3.S1},
//                RTMP2/RTMP3 = the same for the high lanes S2/S3.
// Round 2 (.D2): zip 64-bit halves of the RTMPs back into t0..t3,
// yielding ti = {t0.Si,t1.Si,t2.Si,t3.Si} — matching the output
// layout documented above.
#define PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, RTMP0, RTMP1, RTMP2, RTMP3) \
VZIP1 t1.S4, t0.S4, RTMP0.S4 \
VZIP1 t3.S4, t2.S4, RTMP1.S4 \
VZIP2 t1.S4, t0.S4, RTMP2.S4 \
VZIP2 t3.S4, t2.S4, RTMP3.S4 \
VZIP1 RTMP1.D2, RTMP0.D2, t0.D2 \
VZIP2 RTMP1.D2, RTMP0.D2, t1.D2 \
VZIP1 RTMP3.D2, RTMP2.D2, t2.D2 \
VZIP2 RTMP3.D2, RTMP2.D2, t3.D2
// input: from high to low
// t0 = t0.S3, t0.S2, t0.S1, t0.S0
@ -109,25 +102,15 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
// t1 = t0.S1, t1.S1, t2.S1, t3.S1
// t2 = t0.S2, t1.S2, t2.S2, t3.S2
// t3 = t0.S3, t1.S3, t2.S3, t3.S3
// Pre-VZIP implementation (the side this commit removes): the reverse
// transpose (lane order flipped relative to PRE_TRANSPOSE_MATRIX) via
// scalar VMOV lane moves, with K as the single scratch register
// (K's previous contents are destroyed). Statement order is significant.
// NOTE(review): the `K0` operand on the t0.S[3] move below looks like a
// typo for the macro parameter `K` — it assembles only because every
// visible call site passes K0 for K; confirm before reusing this macro
// with a different scratch register.
#define TRANSPOSE_MATRIX(t0, t1, t2, t3, K) \
VMOV t0.B16, K.B16 \
VMOV t3.S[0], t0.S[0] \
VMOV t2.S[0], t0.S[1] \
VMOV t1.S[0], t0.S[2] \
VMOV K0.S[0], t0.S[3] \
VMOV t3.S[1], t1.S[0] \
VMOV t3.S[2], t2.S[0] \
VMOV t3.S[3], t3.S[0] \
VMOV t2.S[3], t3.S[1] \
VMOV t1.S[3], t3.S[2] \
VMOV K.S[3], t3.S[3] \
VMOV K.S[2], t2.S[3] \
VMOV K.S[1], t1.S[3] \
VMOV t1.B16, K.B16 \
VMOV t2.S[1], t1.S[1] \
VMOV K.S[1], t1.S[2] \
VMOV t2.S[2], t2.S[1] \
VMOV K.S[2], t2.S[2]
// Reverse transpose of the 4x4 word matrix in t0..t3 (lane order flipped
// relative to PRE_TRANSPOSE_MATRIX), again as two rounds of interleaves.
// RTMP0..RTMP3 are scratch vector registers; their contents are destroyed.
// Go asm operand order is src2, src1, dst ("VZIP1 a, b, c" = ARM
// "ZIP1 c, b, a"), so swapping the source order versus
// PRE_TRANSPOSE_MATRIX is what reverses the lane order here.
// Round 1 (.S4): RTMP0 = {t1.S0,t0.S0,t1.S1,t0.S1},
//                RTMP2 = {t3.S0,t2.S0,t3.S1,t2.S1}, and RTMP1/RTMP3
//                the same for the high lanes S2/S3.
// Round 2 (.D2): recombine 64-bit halves, giving (low lane to high)
// ti = {t3.Si,t2.Si,t1.Si,t0.Si} — matching the output layout
// documented above.
#define TRANSPOSE_MATRIX(t0, t1, t2, t3, RTMP0, RTMP1, RTMP2, RTMP3) \
VZIP1 t0.S4, t1.S4, RTMP0.S4 \
VZIP2 t0.S4, t1.S4, RTMP1.S4 \
VZIP1 t2.S4, t3.S4, RTMP2.S4 \
VZIP2 t2.S4, t3.S4, RTMP3.S4 \
VZIP1 RTMP0.D2, RTMP2.D2, t0.D2 \
VZIP2 RTMP0.D2, RTMP2.D2, t1.D2 \
VZIP1 RTMP1.D2, RTMP3.D2, t2.D2 \
VZIP2 RTMP1.D2, RTMP3.D2, t3.D2
// SM4 sbox function
// parameters:

View File

@ -448,7 +448,7 @@ encOctetsLoop:
VADD B7.S4, INC.S4, CTR.S4
// encryption first 4 blocks
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
EOR R13, R13
MOVD rkSave, rk
@ -465,9 +465,9 @@ encOctetsEnc4Blocks1:
VREV32 B1.B16, B1.B16
VREV32 B2.B16, B2.B16
VREV32 B3.B16, B3.B16
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
// encryption second 4 blocks
PRE_TRANSPOSE_MATRIX(B4, B5, B6, B7, K0)
PRE_TRANSPOSE_MATRIX(B4, B5, B6, B7, K0, K1, K2, K3)
MOVD rkSave, rk
encOctetsEnc4Blocks2:
@ -483,7 +483,7 @@ encOctetsEnc4Blocks2:
VREV32 B5.B16, B5.B16
VREV32 B6.B16, B6.B16
VREV32 B7.B16, B7.B16
TRANSPOSE_MATRIX(B4, B5, B6, B7, K0)
TRANSPOSE_MATRIX(B4, B5, B6, B7, K0, K1, K2, K3)
// XOR plaintext and store ciphertext
VLD1.P 32(srcPtr), [T1.B16, T2.B16]
@ -543,7 +543,7 @@ encNibblesLoop:
VADD B3.S4, INC.S4, CTR.S4
// encryption first 4 blocks
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
EOR R13, R13
MOVD rkSave, rk
@ -560,7 +560,7 @@ encNibblesEnc4Blocks:
VREV32 B1.B16, B1.B16
VREV32 B2.B16, B2.B16
VREV32 B3.B16, B3.B16
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
// XOR plaintext and store ciphertext
VLD1.P 32(srcPtr), [K1.B16, K2.B16]
@ -588,7 +588,7 @@ encStartSingles:
VADD B3.S4, INC.S4, CTR.S4
// encryption first 4 blocks
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
EOR R13, R13
MOVD rkSave, rk
@ -605,7 +605,7 @@ encSinglesEnc4Blocks:
VREV32 B1.B16, B1.B16
VREV32 B2.B16, B2.B16
VREV32 B3.B16, B3.B16
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
VMOV B0.B16, K0.B16
CMP $16, srcPtrLen
@ -740,7 +740,7 @@ decOctetsLoop:
VADD B7.S4, INC.S4, CTR.S4
// encryption first 4 blocks
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
EOR R13, R13
MOVD rkSave, rk
@ -757,10 +757,10 @@ decOctetsEnc4Blocks1:
VREV32 B1.B16, T2.B16
VREV32 B2.B16, B2.B16
VREV32 B3.B16, B3.B16
TRANSPOSE_MATRIX(T1, T2, B2, B3, K0)
TRANSPOSE_MATRIX(T1, T2, B2, B3, K0, K1, K2, K3)
// encryption second 4 blocks
PRE_TRANSPOSE_MATRIX(B4, B5, B6, B7, K0)
PRE_TRANSPOSE_MATRIX(B4, B5, B6, B7, K0, K1, K2, K3)
MOVD rkSave, rk
decOctetsEnc4Blocks2:
@ -776,7 +776,7 @@ decOctetsEnc4Blocks2:
VREV32 B5.B16, B5.B16
VREV32 B6.B16, B6.B16
VREV32 B7.B16, B7.B16
TRANSPOSE_MATRIX(B4, B5, B6, B7, K0)
TRANSPOSE_MATRIX(B4, B5, B6, B7, K0, K1, K2, K3)
VLD1.P 32(srcPtr), [B0.B16, B1.B16]
VEOR B0.B16, T1.B16, T1.B16
@ -837,7 +837,7 @@ decNibblesLoop:
VADD B3.S4, INC.S4, CTR.S4
// encryption first 4 blocks
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
EOR R13, R13
MOVD rkSave, rk
@ -854,7 +854,7 @@ decNibblesEnc4Blocks:
VREV32 B1.B16, B1.B16
VREV32 B2.B16, B2.B16
VREV32 B3.B16, B3.B16
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
// XOR plaintext and store ciphertext
VLD1.P 32(srcPtr), [K1.B16, K2.B16]
@ -885,7 +885,7 @@ decStartSingles:
VADD B3.S4, INC.S4, CTR.S4
// encryption first 4 blocks
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
EOR R13, R13
MOVD rkSave, rk
@ -902,7 +902,7 @@ decSinglesEnc4Blocks:
VREV32 B1.B16, B1.B16
VREV32 B2.B16, B2.B16
VREV32 B3.B16, B3.B16
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0)
TRANSPOSE_MATRIX(B0, B1, B2, B3, K0, K1, K2, K3)
VMOV B0.B16, K0.B16
CMP $16, srcPtrLen