[SM4] ARM64 fix error

emmansun 2022-01-18 19:32:57 +08:00
parent 252325c878
commit 88d9e01205


@@ -68,16 +68,6 @@ GLOBL r24_mask<>(SB), (NOPTR+RODATA), $16
 #define K1 V20
 #define K2 V21
 #define K3 V22
-#define K4 V23
-#define K5 V24
-#define K6 V25
-#define K7 V26
-#define K8 V27
-#define K9 V28
-#define K10 V29
-#define K11 V30
-#define KLAST V31
 #define NIBBLE_MASK V23
 #define INVERSE_SHIFT_ROWS V24
 #define M1L V25
@@ -577,21 +567,13 @@ encOctetsLoop:
 	// Prepare 8 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, B4.S4
-	VREV32 B3.B16, B3.B16
 	VADD B4.S4, INC.S4, B5.S4
-	VREV32 B4.B16, B4.B16
 	VADD B5.S4, INC.S4, B6.S4
-	VREV32 B5.B16, B5.B16
 	VADD B6.S4, INC.S4, B7.S4
-	VREV32 B6.B16, B6.B16
 	VADD B7.S4, INC.S4, CTR.S4
-	VREV32 B7.B16, B7.B16

 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -684,13 +666,9 @@ encNibblesLoop:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	VREV32 B3.B16, B3.B16

 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -733,13 +711,9 @@ encStartSingles:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	//VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	//VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	//VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	//VREV32 B3.B16, B3.B16

 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -885,21 +859,13 @@ decOctetsLoop:
 	// Prepare 8 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, B4.S4
-	VREV32 B3.B16, B3.B16
 	VADD B4.S4, INC.S4, B5.S4
-	VREV32 B4.B16, B4.B16
 	VADD B5.S4, INC.S4, B6.S4
-	VREV32 B5.B16, B5.B16
 	VADD B6.S4, INC.S4, B7.S4
-	VREV32 B6.B16, B6.B16
 	VADD B7.S4, INC.S4, CTR.S4
-	VREV32 B7.B16, B7.B16

 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -994,13 +960,9 @@ decNibblesLoop:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	VREV32 B3.B16, B3.B16

 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -1046,13 +1008,9 @@ decStartSingles:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	VREV32 B3.B16, B3.B16

 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
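
Background note (not part of the commit): the change removes the per-block VREV32 byte reversals that followed each 32-bit counter increment (VADD on .S4 lanes), along with the unused K4..K11/KLAST register aliases. A plausible reading is that the counter word is now kept in one byte order throughout, so a single reversal where the block is actually consumed suffices, and reversing after every increment was redundant or wrong. The Go sketch below is illustrative only; gcmInc32 and hostInc32 are hypothetical names, not functions from this repository. It shows why incrementing in either byte order gives the same counter once one final swap is applied, assuming the standard GCM inc32 semantics (the low 32 bits of the counter block are big-endian).

    package main

    import (
    	"encoding/binary"
    	"fmt"
    )

    // gcmInc32 increments the last 32 bits of a 16-byte GCM counter
    // block, which are stored big-endian (NIST SP 800-38D, inc32).
    func gcmInc32(ctr *[16]byte) {
    	n := binary.BigEndian.Uint32(ctr[12:])
    	binary.BigEndian.PutUint32(ctr[12:], n+1)
    }

    // hostInc32 models the vectorized approach: the counter word is
    // kept in host (little-endian) order so it can be incremented with
    // a plain 32-bit add (VADD on .S4 lanes), deferring the big-endian
    // conversion (VREV32) to the point where the block is used.
    func hostInc32(ctr *[16]byte) {
    	n := binary.LittleEndian.Uint32(ctr[12:])
    	binary.LittleEndian.PutUint32(ctr[12:], n+1)
    }

    func main() {
    	var a, b [16]byte
    	// Same starting counter value, stored in the two byte orders.
    	binary.BigEndian.PutUint32(a[12:], 1)
    	binary.LittleEndian.PutUint32(b[12:], 1)
    	for i := 0; i < 5; i++ {
    		gcmInc32(&a)
    		hostInc32(&b)
    	}
    	// One final swap of b's counter word back to big-endian makes
    	// the two representations agree, so a byte reversal after every
    	// increment is unnecessary.
    	binary.BigEndian.PutUint32(b[12:], binary.LittleEndian.Uint32(b[12:]))
    	fmt.Println(a == b) // true
    }

On ARM64 NEON this equivalence appears to be what lets the counter-preparation loops above run as plain VADDs, with any VREV32 handled once elsewhere rather than after each of the 4 or 8 block counters.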