diff --git a/sm4/gcm_arm64.s b/sm4/gcm_arm64.s
index 7ef9d40..80fa535 100644
--- a/sm4/gcm_arm64.s
+++ b/sm4/gcm_arm64.s
@@ -68,16 +68,6 @@ GLOBL r24_mask<>(SB), (NOPTR+RODATA), $16
 #define K1 V20
 #define K2 V21
 #define K3 V22
-#define K4 V23
-#define K5 V24
-#define K6 V25
-#define K7 V26
-#define K8 V27
-#define K9 V28
-#define K10 V29
-#define K11 V30
-#define KLAST V31
-
 #define NIBBLE_MASK V23
 #define INVERSE_SHIFT_ROWS V24
 #define M1L V25
@@ -577,21 +567,13 @@ encOctetsLoop:
 	// Prepare 8 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, B4.S4
-	VREV32 B3.B16, B3.B16
 	VADD B4.S4, INC.S4, B5.S4
-	VREV32 B4.B16, B4.B16
 	VADD B5.S4, INC.S4, B6.S4
-	VREV32 B5.B16, B5.B16
 	VADD B6.S4, INC.S4, B7.S4
-	VREV32 B6.B16, B6.B16
 	VADD B7.S4, INC.S4, CTR.S4
-	VREV32 B7.B16, B7.B16
 
 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -684,13 +666,9 @@ encNibblesLoop:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	VREV32 B3.B16, B3.B16
 
 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -733,13 +711,9 @@ encStartSingles:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	//VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	//VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	//VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	//VREV32 B3.B16, B3.B16
 
 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -885,21 +859,13 @@ decOctetsLoop:
 
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, B4.S4
-	VREV32 B3.B16, B3.B16
 	VADD B4.S4, INC.S4, B5.S4
-	VREV32 B4.B16, B4.B16
 	VADD B5.S4, INC.S4, B6.S4
-	VREV32 B5.B16, B5.B16
 	VADD B6.S4, INC.S4, B7.S4
-	VREV32 B6.B16, B6.B16
 	VADD B7.S4, INC.S4, CTR.S4
-	VREV32 B7.B16, B7.B16
 
 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -994,13 +960,9 @@ decNibblesLoop:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	VREV32 B3.B16, B3.B16
 
 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
@@ -1046,13 +1008,9 @@ decStartSingles:
 	// Prepare 4 counters
 	VMOV CTR.B16, B0.B16
 	VADD B0.S4, INC.S4, B1.S4
-	VREV32 B0.B16, B0.B16
 	VADD B1.S4, INC.S4, B2.S4
-	VREV32 B1.B16, B1.B16
 	VADD B2.S4, INC.S4, B3.S4
-	VREV32 B2.B16, B2.B16
 	VADD B3.S4, INC.S4, CTR.S4
-	VREV32 B3.B16, B3.B16
 
 	// encryption first 4 blocks
 	PRE_TRANSPOSE_MATRIX(B0, B1, B2, B3)
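
For context: VREV32 reverses the bytes within each 32-bit lane, and GCM's CTR mode increments a 32-bit big-endian counter held in the last four bytes of each 16-byte counter block. Below is a minimal Go sketch of the scalar equivalent of that increment, using a hypothetical helper name for illustration only; it is not code from this repository.

package main

import (
	"encoding/binary"
	"fmt"
)

// gcmInc32Sketch increments the 32-bit big-endian counter stored in the
// last four bytes of a 16-byte GCM counter block, wrapping modulo 2^32.
// Hypothetical helper for illustration only.
func gcmInc32Sketch(counterBlock *[16]byte) {
	ctr := binary.BigEndian.Uint32(counterBlock[12:])
	binary.BigEndian.PutUint32(counterBlock[12:], ctr+1)
}

func main() {
	var block [16]byte // counter block; low 32 bits start at 0
	gcmInc32Sketch(&block)
	gcmInc32Sketch(&block)
	fmt.Printf("% x\n", block[12:]) // 00 00 00 02
}

The VADD instructions retained in the hunks above perform the analogous per-lane addition across several counter blocks at once on .S4 lanes.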