diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 38d31cb..4ef5e1a 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -121,7 +121,13 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16 VEOR y.B16, x.B16, x.B16 #define SM4_TAO_L2(x, y) \ - SM4_SBOX(x, y); \ + LDP nibble_mask<>(SB), (R0, R1); \ + VMOV R0, XTMP6.D[0]; \ + VMOV R1, XTMP6.D[1]; \ + VAND x.B16, XTMP6.B16, XTMP7.B16; \ + LDP m1_low<>(SB), (R0, R1); \ + VMOV R0, y.D[0]; \ + VMOV R1, y.D[1]; \ ; \ //#################### 4 parallel L2 linear transforms ##################// VSHL $13, x.S4, XTMP6.S4; \ VUSHR $19, x.S4, y.S4; \