diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s
index 1647dea..9b4b9b0 100644
--- a/sm4/asm_arm64.s
+++ b/sm4/asm_arm64.s
@@ -158,6 +158,10 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0
 	VMOV R0, FK_MASK.D[0]
 	VMOV R1, FK_MASK.D[1]
+	LDP inverse_shift_rows<>(SB), (R0, R1)
+	VMOV R0, INVERSE_SHIFT_ROWS.D[0]
+	VMOV R1, INVERSE_SHIFT_ROWS.D[1]
+
 	VLD1 (R8), [t0.B16];
 	VTBL FLIP_MASK.B16, [t0.B16], t0.B16
 	VEOR t0.B16, FK_MASK.B16, t0.B16
 
@@ -176,14 +180,13 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0
 	VEOR t3.B16, x.B16, x.B16
 	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16
 	VTBL XTMP7.B16, [M1L.B16], y.B16
-/*
 	VUSHR $4, x.D2, x.D2
 	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16
 	VTBL XTMP7.B16, [M1H.B16], XTMP7.B16
 	VEOR y.B16, XTMP7.B16, x.B16
 	VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16
-*/
-	VEOR y.B16, t0.B16, t0.B16
+
+	VEOR x.B16, t0.B16, t0.B16
 	VMOV t0.S[0], R2
 	MOVW.P R2, 4(R10)
 