diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s
index 0ec3f62..62fd329 100644
--- a/sm4/asm_arm64.s
+++ b/sm4/asm_arm64.s
@@ -226,85 +226,6 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
 	VEOR	ZERO.B16, ZERO.B16, ZERO.B16
 	EOR	R0, R0
-
-encryptBlocksLoop:
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t1.B16, x.B16, x.B16
-	VEOR	t2.B16, x.B16, x.B16
-	VEOR	t3.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t0.B16, t0.B16
-
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t0.B16, x.B16, x.B16
-	VEOR	t2.B16, x.B16, x.B16
-	VEOR	t3.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t1.B16, t1.B16
-
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t0.B16, x.B16, x.B16
-	VEOR	t1.B16, x.B16, x.B16
-	VEOR	t3.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t2.B16, t2.B16
-	ADD	$4, R0
-
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t0.B16, x.B16, x.B16
-	VEOR	t1.B16, x.B16, x.B16
-	VEOR	t2.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t3.B16, t3.B16
-
-	ADD	$16, R0
-	CMP	$128, R0
-	BNE	encryptBlocksLoop
-
-	VTBL	FLIP_MASK.B16, [t0.B16], t0.B16
-	VTBL	FLIP_MASK.B16, [t1.B16], t1.B16
-	VTBL	FLIP_MASK.B16, [t2.B16], t2.B16
-	VTBL	FLIP_MASK.B16, [t3.B16], t3.B16
-
-	VMOV	t3.S[0], V8.S[0]
-	VMOV	t2.S[0], V8.S[1]
-	VMOV	t1.S[0], V8.S[2]
-	VMOV	t0.S[0], V8.S[3]
-	VST1.P	[V8.B16], 16(R9)
-
-	VMOV	t3.S[1], V8.S[0]
-	VMOV	t2.S[1], V8.S[1]
-	VMOV	t1.S[1], V8.S[2]
-	VMOV	t0.S[1], V8.S[3]
-	VST1.P	[V8.B16], 16(R9)
-
-	VMOV	t3.S[2], V8.S[0]
-	VMOV	t2.S[2], V8.S[1]
-	VMOV	t1.S[2], V8.S[2]
-	VMOV	t0.S[2], V8.S[3]
-	VST1.P	[V8.B16], 16(R9)
-
-	VMOV	t3.S[3], V8.S[0]
-	VMOV	t2.S[3], V8.S[1]
-	VMOV	t1.S[3], V8.S[2]
-	VMOV	t0.S[3], V8.S[3]
-	VST1	[V8.B16], (R9)
 	RET
@@ -330,64 +251,4 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
 	VMOV	R0, FLIP_MASK.D[0]
 	VMOV	R1, FLIP_MASK.D[1]
 	EOR	R0, R0
-
-encryptBlockLoop:
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t1.B16, x.B16, x.B16
-	VEOR	t2.B16, x.B16, x.B16
-	VEOR	t3.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t0.B16, t0.B16
-
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t0.B16, x.B16, x.B16
-	VEOR	t2.B16, x.B16, x.B16
-	VEOR	t3.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t1.B16, t1.B16
-
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t0.B16, x.B16, x.B16
-	VEOR	t1.B16, x.B16, x.B16
-	VEOR	t3.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t2.B16, t2.B16
-
-	MOVW.P	4(R8), R19
-	VMOV	R19, x.S[0]
-	VMOV	R19, x.S[1]
-	VMOV	R19, x.S[2]
-	VMOV	R19, x.S[3]
-	VEOR	t0.B16, x.B16, x.B16
-	VEOR	t1.B16, x.B16, x.B16
-	VEOR	t2.B16, x.B16, x.B16
-	SM4_TAO_L1(x, y)
-	VEOR	x.B16, t3.B16, t3.B16
-
-	ADD	$16, R0
-	CMP	$128, R0
-	BNE	encryptBlockLoop
-
-	VTBL	FLIP_MASK.B16, [t0.B16], t0.B16
-	VTBL	FLIP_MASK.B16, [t1.B16], t1.B16
-	VTBL	FLIP_MASK.B16, [t2.B16], t2.B16
-	VTBL	FLIP_MASK.B16, [t3.B16], t3.B16
-
-	VMOV	t3.S[0], V8.S[0]
-	VMOV	t2.S[0], V8.S[1]
-	VMOV	t1.S[0], V8.S[2]
-	VMOV	t0.S[0], V8.S[3]
-	VST1	[V8.B16], (R9)
 	RET