From 80e88ea2b2e6ab761290a6710e433f25724aaff6 Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Wed, 8 Nov 2023 13:23:04 +0800
Subject: [PATCH] sm4: use new method to load global data

---
 sm4/aesni_macros_arm64.s | 30 ++++++++++++------------------
 sm4/asm_arm64.s          | 36 +++++++++++++++---------------------
 2 files changed, 27 insertions(+), 39 deletions(-)

diff --git a/sm4/aesni_macros_arm64.s b/sm4/aesni_macros_arm64.s
index d4f5cb8..4084295 100644
--- a/sm4/aesni_macros_arm64.s
+++ b/sm4/aesni_macros_arm64.s
@@ -33,24 +33,18 @@ GLOBL fk_mask<>(SB), (16+8), $16
 #define LOAD_SM4_AESNI_CONSTS() \
 	MOVW $0x0F0F0F0F, R20                    \
 	VMOV R20, NIBBLE_MASK.S4                 \
-	LDP m1_low<>(SB), (R20, R21)             \
-	VMOV R20, M1L.D[0]                       \
-	VMOV R21, M1L.D[1]                       \
-	LDP m1_high<>(SB), (R20, R21)            \
-	VMOV R20, M1H.D[0]                       \
-	VMOV R21, M1H.D[1]                       \
-	LDP m2_low<>(SB), (R20, R21)             \
-	VMOV R20, M2L.D[0]                       \
-	VMOV R21, M2L.D[1]                       \
-	LDP m2_high<>(SB), (R20, R21)            \
-	VMOV R20, M2H.D[0]                       \
-	VMOV R21, M2H.D[1]                       \
-	LDP inverse_shift_rows<>(SB), (R20, R21) \
-	VMOV R20, INVERSE_SHIFT_ROWS.D[0]        \
-	VMOV R21, INVERSE_SHIFT_ROWS.D[1]        \
-	LDP r08_mask<>(SB), (R20, R21)           \
-	VMOV R20, R08_MASK.D[0]                  \
-	VMOV R21, R08_MASK.D[1]
+	MOVD $m1_low<>(SB), R20                  \
+	VLD1 (R20), [M1L.B16]                    \
+	MOVD $m1_high<>(SB), R20                 \
+	VLD1 (R20), [M1H.B16]                    \
+	MOVD $m2_low<>(SB), R20                  \
+	VLD1 (R20), [M2L.B16]                    \
+	MOVD $m2_high<>(SB), R20                 \
+	VLD1 (R20), [M2H.B16]                    \
+	MOVD $inverse_shift_rows<>(SB), R20      \
+	VLD1 (R20), [INVERSE_SHIFT_ROWS.B16]     \
+	MOVD $r08_mask<>(SB), R20                \
+	VLD1 (R20), [R08_MASK.B16]               \
 
 // input: from high to low
 // t0 = t0.S3, t0.S2, t0.S1, t0.S0
diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s
index d50fe38..98fccbe 100644
--- a/sm4/asm_arm64.s
+++ b/sm4/asm_arm64.s
@@ -52,30 +52,24 @@
 #define load_global_data_1() \
 	MOVW $0x0F0F0F0F, R0                     \
 	VMOV R0, NIBBLE_MASK.S4                  \
-	LDP m1_low<>(SB), (R0, R1)               \
-	VMOV R0, M1L.D[0]                        \
-	VMOV R1, M1L.D[1]                        \
-	LDP m1_high<>(SB), (R0, R1)              \
-	VMOV R0, M1H.D[0]                        \
-	VMOV R1, M1H.D[1]                        \
-	LDP m2_low<>(SB), (R0, R1)               \
-	VMOV R0, M2L.D[0]                        \
-	VMOV R1, M2L.D[1]                        \
-	LDP m2_high<>(SB), (R0, R1)              \
-	VMOV R0, M2H.D[0]                        \
-	VMOV R1, M2H.D[1]                        \
-	LDP fk_mask<>(SB), (R0, R1)              \
-	VMOV R0, FK_MASK.D[0]                    \
-	VMOV R1, FK_MASK.D[1]                    \
-	LDP inverse_shift_rows<>(SB), (R0, R1)   \
-	VMOV R0, INVERSE_SHIFT_ROWS.D[0]         \
-	VMOV R1, INVERSE_SHIFT_ROWS.D[1]
+	MOVD $m1_low<>(SB), R0                   \
+	VLD1 (R0), [M1L.B16]                     \
+	MOVD $m1_high<>(SB), R0                  \
+	VLD1 (R0), [M1H.B16]                     \
+	MOVD $m2_low<>(SB), R0                   \
+	VLD1 (R0), [M2L.B16]                     \
+	MOVD $m2_high<>(SB), R0                  \
+	VLD1 (R0), [M2H.B16]                     \
+	MOVD $fk_mask<>(SB), R0                  \
+	VLD1 (R0), [FK_MASK.B16]                 \
+	MOVD $inverse_shift_rows<>(SB), R0       \
+	VLD1 (R0), [INVERSE_SHIFT_ROWS.B16]      \
+
 
 #define load_global_data_2() \
 	load_global_data_1()                     \
-	LDP r08_mask<>(SB), (R0, R1)             \
-	VMOV R0, R08_MASK.D[0]                   \
-	VMOV R1, R08_MASK.D[1]
+	MOVD $r08_mask<>(SB), R0                 \
+	VLD1 (R0), [R08_MASK.B16]                \
 
 #define SM4EKEY_EXPORT_KEYS() \
 	VMOV V9.S[3], V10.S[0]                   \
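
Note on the pattern: the old sequence loaded each 16-byte constant in three
instructions (LDP of the two 64-bit halves into a pair of general-purpose
registers, then two VMOVs into the D[0] and D[1] lanes of the target vector
register); the new sequence takes two (MOVD of the symbol's address, then a
single 128-bit VLD1 straight into the vector register), avoiding the
GPR-to-SIMD lane moves entirely. Below is a minimal, self-contained sketch of
the same MOVD+VLD1 load pattern, assuming GOARCH=arm64; the names demo_mask
and loadMask are hypothetical illustrations, not identifiers from this patch.

// demo_arm64.s -- hypothetical example mirroring the MOVD+VLD1 pattern above
#include "textflag.h"

// A 16-byte read-only constant, analogous to m1_low<>/r08_mask<> in the patch.
DATA demo_mask<>+0x00(SB)/8, $0x0706050403020100
DATA demo_mask<>+0x08(SB)/8, $0x0f0e0d0c0b0a0908
GLOBL demo_mask<>(SB), (16+8), $16 // NOPTR(16)+RODATA(8), same flag style as the patch

// func loadMask(dst *[16]byte)
TEXT ·loadMask(SB), NOSPLIT, $0-8
	MOVD dst+0(FP), R1
	MOVD $demo_mask<>(SB), R0  // materialize the constant's address
	VLD1 (R0), [V0.B16]        // one 128-bit load into vector register V0
	VST1 [V0.B16], (R1)        // store V0 so Go code can inspect the bytes
	RET

// demo.go -- driver for the sketch above (build with GOARCH=arm64)
package main

import "fmt"

// loadMask is implemented in demo_arm64.s.
func loadMask(dst *[16]byte)

func main() {
	var buf [16]byte
	loadMask(&buf)
	fmt.Printf("% x\n", buf) // expect: 00 01 02 ... 0f
}

Running the driver should print the bytes 00 through 0f, confirming that the
whole 128-bit constant arrived in V0 with a single load rather than an
LDP-plus-two-VMOV sequence.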