From 84045dc25f49b81e8f94fbfa7dc39711993f39a5 Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Wed, 8 Nov 2023 13:43:56 +0800
Subject: [PATCH] zuc: arm64, uses new method to load global data

LOAD_GLOBAL_DATA previously filled every vector constant with three
instructions: an LDP of the symbol into the R0/R1 pair followed by two
VMOV lane inserts (D[0] and D[1]).

Load each constant with two instructions instead: MOVD $sym<>(SB), R0
to take the address of the symbol, then VLD1 (R0), [Vn.B16] to load the
whole vector register from that address. As a side effect the macro now
only uses R0 as scratch; R1 is no longer written.

The last line of each macro deliberately carries no line-continuation
backslash, so the #define ends on its final instruction and does not
depend on the following source line being blank.

---
 zuc/asm_arm64.s     | 65 ++++++++++++++++++---------------------------
 zuc/eia_asm_arm64.s | 25 +++++++----------
 2 files changed, 36 insertions(+), 54 deletions(-)

diff --git a/zuc/asm_arm64.s b/zuc/asm_arm64.s
index bb3f1f4..42a2416 100644
--- a/zuc/asm_arm64.s
+++ b/zuc/asm_arm64.s
@@ -86,45 +86,32 @@ GLOBL mask_S1<>(SB), RODATA, $16
 #define OFFSET_BRC_X3 (21*4)
 
 #define LOAD_GLOBAL_DATA() \
-	LDP nibble_mask<>(SB), (R0, R1) \
-	VMOV R0, NIBBLE_MASK.D[0] \
-	VMOV R1, NIBBLE_MASK.D[1] \
-	LDP Top3_bits_of_the_byte<>(SB), (R0, R1) \
-	VMOV R0, TOP3_BITS.D[0] \
-	VMOV R1, TOP3_BITS.D[1] \
-	LDP Bottom5_bits_of_the_byte<>(SB), (R0, R1) \
-	VMOV R0, BOTTOM5_BITS.D[0] \
-	VMOV R1, BOTTOM5_BITS.D[1] \
-	LDP Aes_to_Zuc_mul_low_nibble<>(SB), (R0, R1) \
-	VMOV R0, M1L.D[0] \
-	VMOV R1, M1L.D[1] \
-	LDP Aes_to_Zuc_mul_high_nibble<>(SB), (R0, R1) \
-	VMOV R0, M1H.D[0] \
-	VMOV R1, M1H.D[1] \
-	LDP Comb_matrix_mul_low_nibble<>(SB), (R0, R1) \
-	VMOV R0, M2L.D[0] \
-	VMOV R1, M2L.D[1] \
-	LDP Comb_matrix_mul_high_nibble<>(SB), (R0, R1) \
-	VMOV R0, M2H.D[0] \
-	VMOV R1, M2H.D[1] \
-	LDP P1_data<>(SB), (R0, R1) \
-	VMOV R0, P1.D[0] \
-	VMOV R1, P1.D[1] \
-	LDP P2_data<>(SB), (R0, R1) \
-	VMOV R0, P2.D[0] \
-	VMOV R1, P2.D[1] \
-	LDP P3_data<>(SB), (R0, R1) \
-	VMOV R0, P3.D[0] \
-	VMOV R1, P3.D[1] \
-	LDP mask_S0<>(SB), (R0, R1) \
-	VMOV R0, S0_MASK.D[0] \
-	VMOV R1, S0_MASK.D[1] \
-	LDP mask_S1<>(SB), (R0, R1) \
-	VMOV R0, S1_MASK.D[0] \
-	VMOV R1, S1_MASK.D[1] \
-	LDP Shuf_mask<>(SB), (R0, R1) \
-	VMOV R0, INVERSE_SHIFT_ROWS.D[0] \
-	VMOV R1, INVERSE_SHIFT_ROWS.D[1]
+	MOVD $nibble_mask<>(SB), R0 \
+	VLD1 (R0), [NIBBLE_MASK.B16] \
+	MOVD $Top3_bits_of_the_byte<>(SB), R0 \
+	VLD1 (R0), [TOP3_BITS.B16] \
+	MOVD $Bottom5_bits_of_the_byte<>(SB), R0 \
+	VLD1 (R0), [BOTTOM5_BITS.B16] \
+	MOVD $Aes_to_Zuc_mul_low_nibble<>(SB), R0 \
+	VLD1 (R0), [M1L.B16] \
+	MOVD $Aes_to_Zuc_mul_high_nibble<>(SB), R0 \
+	VLD1 (R0), [M1H.B16] \
+	MOVD $Comb_matrix_mul_low_nibble<>(SB), R0 \
+	VLD1 (R0), [M2L.B16] \
+	MOVD $Comb_matrix_mul_high_nibble<>(SB), R0 \
+	VLD1 (R0), [M2H.B16] \
+	MOVD $P1_data<>(SB), R0 \
+	VLD1 (R0), [P1.B16] \
+	MOVD $P2_data<>(SB), R0 \
+	VLD1 (R0), [P2.B16] \
+	MOVD $P3_data<>(SB), R0 \
+	VLD1 (R0), [P3.B16] \
+	MOVD $mask_S0<>(SB), R0 \
+	VLD1 (R0), [S0_MASK.B16] \
+	MOVD $mask_S1<>(SB), R0 \
+	VLD1 (R0), [S1_MASK.B16] \
+	MOVD $Shuf_mask<>(SB), R0 \
+	VLD1 (R0), [INVERSE_SHIFT_ROWS.B16]
 
 #define SHLDL(a, b, n) \ // NO SHLDL in GOLANG now
 	LSLW n, a \
diff --git a/zuc/eia_asm_arm64.s b/zuc/eia_asm_arm64.s
index ee03e49..baff5be 100644
--- a/zuc/eia_asm_arm64.s
+++ b/zuc/eia_asm_arm64.s
@@ -47,21 +47,16 @@ GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
 #define SHUF_MASK_DW2_DW3 V24
 
 #define LOAD_GLOBAL_DATA() \
-	LDP bit_reverse_table_l<>(SB), (R0, R1) \
-	VMOV R0, BIT_REV_TAB_L.D[0] \
-	VMOV R1, BIT_REV_TAB_L.D[1] \
-	LDP bit_reverse_table_h<>(SB), (R0, R1) \
-	VMOV R0, BIT_REV_TAB_H.D[0] \
-	VMOV R1, BIT_REV_TAB_H.D[1] \
-	LDP bit_reverse_and_table<>(SB), (R0, R1) \
-	VMOV R0, BIT_REV_AND_TAB.D[0] \
-	VMOV R1, BIT_REV_AND_TAB.D[1] \
-	LDP shuf_mask_dw0_0_dw1_0<>(SB), (R0, R1) \
-	VMOV R0, SHUF_MASK_DW0_DW1.D[0] \
-	VMOV R1, SHUF_MASK_DW0_DW1.D[1] \
-	LDP shuf_mask_dw2_0_dw3_0<>(SB), (R0, R1) \
-	VMOV R0, SHUF_MASK_DW2_DW3.D[0] \
-	VMOV R1, SHUF_MASK_DW2_DW3.D[1]
+	MOVD $bit_reverse_table_l<>(SB), R0 \
+	VLD1 (R0), [BIT_REV_TAB_L.B16] \
+	MOVD $bit_reverse_table_h<>(SB), R0 \
+	VLD1 (R0), [BIT_REV_TAB_H.B16] \
+	MOVD $bit_reverse_and_table<>(SB), R0 \
+	VLD1 (R0), [BIT_REV_AND_TAB.B16] \
+	MOVD $shuf_mask_dw0_0_dw1_0<>(SB), R0 \
+	VLD1 (R0), [SHUF_MASK_DW0_DW1.B16] \
+	MOVD $shuf_mask_dw2_0_dw3_0<>(SB), R0 \
+	VLD1 (R0), [SHUF_MASK_DW2_DW3.B16]
 
 // func eia3Round16B(t *uint32, keyStream *uint32, p *byte, tagSize int)
 TEXT ·eia3Round16B(SB),NOSPLIT,$0