mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 12:16:20 +08:00
zuc: optimize constant loading on arm64
This commit is contained in:
parent
9a45c4101b
commit
fe4e699b19
105
zuc/asm_arm64.s
105
zuc/asm_arm64.s
@ -2,57 +2,40 @@
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA Top3_bits_of_the_byte<>+0x00(SB)/8, $0xe0e0e0e0e0e0e0e0
|
||||
DATA Top3_bits_of_the_byte<>+0x08(SB)/8, $0xe0e0e0e0e0e0e0e0
|
||||
GLOBL Top3_bits_of_the_byte<>(SB), RODATA, $16
|
||||
DATA Top3_Bottom5_bits_of_the_byte<>+0x00(SB)/8, $0xe0e0e0e0e0e0e0e0
|
||||
DATA Top3_Bottom5_bits_of_the_byte<>+0x08(SB)/8, $0xe0e0e0e0e0e0e0e0
|
||||
DATA Top3_Bottom5_bits_of_the_byte<>+0x10(SB)/8, $0x1f1f1f1f1f1f1f1f
|
||||
DATA Top3_Bottom5_bits_of_the_byte<>+0x18(SB)/8, $0x1f1f1f1f1f1f1f1f
|
||||
GLOBL Top3_Bottom5_bits_of_the_byte<>(SB), RODATA, $32
|
||||
|
||||
DATA Bottom5_bits_of_the_byte<>+0x00(SB)/8, $0x1f1f1f1f1f1f1f1f
|
||||
DATA Bottom5_bits_of_the_byte<>+0x08(SB)/8, $0x1f1f1f1f1f1f1f1f
|
||||
GLOBL Bottom5_bits_of_the_byte<>(SB), RODATA, $16
|
||||
DATA P123_data<>+0x00(SB)/8, $0x0A020F0F0E000F09
|
||||
DATA P123_data<>+0x08(SB)/8, $0x090305070C000400
|
||||
DATA P123_data<>+0x10(SB)/8, $0x040C000705060D08
|
||||
DATA P123_data<>+0x18(SB)/8, $0x0209030F0A0E010B
|
||||
DATA P123_data<>+0x20(SB)/8, $0x0F0A0D00060A0602
|
||||
DATA P123_data<>+0x28(SB)/8, $0x0D0C0900050D0303
|
||||
GLOBL P123_data<>(SB), RODATA, $48
|
||||
|
||||
DATA nibble_mask<>+0x00(SB)/8, $0x0F0F0F0F0F0F0F0F
|
||||
DATA nibble_mask<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F
|
||||
GLOBL nibble_mask<>(SB), RODATA, $16
|
||||
|
||||
DATA P1_data<>+0x00(SB)/8, $0x0A020F0F0E000F09
|
||||
DATA P1_data<>+0x08(SB)/8, $0x090305070C000400
|
||||
GLOBL P1_data<>(SB), RODATA, $16
|
||||
|
||||
DATA P2_data<>+0x00(SB)/8, $0x040C000705060D08
|
||||
DATA P2_data<>+0x08(SB)/8, $0x0209030F0A0E010B
|
||||
GLOBL P2_data<>(SB), RODATA, $16
|
||||
|
||||
DATA P3_data<>+0x00(SB)/8, $0x0F0A0D00060A0602
|
||||
DATA P3_data<>+0x08(SB)/8, $0x0D0C0900050D0303
|
||||
GLOBL P3_data<>(SB), RODATA, $16
|
||||
|
||||
DATA Aes_to_Zuc_mul_low_nibble<>+0x00(SB)/8, $0x1D1C9F9E83820100
|
||||
DATA Aes_to_Zuc_mul_low_nibble<>+0x08(SB)/8, $0x3938BBBAA7A62524
|
||||
GLOBL Aes_to_Zuc_mul_low_nibble<>(SB), RODATA, $16
|
||||
|
||||
DATA Aes_to_Zuc_mul_high_nibble<>+0x00(SB)/8, $0xA174A97CDD08D500
|
||||
DATA Aes_to_Zuc_mul_high_nibble<>+0x08(SB)/8, $0x3DE835E04194499C
|
||||
GLOBL Aes_to_Zuc_mul_high_nibble<>(SB), RODATA, $16
|
||||
|
||||
DATA Comb_matrix_mul_low_nibble<>+0x00(SB)/8, $0xA8BC0216D9CD7367
|
||||
DATA Comb_matrix_mul_low_nibble<>+0x08(SB)/8, $0x1F0BB5A16E7AC4D0
|
||||
GLOBL Comb_matrix_mul_low_nibble<>(SB), RODATA, $16
|
||||
|
||||
DATA Comb_matrix_mul_high_nibble<>+0x00(SB)/8, $0x638CFA1523CCBA55
|
||||
DATA Comb_matrix_mul_high_nibble<>+0x08(SB)/8, $0x3FD0A6497F90E609
|
||||
GLOBL Comb_matrix_mul_high_nibble<>(SB), RODATA, $16
|
||||
// Affine transform 1 & 2 (low and high nibbles)
|
||||
DATA m1_2<>+0x00(SB)/8, $0x1D1C9F9E83820100
|
||||
DATA m1_2<>+0x08(SB)/8, $0x3938BBBAA7A62524
|
||||
DATA m1_2<>+0x10(SB)/8, $0xA174A97CDD08D500
|
||||
DATA m1_2<>+0x18(SB)/8, $0x3DE835E04194499C
|
||||
DATA m1_2<>+0x20(SB)/8, $0xA8BC0216D9CD7367
|
||||
DATA m1_2<>+0x28(SB)/8, $0x1F0BB5A16E7AC4D0
|
||||
DATA m1_2<>+0x30(SB)/8, $0x638CFA1523CCBA55
|
||||
DATA m1_2<>+0x38(SB)/8, $0x3FD0A6497F90E609
|
||||
GLOBL m1_2<>(SB), RODATA, $64
|
||||
|
||||
DATA Shuf_mask<>+0x00(SB)/8, $0x0B0E0104070A0D00
|
||||
DATA Shuf_mask<>+0x08(SB)/8, $0x0306090C0F020508
|
||||
GLOBL Shuf_mask<>(SB), RODATA, $16
|
||||
|
||||
DATA mask_S0<>+0x00(SB)/8, $0xff00ff00ff00ff00
|
||||
DATA mask_S0<>+0x08(SB)/8, $0xff00ff00ff00ff00
|
||||
GLOBL mask_S0<>(SB), RODATA, $16
|
||||
|
||||
DATA mask_S1<>+0x00(SB)/8, $0x00ff00ff00ff00ff
|
||||
DATA mask_S1<>+0x08(SB)/8, $0x00ff00ff00ff00ff
|
||||
GLOBL mask_S1<>(SB), RODATA, $16
|
||||
DATA mask_S01<>+0x00(SB)/8, $0xff00ff00ff00ff00
|
||||
DATA mask_S01<>+0x08(SB)/8, $0xff00ff00ff00ff00
|
||||
DATA mask_S01<>+0x10(SB)/8, $0x00ff00ff00ff00ff
|
||||
DATA mask_S01<>+0x18(SB)/8, $0x00ff00ff00ff00ff
|
||||
GLOBL mask_S01<>(SB), RODATA, $32
|
||||
|
||||
#define SI R0
|
||||
#define DI R1
|
||||
@ -85,30 +68,16 @@ GLOBL mask_S1<>(SB), RODATA, $16
|
||||
#define OFFSET_BRC_X3 (21*4)
|
||||
|
||||
#define LOAD_GLOBAL_DATA() \
|
||||
MOVD $nibble_mask<>(SB), R0 \
|
||||
VLD1 (R0), [NIBBLE_MASK.B16] \
|
||||
MOVD $Top3_bits_of_the_byte<>(SB), R0 \
|
||||
VLD1 (R0), [TOP3_BITS.B16] \
|
||||
MOVD $Bottom5_bits_of_the_byte<>(SB), R0 \
|
||||
VLD1 (R0), [BOTTOM5_BITS.B16] \
|
||||
MOVD $Aes_to_Zuc_mul_low_nibble<>(SB), R0 \
|
||||
VLD1 (R0), [M1L.B16] \
|
||||
MOVD $Aes_to_Zuc_mul_high_nibble<>(SB), R0 \
|
||||
VLD1 (R0), [M1H.B16] \
|
||||
MOVD $Comb_matrix_mul_low_nibble<>(SB), R0 \
|
||||
VLD1 (R0), [M2L.B16] \
|
||||
MOVD $Comb_matrix_mul_high_nibble<>(SB), R0 \
|
||||
VLD1 (R0), [M2H.B16] \
|
||||
MOVD $P1_data<>(SB), R0 \
|
||||
VLD1 (R0), [P1.B16] \
|
||||
MOVD $P2_data<>(SB), R0 \
|
||||
VLD1 (R0), [P2.B16] \
|
||||
MOVD $P3_data<>(SB), R0 \
|
||||
VLD1 (R0), [P3.B16] \
|
||||
MOVD $mask_S0<>(SB), R0 \
|
||||
VLD1 (R0), [S0_MASK.B16] \
|
||||
MOVD $mask_S1<>(SB), R0 \
|
||||
VLD1 (R0), [S1_MASK.B16] \
|
||||
MOVW $0x0F0F0F0F, R0 \
|
||||
VDUP R0, NIBBLE_MASK.S4 \
|
||||
MOVD $Top3_Bottom5_bits_of_the_byte<>(SB), R0 \
|
||||
VLD1 (R0), [TOP3_BITS.B16, BOTTOM5_BITS.B16] \
|
||||
MOVD $m1_2<>(SB), R0 \
|
||||
VLD1 (R0), [M1L.B16, M1H.B16, M2L.B16, M2H.B16] \
|
||||
MOVD $P123_data<>(SB), R0 \
|
||||
VLD1 (R0), [P1.B16, P2.B16, P3.B16] \
|
||||
MOVD $mask_S01<>(SB), R0 \
|
||||
VLD1 (R0), [S0_MASK.B16, S1_MASK.B16] \
|
||||
MOVD $Shuf_mask<>(SB), R0 \
|
||||
VLD1 (R0), [INVERSE_SHIFT_ROWS.B16] \
|
||||
|
||||
|
@ -2,25 +2,17 @@
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA bit_reverse_table_l<>+0x00(SB)/8, $0x0e060a020c040800
|
||||
DATA bit_reverse_table_l<>+0x08(SB)/8, $0x0f070b030d050901
|
||||
GLOBL bit_reverse_table_l<>(SB), RODATA, $16
|
||||
DATA bit_reverse_table<>+0x00(SB)/8, $0x0e060a020c040800
|
||||
DATA bit_reverse_table<>+0x08(SB)/8, $0x0f070b030d050901
|
||||
DATA bit_reverse_table<>+0x10(SB)/8, $0xe060a020c0408000
|
||||
DATA bit_reverse_table<>+0x18(SB)/8, $0xf070b030d0509010
|
||||
GLOBL bit_reverse_table<>(SB), RODATA, $32
|
||||
|
||||
DATA bit_reverse_table_h<>+0x00(SB)/8, $0xe060a020c0408000
|
||||
DATA bit_reverse_table_h<>+0x08(SB)/8, $0xf070b030d0509010
|
||||
GLOBL bit_reverse_table_h<>(SB), RODATA, $16
|
||||
|
||||
DATA bit_reverse_and_table<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||
DATA bit_reverse_and_table<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||
GLOBL bit_reverse_and_table<>(SB), RODATA, $16
|
||||
|
||||
DATA shuf_mask_dw0_0_dw1_0<>+0x00(SB)/8, $0xffffffff03020100
|
||||
DATA shuf_mask_dw0_0_dw1_0<>+0x08(SB)/8, $0xffffffff07060504
|
||||
GLOBL shuf_mask_dw0_0_dw1_0<>(SB), RODATA, $16
|
||||
|
||||
DATA shuf_mask_dw2_0_dw3_0<>+0x00(SB)/8, $0xffffffff0b0a0908
|
||||
DATA shuf_mask_dw2_0_dw3_0<>+0x08(SB)/8, $0xffffffff0f0e0d0c
|
||||
GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
|
||||
DATA shuf_mask_dw<>+0x00(SB)/8, $0xffffffff03020100
|
||||
DATA shuf_mask_dw<>+0x08(SB)/8, $0xffffffff07060504
|
||||
DATA shuf_mask_dw<>+0x10(SB)/8, $0xffffffff0b0a0908
|
||||
DATA shuf_mask_dw<>+0x18(SB)/8, $0xffffffff0f0e0d0c
|
||||
GLOBL shuf_mask_dw<>(SB), RODATA, $32
|
||||
|
||||
#define AX R2
|
||||
#define BX R3
|
||||
@ -46,16 +38,12 @@ GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
|
||||
#define SHUF_MASK_DW2_DW3 V24
|
||||
|
||||
#define LOAD_GLOBAL_DATA() \
|
||||
MOVD $bit_reverse_table_l<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_TAB_L.B16] \
|
||||
MOVD $bit_reverse_table_h<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_TAB_H.B16] \
|
||||
MOVD $bit_reverse_and_table<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_AND_TAB.B16] \
|
||||
MOVD $shuf_mask_dw0_0_dw1_0<>(SB), R0 \
|
||||
VLD1 (R0), [SHUF_MASK_DW0_DW1.B16] \
|
||||
MOVD $shuf_mask_dw2_0_dw3_0<>(SB), R0 \
|
||||
VLD1 (R0), [SHUF_MASK_DW2_DW3.B16] \
|
||||
MOVD $bit_reverse_table<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_TAB_L.B16, BIT_REV_TAB_H.B16] \
|
||||
MOVW $0x0F0F0F0F, R0 \
|
||||
VDUP R0, BIT_REV_AND_TAB.S4 \
|
||||
MOVD $shuf_mask_dw<>(SB), R0 \
|
||||
VLD1 (R0), [SHUF_MASK_DW0_DW1.B16, SHUF_MASK_DW2_DW3.B16]
|
||||
|
||||
// func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
|
||||
TEXT ·eia256RoundTag8(SB),NOSPLIT,$0
|
||||
|
@ -2,25 +2,17 @@
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA bit_reverse_table_l<>+0x00(SB)/8, $0x0e060a020c040800
|
||||
DATA bit_reverse_table_l<>+0x08(SB)/8, $0x0f070b030d050901
|
||||
GLOBL bit_reverse_table_l<>(SB), RODATA, $16
|
||||
DATA bit_reverse_table<>+0x00(SB)/8, $0x0e060a020c040800
|
||||
DATA bit_reverse_table<>+0x08(SB)/8, $0x0f070b030d050901
|
||||
DATA bit_reverse_table<>+0x10(SB)/8, $0xe060a020c0408000
|
||||
DATA bit_reverse_table<>+0x18(SB)/8, $0xf070b030d0509010
|
||||
GLOBL bit_reverse_table<>(SB), RODATA, $32
|
||||
|
||||
DATA bit_reverse_table_h<>+0x00(SB)/8, $0xe060a020c0408000
|
||||
DATA bit_reverse_table_h<>+0x08(SB)/8, $0xf070b030d0509010
|
||||
GLOBL bit_reverse_table_h<>(SB), RODATA, $16
|
||||
|
||||
DATA bit_reverse_and_table<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||
DATA bit_reverse_and_table<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f
|
||||
GLOBL bit_reverse_and_table<>(SB), RODATA, $16
|
||||
|
||||
DATA shuf_mask_dw0_0_dw1_0<>+0x00(SB)/8, $0xffffffff03020100
|
||||
DATA shuf_mask_dw0_0_dw1_0<>+0x08(SB)/8, $0xffffffff07060504
|
||||
GLOBL shuf_mask_dw0_0_dw1_0<>(SB), RODATA, $16
|
||||
|
||||
DATA shuf_mask_dw2_0_dw3_0<>+0x00(SB)/8, $0xffffffff0b0a0908
|
||||
DATA shuf_mask_dw2_0_dw3_0<>+0x08(SB)/8, $0xffffffff0f0e0d0c
|
||||
GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
|
||||
DATA shuf_mask_dw<>+0x00(SB)/8, $0xffffffff03020100
|
||||
DATA shuf_mask_dw<>+0x08(SB)/8, $0xffffffff07060504
|
||||
DATA shuf_mask_dw<>+0x10(SB)/8, $0xffffffff0b0a0908
|
||||
DATA shuf_mask_dw<>+0x18(SB)/8, $0xffffffff0f0e0d0c
|
||||
GLOBL shuf_mask_dw<>(SB), RODATA, $32
|
||||
|
||||
#define AX R2
|
||||
#define BX R3
|
||||
@ -46,16 +38,12 @@ GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
|
||||
#define SHUF_MASK_DW2_DW3 V24
|
||||
|
||||
#define LOAD_GLOBAL_DATA() \
|
||||
MOVD $bit_reverse_table_l<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_TAB_L.B16] \
|
||||
MOVD $bit_reverse_table_h<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_TAB_H.B16] \
|
||||
MOVD $bit_reverse_and_table<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_AND_TAB.B16] \
|
||||
MOVD $shuf_mask_dw0_0_dw1_0<>(SB), R0 \
|
||||
VLD1 (R0), [SHUF_MASK_DW0_DW1.B16] \
|
||||
MOVD $shuf_mask_dw2_0_dw3_0<>(SB), R0 \
|
||||
VLD1 (R0), [SHUF_MASK_DW2_DW3.B16] \
|
||||
MOVD $bit_reverse_table<>(SB), R0 \
|
||||
VLD1 (R0), [BIT_REV_TAB_L.B16, BIT_REV_TAB_H.B16] \
|
||||
MOVW $0x0F0F0F0F, R0 \
|
||||
VDUP R0, BIT_REV_AND_TAB.S4 \
|
||||
MOVD $shuf_mask_dw<>(SB), R0 \
|
||||
VLD1 (R0), [SHUF_MASK_DW0_DW1.B16, SHUF_MASK_DW2_DW3.B16]
|
||||
|
||||
// func eia3Round16B(t *uint32, keyStream *uint32, p *byte, tagSize int)
|
||||
TEXT ·eia3Round16B(SB),NOSPLIT,$0
|
||||
|
Loading…
x
Reference in New Issue
Block a user