mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
Change VMOV to VDUP #179
This commit is contained in:
parent
4f511e2125
commit
851d02b3ea
@ -99,7 +99,7 @@ TEXT ·p256MovCond(SB),NOSPLIT,$0
|
|||||||
MOVD cond+24(FP), R3
|
MOVD cond+24(FP), R3
|
||||||
|
|
||||||
VEOR V0.B16, V0.B16, V0.B16
|
VEOR V0.B16, V0.B16, V0.B16
|
||||||
VMOV R3, V1.S4
|
VDUP R3, V1.S4
|
||||||
VCMEQ V0.S4, V1.S4, V2.S4
|
VCMEQ V0.S4, V1.S4, V2.S4
|
||||||
|
|
||||||
VLD1.P (48)(a_ptr), [V3.B16, V4.B16, V5.B16]
|
VLD1.P (48)(a_ptr), [V3.B16, V4.B16, V5.B16]
|
||||||
@ -278,7 +278,7 @@ TEXT ·p256Select(SB),NOSPLIT,$0
|
|||||||
MOVD table+8(FP), b_ptr
|
MOVD table+8(FP), b_ptr
|
||||||
MOVD res+0(FP), res_ptr
|
MOVD res+0(FP), res_ptr
|
||||||
|
|
||||||
VMOV const0, V0.S4 // will use VDUP after upgrade go to 1.17+
|
VDUP const0, V0.S4
|
||||||
|
|
||||||
VEOR V2.B16, V2.B16, V2.B16
|
VEOR V2.B16, V2.B16, V2.B16
|
||||||
VEOR V3.B16, V3.B16, V3.B16
|
VEOR V3.B16, V3.B16, V3.B16
|
||||||
@ -291,7 +291,7 @@ TEXT ·p256Select(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
loop_select:
|
loop_select:
|
||||||
ADD $1, const1
|
ADD $1, const1
|
||||||
VMOV const1, V1.S4 // will use VDUP after upgrade go to 1.17+
|
VDUP const1, V1.S4
|
||||||
VCMEQ V0.S4, V1.S4, V14.S4
|
VCMEQ V0.S4, V1.S4, V14.S4
|
||||||
VLD1.P (48)(b_ptr), [V8.B16, V9.B16, V10.B16]
|
VLD1.P (48)(b_ptr), [V8.B16, V9.B16, V10.B16]
|
||||||
VLD1.P (48)(b_ptr), [V11.B16, V12.B16, V13.B16]
|
VLD1.P (48)(b_ptr), [V11.B16, V12.B16, V13.B16]
|
||||||
@ -314,7 +314,7 @@ TEXT ·p256SelectAffine(SB),NOSPLIT,$0
|
|||||||
MOVD table+8(FP), t1
|
MOVD table+8(FP), t1
|
||||||
MOVD res+0(FP), res_ptr
|
MOVD res+0(FP), res_ptr
|
||||||
|
|
||||||
VMOV t0, V0.S4 // will use VDUP after upgrade go to 1.17+
|
VDUP t0, V0.S4
|
||||||
|
|
||||||
VEOR V2.B16, V2.B16, V2.B16
|
VEOR V2.B16, V2.B16, V2.B16
|
||||||
VEOR V3.B16, V3.B16, V3.B16
|
VEOR V3.B16, V3.B16, V3.B16
|
||||||
@ -325,7 +325,7 @@ TEXT ·p256SelectAffine(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
loop_select:
|
loop_select:
|
||||||
ADD $1, t2
|
ADD $1, t2
|
||||||
VMOV t2, V1.S4 // will use VDUP after upgrade go to 1.17+
|
VDUP t2, V1.S4
|
||||||
VCMEQ V0.S4, V1.S4, V10.S4
|
VCMEQ V0.S4, V1.S4, V10.S4
|
||||||
VLD1.P (64)(t1), [V6.B16, V7.B16, V8.B16, V9.B16]
|
VLD1.P (64)(t1), [V6.B16, V7.B16, V8.B16, V9.B16]
|
||||||
VBIT V10.B16, V6.B16, V2.B16
|
VBIT V10.B16, V6.B16, V2.B16
|
||||||
@ -1153,9 +1153,9 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-48
|
|||||||
MOVD zero+40(FP), t1
|
MOVD zero+40(FP), t1
|
||||||
|
|
||||||
VEOR V12.B16, V12.B16, V12.B16
|
VEOR V12.B16, V12.B16, V12.B16
|
||||||
VMOV hlp1, V13.S4 // will use VDUP after go 1.17
|
VDUP hlp1, V13.S4
|
||||||
VCMEQ V12.S4, V13.S4, V13.S4
|
VCMEQ V12.S4, V13.S4, V13.S4
|
||||||
VMOV t1, V14.S4 // will use VDUP after go 1.17
|
VDUP t1, V14.S4
|
||||||
VCMEQ V12.S4, V14.S4, V14.S4
|
VCMEQ V12.S4, V14.S4, V14.S4
|
||||||
|
|
||||||
LDP p256p<>+0x00(SB), (const0, const1)
|
LDP p256p<>+0x00(SB), (const0, const1)
|
||||||
|
@ -139,7 +139,7 @@ GLOBL fk_mask<>(SB), (16+8), $16
|
|||||||
// - t3: 128 bits register for data
|
// - t3: 128 bits register for data
|
||||||
#define SM4_ROUND(RK, tmp32, x, y, z, t0, t1, t2, t3) \
|
#define SM4_ROUND(RK, tmp32, x, y, z, t0, t1, t2, t3) \
|
||||||
MOVW.P 4(RK), tmp32; \
|
MOVW.P 4(RK), tmp32; \
|
||||||
VMOV tmp32, x.S4; \
|
VDUP tmp32, x.S4; \
|
||||||
VEOR t1.B16, x.B16, x.B16; \
|
VEOR t1.B16, x.B16, x.B16; \
|
||||||
VEOR t2.B16, x.B16, x.B16; \
|
VEOR t2.B16, x.B16, x.B16; \
|
||||||
VEOR t3.B16, x.B16, x.B16; \
|
VEOR t3.B16, x.B16, x.B16; \
|
||||||
@ -160,7 +160,7 @@ GLOBL fk_mask<>(SB), (16+8), $16
|
|||||||
// - t3: 128 bits register for data
|
// - t3: 128 bits register for data
|
||||||
#define SM4_8BLOCKS_ROUND(RK, tmp32, x, y, z, tmp, t0, t1, t2, t3, t4, t5, t6, t7) \
|
#define SM4_8BLOCKS_ROUND(RK, tmp32, x, y, z, tmp, t0, t1, t2, t3, t4, t5, t6, t7) \
|
||||||
MOVW.P 4(RK), tmp32; \
|
MOVW.P 4(RK), tmp32; \
|
||||||
VMOV tmp32, tmp.S4; \
|
VDUP tmp32, tmp.S4; \
|
||||||
VEOR t1.B16, tmp.B16, x.B16; \
|
VEOR t1.B16, tmp.B16, x.B16; \
|
||||||
VEOR t2.B16, x.B16, x.B16; \
|
VEOR t2.B16, x.B16, x.B16; \
|
||||||
VEOR t3.B16, x.B16, x.B16; \
|
VEOR t3.B16, x.B16, x.B16; \
|
||||||
|
@ -243,7 +243,7 @@ GLOBL mask_S1<>(SB), RODATA, $16
|
|||||||
EORW R9, R11 \ // V = L2(Q) = R11D, hi(R11)=0
|
EORW R9, R11 \ // V = L2(Q) = R11D, hi(R11)=0
|
||||||
LSL $32, R11 \
|
LSL $32, R11 \
|
||||||
EOR R11, DX \
|
EOR R11, DX \
|
||||||
VMOV DX, V0.D2 \
|
VDUP DX, V0.D2 \
|
||||||
VMOV V0.B16, V1.B16 \
|
VMOV V0.B16, V1.B16 \
|
||||||
S0_comput(V1, V2, V3) \
|
S0_comput(V1, V2, V3) \
|
||||||
S1_comput(V0, V2, V3) \
|
S1_comput(V0, V2, V3) \
|
||||||
|
@ -46,21 +46,16 @@ GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
|
|||||||
#define SHUF_MASK_DW2_DW3 V24
|
#define SHUF_MASK_DW2_DW3 V24
|
||||||
|
|
||||||
#define LOAD_GLOBAL_DATA() \
|
#define LOAD_GLOBAL_DATA() \
|
||||||
LDP bit_reverse_table_l<>(SB), (R0, R1) \
|
MOVD $bit_reverse_table_l<>(SB), R0 \
|
||||||
VMOV R0, BIT_REV_TAB_L.D[0] \
|
VMOV (R0), [BIT_REV_TAB_L.B16] \
|
||||||
VMOV R1, BIT_REV_TAB_L.D[1] \
|
MOVD $bit_reverse_table_h<>(SB), R0 \
|
||||||
LDP bit_reverse_table_h<>(SB), (R0, R1) \
|
VMOV (R0), [BIT_REV_TAB_H.B16] \
|
||||||
VMOV R0, BIT_REV_TAB_H.D[0] \
|
MOVD $bit_reverse_and_table<>(SB), R0 \
|
||||||
VMOV R1, BIT_REV_TAB_H.D[1] \
|
VMOV (R0), [BIT_REV_AND_TAB.B16] \
|
||||||
LDP bit_reverse_and_table<>(SB), (R0, R1) \
|
MOVD $shuf_mask_dw0_0_dw1_0<>(SB), R0 \
|
||||||
VMOV R0, BIT_REV_AND_TAB.D[0] \
|
VMOV (R0), [SHUF_MASK_DW0_DW1.B16] \
|
||||||
VMOV R1, BIT_REV_AND_TAB.D[1] \
|
MOVD $shuf_mask_dw2_0_dw3_0<>(SB), R0 \
|
||||||
LDP shuf_mask_dw0_0_dw1_0<>(SB), (R0, R1) \
|
VMOV (R0), [SHUF_MASK_DW2_DW3.B16] \
|
||||||
VMOV R0, SHUF_MASK_DW0_DW1.D[0] \
|
|
||||||
VMOV R1, SHUF_MASK_DW0_DW1.D[1] \
|
|
||||||
LDP shuf_mask_dw2_0_dw3_0<>(SB), (R0, R1) \
|
|
||||||
VMOV R0, SHUF_MASK_DW2_DW3.D[0] \
|
|
||||||
VMOV R1, SHUF_MASK_DW2_DW3.D[1]
|
|
||||||
|
|
||||||
// func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
|
// func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
|
||||||
TEXT ·eia256RoundTag8(SB),NOSPLIT,$0
|
TEXT ·eia256RoundTag8(SB),NOSPLIT,$0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user