mirror of
https://github.com/emmansun/gmsm.git
synced 2025-05-13 20:46:17 +08:00
internal/sm2ec: amd64, optimize select SIMD
This commit is contained in:
parent
f7beee3dae
commit
48589f0876
@ -483,8 +483,6 @@ internalSqrBMI2:
|
|||||||
ST (yout) \
|
ST (yout) \
|
||||||
\// Load stored values from stack
|
\// Load stored values from stack
|
||||||
MOVQ rptr, AX \
|
MOVQ rptr, AX \
|
||||||
MOVL sel_save, BX \
|
|
||||||
MOVL zero_save, CX \
|
|
||||||
|
|
||||||
// func p256PointAddAffineAsm(res, in1 *SM2P256Point, in2 *p256AffinePoint, sign, sel, zero int)
|
// func p256PointAddAffineAsm(res, in1 *SM2P256Point, in2 *p256AffinePoint, sign, sel, zero int)
|
||||||
TEXT ·p256PointAddAffineAsm(SB),0,$512-48
|
TEXT ·p256PointAddAffineAsm(SB),0,$512-48
|
||||||
@ -528,8 +526,8 @@ TEXT ·p256PointAddAffineAsm(SB),0,$512-48
|
|||||||
MOVOU zout(16*0), X4
|
MOVOU zout(16*0), X4
|
||||||
MOVOU zout(16*1), X5
|
MOVOU zout(16*1), X5
|
||||||
|
|
||||||
MOVL BX, X6 // sel
|
MOVL sel_save, X6 // sel
|
||||||
MOVL CX, X7 // zero
|
MOVL zero_save, X7 // zero
|
||||||
|
|
||||||
PXOR X8, X8 // X8's bits are all 0
|
PXOR X8, X8 // X8's bits are all 0
|
||||||
PCMPEQL X9, X9 // X9's bits are all 1
|
PCMPEQL X9, X9 // X9's bits are all 1
|
||||||
@ -626,13 +624,9 @@ pointaddaffine_avx2:
|
|||||||
|
|
||||||
p256PointAddAffineInline()
|
p256PointAddAffineInline()
|
||||||
// The result is not valid if (sel == 0), conditional choose
|
// The result is not valid if (sel == 0), conditional choose
|
||||||
MOVL BX, X6 // sel
|
|
||||||
MOVL CX, X7 // zero
|
|
||||||
|
|
||||||
VPXOR Y8, Y8, Y8 // Y8's bits are all 0
|
VPXOR Y8, Y8, Y8 // Y8's bits are all 0
|
||||||
|
VPBROADCASTD sel_save, Y6 // sel
|
||||||
VPBROADCASTD X6, Y6
|
VPBROADCASTD zero_save, Y7 // zero
|
||||||
VPBROADCASTD X7, Y7
|
|
||||||
|
|
||||||
VPCMPEQD Y8, Y6, Y6 // Y6's bits are all 1 if sel = 0, else are 0
|
VPCMPEQD Y8, Y6, Y6 // Y6's bits are all 1 if sel = 0, else are 0
|
||||||
VPCMPEQD Y8, Y7, Y7 // Y7's bits are all 1 if zero = 0, else are 0
|
VPCMPEQD Y8, Y7, Y7 // Y7's bits are all 1 if zero = 0, else are 0
|
||||||
|
@ -705,9 +705,8 @@ loop_select:
|
|||||||
select_avx2:
|
select_avx2:
|
||||||
VPXOR Y15, Y15, Y15
|
VPXOR Y15, Y15, Y15
|
||||||
VPCMPEQD Y14, Y14, Y14
|
VPCMPEQD Y14, Y14, Y14
|
||||||
VPSUBD Y14, Y15, Y15
|
VPSUBD Y14, Y15, Y15 // Y15 = 1
|
||||||
MOVL idx+16(FP), X14 // x14 = idx
|
VPBROADCASTD idx+16(FP), Y14
|
||||||
VPBROADCASTD X14, Y14
|
|
||||||
|
|
||||||
MOVQ limit+24(FP),AX
|
MOVQ limit+24(FP),AX
|
||||||
VMOVDQU Y15, Y13
|
VMOVDQU Y15, Y13
|
||||||
@ -717,9 +716,8 @@ select_avx2:
|
|||||||
VPXOR Y2, Y2, Y2
|
VPXOR Y2, Y2, Y2
|
||||||
|
|
||||||
loop_select_avx2:
|
loop_select_avx2:
|
||||||
VMOVDQU Y13, Y12
|
VPCMPEQD Y14, Y13, Y12
|
||||||
VPADDD Y15, Y13, Y13
|
VPADDD Y15, Y13, Y13
|
||||||
VPCMPEQD Y14, Y12, Y12
|
|
||||||
|
|
||||||
VPAND (32*0)(DI), Y12, Y3
|
VPAND (32*0)(DI), Y12, Y3
|
||||||
VPAND (32*1)(DI), Y12, Y4
|
VPAND (32*1)(DI), Y12, Y4
|
||||||
@ -753,7 +751,7 @@ TEXT ·p256SelectAffine(SB),NOSPLIT,$0
|
|||||||
PXOR X15, X15 // X15 = 0
|
PXOR X15, X15 // X15 = 0
|
||||||
PCMPEQL X14, X14 // X14 = -1
|
PCMPEQL X14, X14 // X14 = -1
|
||||||
PSUBL X14, X15 // X15 = 1
|
PSUBL X14, X15 // X15 = 1
|
||||||
MOVL AX, X14 // x14 = idx
|
MOVL idx+16(FP), X14 // x14 = idx
|
||||||
PSHUFD $0, X14, X14
|
PSHUFD $0, X14, X14
|
||||||
|
|
||||||
MOVQ $16, AX
|
MOVQ $16, AX
|
||||||
@ -820,8 +818,7 @@ select_base_avx2:
|
|||||||
VPXOR Y15, Y15, Y15
|
VPXOR Y15, Y15, Y15
|
||||||
VPCMPEQD Y14, Y14, Y14
|
VPCMPEQD Y14, Y14, Y14
|
||||||
VPSUBD Y14, Y15, Y15
|
VPSUBD Y14, Y15, Y15
|
||||||
MOVL AX, X14 // x14 = idx
|
VPBROADCASTD idx+16(FP), Y14
|
||||||
VPBROADCASTD X14, Y14
|
|
||||||
|
|
||||||
MOVQ $16, AX
|
MOVQ $16, AX
|
||||||
VMOVDQU Y15, Y13
|
VMOVDQU Y15, Y13
|
||||||
@ -829,16 +826,14 @@ select_base_avx2:
|
|||||||
VPXOR Y1, Y1, Y1
|
VPXOR Y1, Y1, Y1
|
||||||
|
|
||||||
loop_select_base_avx2:
|
loop_select_base_avx2:
|
||||||
VMOVDQU Y13, Y12
|
VPCMPEQD Y14, Y13, Y12
|
||||||
VPADDD Y15, Y13, Y13
|
VPADDD Y15, Y13, Y13
|
||||||
VPCMPEQD Y14, Y12, Y12
|
|
||||||
|
|
||||||
VPAND (32*0)(DI), Y12, Y2
|
VPAND (32*0)(DI), Y12, Y2
|
||||||
VPAND (32*1)(DI), Y12, Y3
|
VPAND (32*1)(DI), Y12, Y3
|
||||||
|
|
||||||
VMOVDQU Y13, Y12
|
VPCMPEQD Y14, Y13, Y12
|
||||||
VPADDD Y15, Y13, Y13
|
VPADDD Y15, Y13, Y13
|
||||||
VPCMPEQD Y14, Y12, Y12
|
|
||||||
|
|
||||||
VPAND (32*2)(DI), Y12, Y4
|
VPAND (32*2)(DI), Y12, Y4
|
||||||
VPAND (32*3)(DI), Y12, Y5
|
VPAND (32*3)(DI), Y12, Y5
|
||||||
|
@ -500,8 +500,6 @@ internalSqrBMI2:
|
|||||||
ST (yout) \
|
ST (yout) \
|
||||||
\// Load stored values from stack
|
\// Load stored values from stack
|
||||||
MOVQ rptr, AX \
|
MOVQ rptr, AX \
|
||||||
MOVL sel_save, BX \
|
|
||||||
MOVL zero_save, CX \
|
|
||||||
|
|
||||||
// func p256PointAddAffineAsm(res, in1 *SM2P256Point, in2 *p256AffinePoint, sign, sel, zero int)
|
// func p256PointAddAffineAsm(res, in1 *SM2P256Point, in2 *p256AffinePoint, sign, sel, zero int)
|
||||||
TEXT ·p256PointAddAffineAsm(SB),0,$512-48
|
TEXT ·p256PointAddAffineAsm(SB),0,$512-48
|
||||||
@ -545,8 +543,8 @@ TEXT ·p256PointAddAffineAsm(SB),0,$512-48
|
|||||||
MOVOU zout(16*0), X4
|
MOVOU zout(16*0), X4
|
||||||
MOVOU zout(16*1), X5
|
MOVOU zout(16*1), X5
|
||||||
|
|
||||||
MOVL BX, X6 // sel
|
MOVL sel_save, X6 // sel
|
||||||
MOVL CX, X7 // zero
|
MOVL zero_save, X7 // zero
|
||||||
|
|
||||||
PXOR X8, X8 // X8's bits are all 0
|
PXOR X8, X8 // X8's bits are all 0
|
||||||
PCMPEQL X9, X9 // X9's bits are all 1
|
PCMPEQL X9, X9 // X9's bits are all 1
|
||||||
@ -643,13 +641,9 @@ pointaddaffine_avx2:
|
|||||||
|
|
||||||
p256PointAddAffineInline()
|
p256PointAddAffineInline()
|
||||||
// The result is not valid if (sel == 0), conditional choose
|
// The result is not valid if (sel == 0), conditional choose
|
||||||
MOVL BX, X6 // sel
|
|
||||||
MOVL CX, X7 // zero
|
|
||||||
|
|
||||||
VPXOR Y8, Y8, Y8 // Y8's bits are all 0
|
VPXOR Y8, Y8, Y8 // Y8's bits are all 0
|
||||||
|
VPBROADCASTD sel_save, Y6 // sel
|
||||||
VPBROADCASTD X6, Y6
|
VPBROADCASTD zero_save, Y7 // zero
|
||||||
VPBROADCASTD X7, Y7
|
|
||||||
|
|
||||||
VPCMPEQD Y8, Y6, Y6 // Y6's bits are all 1 if sel = 0, else are 0
|
VPCMPEQD Y8, Y6, Y6 // Y6's bits are all 1 if sel = 0, else are 0
|
||||||
VPCMPEQD Y8, Y7, Y7 // Y7's bits are all 1 if zero = 0, else are 0
|
VPCMPEQD Y8, Y7, Y7 // Y7's bits are all 1 if zero = 0, else are 0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user