mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
internal/sm2ec: use NEON instructions for p256Select
This commit is contained in:
parent
b5c593005d
commit
2f60f0062b
@ -279,52 +279,34 @@ TEXT ·p256Select(SB),NOSPLIT,$0
|
|||||||
MOVD table+8(FP), b_ptr
|
MOVD table+8(FP), b_ptr
|
||||||
MOVD res+0(FP), res_ptr
|
MOVD res+0(FP), res_ptr
|
||||||
|
|
||||||
EOR x0, x0, x0
|
VMOV const0, V0.S4 // will switch to VDUP after upgrading Go to 1.17+
|
||||||
EOR x1, x1, x1
|
|
||||||
EOR x2, x2, x2
|
VEOR V2.B16, V2.B16, V2.B16
|
||||||
EOR x3, x3, x3
|
VEOR V3.B16, V3.B16, V3.B16
|
||||||
EOR y0, y0, y0
|
VEOR V4.B16, V4.B16, V4.B16
|
||||||
EOR y1, y1, y1
|
VEOR V5.B16, V5.B16, V5.B16
|
||||||
EOR y2, y2, y2
|
VEOR V6.B16, V6.B16, V6.B16
|
||||||
EOR y3, y3, y3
|
VEOR V7.B16, V7.B16, V7.B16
|
||||||
EOR t0, t0, t0
|
|
||||||
EOR t1, t1, t1
|
|
||||||
EOR t2, t2, t2
|
|
||||||
EOR t3, t3, t3
|
|
||||||
|
|
||||||
MOVD $0, const1
|
MOVD $0, const1
|
||||||
|
|
||||||
loop_select:
|
loop_select:
|
||||||
ADD $1, const1
|
ADD $1, const1
|
||||||
CMP const0, const1
|
VMOV const1, V1.S4 // will switch to VDUP after upgrading Go to 1.17+
|
||||||
LDP.P 16(b_ptr), (acc0, acc1)
|
VCMEQ V0.S4, V1.S4, V14.S4
|
||||||
CSEL EQ, acc0, x0, x0
|
VLD1.P (48)(b_ptr), [V8.B16, V9.B16, V10.B16]
|
||||||
CSEL EQ, acc1, x1, x1
|
VLD1.P (48)(b_ptr), [V11.B16, V12.B16, V13.B16]
|
||||||
LDP.P 16(b_ptr), (acc2, acc3)
|
VBIT V14.B16, V8.B16, V2.B16
|
||||||
CSEL EQ, acc2, x2, x2
|
VBIT V14.B16, V9.B16, V3.B16
|
||||||
CSEL EQ, acc3, x3, x3
|
VBIT V14.B16, V10.B16, V4.B16
|
||||||
LDP.P 16(b_ptr), (acc4, acc5)
|
VBIT V14.B16, V11.B16, V5.B16
|
||||||
CSEL EQ, acc4, y0, y0
|
VBIT V14.B16, V12.B16, V6.B16
|
||||||
CSEL EQ, acc5, y1, y1
|
VBIT V14.B16, V13.B16, V7.B16
|
||||||
LDP.P 16(b_ptr), (acc6, acc7)
|
|
||||||
CSEL EQ, acc6, y2, y2
|
|
||||||
CSEL EQ, acc7, y3, y3
|
|
||||||
LDP.P 16(b_ptr), (acc0, acc1)
|
|
||||||
CSEL EQ, acc0, t0, t0
|
|
||||||
CSEL EQ, acc1, t1, t1
|
|
||||||
LDP.P 16(b_ptr), (acc2, acc3)
|
|
||||||
CSEL EQ, acc2, t2, t2
|
|
||||||
CSEL EQ, acc3, t3, t3
|
|
||||||
|
|
||||||
CMP a_ptr, const1
|
CMP a_ptr, const1
|
||||||
BNE loop_select
|
BNE loop_select
|
||||||
|
VST1.P [V2.B16, V3.B16, V4.B16], (48)(res_ptr)
|
||||||
STP (x0, x1), 0*16(res_ptr)
|
VST1 [V5.B16, V6.B16, V7.B16], (res_ptr)
|
||||||
STP (x2, x3), 1*16(res_ptr)
|
|
||||||
STP (y0, y1), 2*16(res_ptr)
|
|
||||||
STP (y2, y3), 3*16(res_ptr)
|
|
||||||
STP (t0, t1), 4*16(res_ptr)
|
|
||||||
STP (t2, t3), 5*16(res_ptr)
|
|
||||||
RET
|
RET
|
||||||
/* ---------------------------------------*/
|
/* ---------------------------------------*/
|
||||||
// func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
|
// func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user