mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
sm9/bn256: reduce register move
This commit is contained in:
parent
a10e64f6f5
commit
e3d14fb41a
@ -965,6 +965,28 @@ noAdxSqr:
|
||||
CMOVQCS acc6, t2;\
|
||||
CMOVQCS acc7, t3;
|
||||
|
||||
// (acc7, acc6, acc5, acc4) = 2(acc7, acc6, acc5, acc4)
// Doubles the 256-bit value held in acc4..acc7 (acc4 is the lowest limb) and
// subtracts the 256-bit constant stored at ·p2 once if the doubled value is
// not below it (presumably ·p2 is the field modulus - confirm at its definition).
// The result is left in acc4..acc7, so invocations can be placed back to back.
// Overwrites: mul0, t0, t1, t2, t3 and the flags.
|
||||
#define gfpMulBy2Inline2 \
|
||||
XORQ mul0, mul0;\ // mul0 = 0; it will receive the carry out of the doubling
|
||||
ADDQ acc4, acc4;\
|
||||
ADCQ acc5, acc5;\
|
||||
ADCQ acc6, acc6;\
|
||||
ADCQ acc7, acc7;\
|
||||
ADCQ $0, mul0;\ // mul0:acc7..acc4 is now the full 257-bit doubled value
|
||||
MOVQ acc4, t0;\ // keep the unreduced doubled value in t0..t3
|
||||
MOVQ acc5, t1;\
|
||||
MOVQ acc6, t2;\
|
||||
MOVQ acc7, t3;\
|
||||
SUBQ ·p2+0(SB), acc4;\ // acc7..acc4 = doubled value - ·p2, borrow chained upward
|
||||
SBBQ ·p2+8(SB), acc5;\
|
||||
SBBQ ·p2+16(SB), acc6;\
|
||||
SBBQ ·p2+24(SB), acc7;\
|
||||
SBBQ $0, mul0;\ // CF = 1 iff the 257-bit doubled value was below ·p2
|
||||
CMOVQCS t0, acc4;\ // CMOVQCS: Move if below (CF == 1)
|
||||
CMOVQCS t1, acc5;\ // on borrow, restore the unreduced doubled value
|
||||
CMOVQCS t2, acc6;\
|
||||
CMOVQCS t3, acc7;
|
||||
|
||||
/* ---------------------------------------*/
|
||||
// (t3, t2, t1, t0) = (acc7, acc6, acc5, acc4) + (t3, t2, t1, t0)
|
||||
#define gfpAddInline \
|
||||
@ -1294,8 +1316,7 @@ TEXT ·gfp2SquareU(SB),NOSPLIT,$160-16
|
||||
MOVQ t3, (16*0 + 8*3)(AX)
|
||||
|
||||
LDacc (cyout)
|
||||
gfpMulBy2Inline
|
||||
t2acc
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline
|
||||
XORQ acc4, acc4
|
||||
XORQ acc5, acc5
|
||||
@ -1358,41 +1379,41 @@ TEXT ·curvePointDoubleComplete(SB),NOSPLIT,$288-16
|
||||
CALL gfpSqrInternal(SB) // t0 := Y^2
|
||||
ST (tmp0)
|
||||
|
||||
gfpMulBy2Inline // Z3 := t0 + t0
|
||||
t2acc
|
||||
gfpMulBy2Inline // Z3 := Z3 + Z3
|
||||
t2acc
|
||||
gfpMulBy2Inline // Z3 := Z3 + Z3
|
||||
gfpMulBy2Inline2 // Z3 := t0 + t0
|
||||
gfpMulBy2Inline2 // Z3 := Z3 + Z3
|
||||
gfpMulBy2Inline // Z3 := Z3 + Z3
|
||||
STt (zout)
|
||||
|
||||
LDacc (zin)
|
||||
CALL gfpSqrInternal(SB) // t2 := Z^2
|
||||
ST (tmp2)
|
||||
gfpMulBy2Inline
|
||||
t2acc
|
||||
gfpMulBy2Inline
|
||||
t2acc
|
||||
gfpMulBy2Inline
|
||||
t2acc
|
||||
gfpMulBy2Inline
|
||||
t2acc
|
||||
LDt (tmp2)
|
||||
MOVQ acc4, acc0
|
||||
MOVQ acc5, acc1
|
||||
MOVQ acc6, acc2
|
||||
MOVQ acc7, acc3
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
MOVQ acc0, t0
|
||||
MOVQ acc1, t1
|
||||
MOVQ acc2, t2
|
||||
MOVQ acc3, t3
|
||||
CALL gfpSubInternal(SB) // t2 := 3b * t2
|
||||
ST (tmp2)
|
||||
LDt (zout)
|
||||
CALL gfpMulInternal(SB) // X3 := Z3 * t2
|
||||
CALL gfpMulInternal(SB) // X3 := Z3 * t2
|
||||
ST (xout)
|
||||
|
||||
LDacc (tmp0)
|
||||
LDt (tmp2)
|
||||
gfpAddInline // Y3 := t0 + t2
|
||||
gfpAddInline // Y3 := t0 + t2
|
||||
STt (yout)
|
||||
|
||||
LDacc (yin)
|
||||
LDt (zin)
|
||||
CALL gfpMulInternal(SB) // t1 := YZ
|
||||
CALL gfpMulInternal(SB) // t1 := YZ
|
||||
LDt (zout)
|
||||
CALL gfpMulInternal(SB) // Z3 := t1 * Z3
|
||||
CALL gfpMulInternal(SB) // Z3 := t1 * Z3
|
||||
MOVQ rptr, AX
|
||||
// Store Z
|
||||
MOVQ acc4, (16*4 + 8*0)(AX)
|
||||
@ -1403,14 +1424,14 @@ TEXT ·curvePointDoubleComplete(SB),NOSPLIT,$288-16
|
||||
LDacc (tmp2)
|
||||
gfpMulBy2Inline
|
||||
LDacc (tmp2)
|
||||
gfpAddInline // t2 := t2 + t2 + t2
|
||||
gfpAddInline // t2 := t2 + t2 + t2
|
||||
LDacc (tmp0)
|
||||
CALL gfpSubInternal(SB) // t0 := t0 - t2
|
||||
ST (tmp0)
|
||||
LDt (yout)
|
||||
CALL gfpMulInternal(SB) // Y3 = t0 * Y3
|
||||
LDt (xout)
|
||||
gfpAddInline // Y3 := X3 + Y3
|
||||
gfpAddInline // Y3 := X3 + Y3
|
||||
MOVQ rptr, AX
|
||||
// Store y
|
||||
MOVQ t0, (16*2 + 8*0)(AX)
|
||||
@ -1563,14 +1584,10 @@ TEXT gfpIsZero(SB),NOSPLIT,$0
|
||||
STt (tmp0) \
|
||||
\
|
||||
LDacc (tmp2) \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline2 \
|
||||
gfpMulBy2Inline2 \
|
||||
gfpMulBy2Inline2 \
|
||||
gfpMulBy2Inline2 \
|
||||
LDt (tmp2) \
|
||||
CALL gfpSubInternal(SB) \ // t2 := 3b * t2 = 3bZ1Z2
|
||||
ST (tmp2) \
|
||||
@ -1585,14 +1602,10 @@ TEXT gfpIsZero(SB),NOSPLIT,$0
|
||||
ST (tmp1) \
|
||||
\
|
||||
LDacc (yout) \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline \
|
||||
t2acc \
|
||||
gfpMulBy2Inline2 \
|
||||
gfpMulBy2Inline2 \
|
||||
gfpMulBy2Inline2 \
|
||||
gfpMulBy2Inline2 \
|
||||
LDt (yout) \
|
||||
CALL gfpSubInternal(SB) \ // Y3 = 3b * Y3 = 3b(X1Z2 + X2Z1)
|
||||
ST (yout) \
|
||||
|
@ -463,6 +463,23 @@ TEXT gfpSqrInternal(SB),NOSPLIT,$0
|
||||
CSEL CC, x2, acc2, x2;\
|
||||
CSEL CC, x3, acc3, x3;
|
||||
|
||||
// (y3, y2, y1, y0) = 2(y3, y2, y1, y0)
// Doubles the 256-bit value held in y0..y3 (y0 is the lowest limb) and
// subtracts the 256-bit value in const0..const3 once if the doubled value is
// not below it (presumably const0..const3 hold the field modulus - confirm
// where those registers are loaded).
// The result is written back to y0..y3, so invocations can be placed back to back.
// Overwrites: x0..x3, acc0..acc3, hlp0 and the condition flags.
|
||||
#define gfpMulBy2Inline2 \
|
||||
ADDS y0, y0, x0; \ // x3..x0 = y + y
|
||||
ADCS y1, y1, x1; \
|
||||
ADCS y2, y2, x2; \
|
||||
ADCS y3, y3, x3; \
|
||||
ADC $0, ZR, hlp0; \ // hlp0 = carry out of the doubling
|
||||
SUBS const0, x0, acc0; \ // acc3..acc0 = doubled value - const, borrow chained upward
|
||||
SBCS const1, x1, acc1;\
|
||||
SBCS const2, x2, acc2; \
|
||||
SBCS const3, x3, acc3;\
|
||||
SBCS $0, hlp0, hlp0;\ // carry clear here means the subtraction borrowed
|
||||
CSEL CC, x0, acc0, y0;\ // borrow: keep the unreduced x; otherwise take the reduced acc
|
||||
CSEL CC, x1, acc1, y1;\
|
||||
CSEL CC, x2, acc2, y2;\
|
||||
CSEL CC, x3, acc3, y3;
|
||||
|
||||
/* ---------------------------------------*/
|
||||
// (x3, x2, x1, x0) = (x3, x2, x1, x0) + (y3, y2, y1, y0)
|
||||
#define gfpAddInline \
|
||||
@ -665,8 +682,7 @@ TEXT ·gfp2SquareU(SB),NOSPLIT,$72-16
|
||||
STx (x2in)
|
||||
|
||||
//LDy (tmp1)
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline
|
||||
MOVD $0, y0
|
||||
MOVD $0, y1
|
||||
@ -696,24 +712,18 @@ TEXT ·curvePointDoubleComplete(SB),NOSPLIT,$168-16
|
||||
CALL gfpSqrInternal(SB) // t0 := Y^2
|
||||
STy (tmp0)
|
||||
|
||||
gfpMulBy2Inline // Z3 := t0 + t0
|
||||
x2y
|
||||
gfpMulBy2Inline // Z3 := Z3 + Z3
|
||||
x2y
|
||||
gfpMulBy2Inline2 // Z3 := t0 + t0
|
||||
gfpMulBy2Inline2 // Z3 := Z3 + Z3
|
||||
gfpMulBy2Inline // Z3 := Z3 + Z3
|
||||
STx (z3t)
|
||||
|
||||
LDx (z1in)
|
||||
CALL gfpSqrInternal(SB) // t2 := Z^2
|
||||
STy (tmp1)
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
LDx (tmp1)
|
||||
CALL gfpSubInternal(SB) // t2 := 3b * t2 = 3bZ^2
|
||||
STx (tmp1)
|
||||
@ -854,14 +864,10 @@ TEXT ·curvePointAddComplete(SB),0,$264-24
|
||||
STx (tmp0)
|
||||
|
||||
LDy (tmp2)
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
LDx (tmp2)
|
||||
CALL gfpSubInternal(SB) // t2 := 3b * t2 = 3bZ1Z2
|
||||
STx (tmp2)
|
||||
@ -875,14 +881,10 @@ TEXT ·curvePointAddComplete(SB),0,$264-24
|
||||
STx (tmp1)
|
||||
|
||||
LDy (y3t)
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline
|
||||
x2y
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
gfpMulBy2Inline2
|
||||
LDx (y3t)
|
||||
CALL gfpSubInternal(SB) // Y3 = 3b * Y3 = 3b(X1Z2 + X2Z1)
|
||||
STx (y3t)
|
||||
|
Loading…
x
Reference in New Issue
Block a user