mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-22 02:06:18 +08:00
fix error
This commit is contained in:
parent
485d6317a3
commit
799a1d3ce4
@ -134,9 +134,9 @@ TEXT ·p256NegCond(SB),NOSPLIT,$0
|
||||
MOVD cond+24(FP), hlp0
|
||||
MOVD a_ptr, res_ptr
|
||||
// acc = poly
|
||||
MOVD $-1, acc0
|
||||
MOVD p256p<>+0x00(SB), acc0
|
||||
MOVD p256p<>+0x08(SB), acc1
|
||||
MOVD $-1, acc2
|
||||
MOVD p256p<>+0x10(SB), acc2
|
||||
MOVD p256p<>+0x18(SB), acc3
|
||||
// Load the original value
|
||||
LDP 0*16(a_ptr), (t0, t1)
|
||||
@ -227,13 +227,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
||||
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
||||
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
||||
|
||||
UMULH const3, acc0, hlp1 // hlp1 = H(acc0*p3)
|
||||
ADC $0, hlp1
|
||||
UMULH const3, acc0, y1 // y1 = H(acc0*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
||||
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
||||
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
||||
ADC $0, hlp1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||
ADC $0, y1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||
|
||||
// Second reduction step
|
||||
MUL const1, acc1, t0
|
||||
@ -247,13 +247,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
||||
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
||||
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
||||
|
||||
UMULH const3, acc1, hlp1 // hlp1 = H(acc1*p3)
|
||||
ADC $0, hlp1
|
||||
UMULH const3, acc1, y1 // y1 = H(acc1*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
||||
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
||||
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
||||
ADC $0, hlp1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||
ADC $0, y1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||
// Third reduction step
|
||||
MUL const1, acc2, t0
|
||||
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
||||
@ -266,13 +266,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
||||
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
||||
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
||||
|
||||
UMULH const3, acc2, hlp1 // hlp1 = H(acc2*p3)
|
||||
ADC $0, hlp1
|
||||
UMULH const3, acc2, y1 // y1 = H(acc2*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
||||
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
||||
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
||||
ADC $0, hlp1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||
ADC $0, y1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||
// Last reduction step
|
||||
MUL const1, acc3, t0
|
||||
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
||||
@ -285,13 +285,13 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
||||
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
||||
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
||||
|
||||
UMULH const3, acc3, hlp1 // hlp1 = H(acc3*p3)
|
||||
ADC $0, hlp1
|
||||
UMULH const3, acc3, y1 // y1 = H(acc3*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
||||
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
||||
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
||||
ADC $0, hlp1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||
ADC $0, y1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||
|
||||
SUBS const0, acc0, t0
|
||||
SBCS const1, acc1, t1
|
||||
@ -834,10 +834,10 @@ TEXT sm2P256Subinternal<>(SB),NOSPLIT,$0
|
||||
SBCS x3, y3, acc3
|
||||
SBC $0, ZR, t0
|
||||
|
||||
ADDS $-1, acc0, acc4
|
||||
ADCS const0, acc1, acc5
|
||||
ADCS $-1, acc2, acc6
|
||||
ADC const1, acc3, acc7
|
||||
ADDS const0, acc0, acc4
|
||||
ADCS const1, acc1, acc5
|
||||
ADCS const2, acc2, acc6
|
||||
ADC const3, acc3, acc7
|
||||
|
||||
ANDS $1, t0
|
||||
CSEL EQ, acc0, acc4, x0
|
||||
@ -906,81 +906,81 @@ TEXT sm2P256SqrInternal<>(SB),NOSPLIT,$0
|
||||
UMULH x3, x3, t1
|
||||
ADCS t1, acc7, acc7
|
||||
// First reduction step
|
||||
MUL const0, acc0, t0
|
||||
MUL const1, acc0, t0
|
||||
ADDS t0, acc1, acc1 // (carry1, acc1) = acc1 + L(acc0*p1)
|
||||
UMULH const0, acc0, y0 // y0 = H(acc0*p1)
|
||||
UMULH const1, acc0, y0 // y0 = H(acc0*p1)
|
||||
|
||||
MUL $-1, acc0, t0
|
||||
MUL const2, acc0, t0
|
||||
ADCS t0, acc2, acc2 // (carry2, acc2) = acc2 + L(acc0*p2)
|
||||
UMULH $-1, acc0, hlp0 // hlp0 = H(acc0*p2)
|
||||
UMULH const2, acc0, hlp0 // hlp0 = H(acc0*p2)
|
||||
|
||||
MUL const1, acc0, t0 // t0 = L(acc0*p3)
|
||||
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
||||
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
||||
|
||||
UMULH const1, acc0, hlp1 // hlp1 = H(acc0*p3)
|
||||
ADC $0, hlp1 // hlp1 = carry3 + hlp1
|
||||
UMULH const3, acc0, y1 // y1 = H(acc0*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
||||
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
||||
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
||||
ADC $0, hlp1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||
ADC $0, y1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||
// Second reduction step
|
||||
MUL const0, acc1, t0
|
||||
MUL const1, acc1, t0
|
||||
ADDS t0, acc2, acc2 // (carry1, acc2) = acc2 + L(acc1*p1)
|
||||
UMULH const0, acc1, y0 // y0 = H(acc1*p1)
|
||||
UMULH const1, acc1, y0 // y0 = H(acc1*p1)
|
||||
|
||||
MUL $-1, acc1, t0
|
||||
MUL const2, acc1, t0
|
||||
ADCS t0, acc3, acc3 // (carry2, acc3) = acc3 + L(acc1*p2)
|
||||
UMULH $-1, acc1, hlp0 // hlp0 = H(acc1*p2)
|
||||
UMULH const2, acc1, hlp0 // hlp0 = H(acc1*p2)
|
||||
|
||||
MUL const1, acc1, t0 // t0 = L(acc1*p3)
|
||||
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
||||
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
||||
|
||||
UMULH const1, acc1, hlp1 // hlp1 = H(acc1*p3)
|
||||
ADC $0, hlp1 // hlp1 = carry3 + hlp1
|
||||
UMULH const3, acc1, y1 // y1 = H(acc1*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
||||
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
||||
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
||||
ADC $0, hlp1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||
ADC $0, y1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||
// Third reduction step
|
||||
MUL const0, acc2, t0
|
||||
MUL const1, acc2, t0
|
||||
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
||||
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||
|
||||
MUL $-1, acc2, t0
|
||||
MUL const2, acc2, t0
|
||||
ADCS t0, acc0, acc0 // (carry2, acc0) = acc0 + L(acc2*p2)
|
||||
UMULH $-1, acc2, hlp0 // hlp0 = H(acc2*p2)
|
||||
UMULH const2, acc2, hlp0 // hlp0 = H(acc2*p2)
|
||||
|
||||
MUL const1, acc2, t0 // t0 = L(acc2*p3)
|
||||
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
||||
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
||||
|
||||
UMULH const1, acc2, hlp1 // hlp1 = H(acc2*p3)
|
||||
ADC $0, hlp1 // hlp1 = carry3 + hlp1
|
||||
UMULH const3, acc2, y1 // y1 = H(acc2*p3)
|
||||
ADC $0, y1
|
||||
|
||||
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
||||
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
||||
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
||||
ADC $0, hlp1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||
ADC $0, y1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||
// Last reduction step
|
||||
MUL const0, acc3, t0
|
||||
MUL const1, acc3, t0
|
||||
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
||||
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||
|
||||
MUL $-1, acc3, t0
|
||||
MUL const2, acc3, t0
|
||||
ADCS t0, acc1, acc1 // (carry2, acc1) = acc1 + L(acc3*p2)
|
||||
UMULH $-1, acc3, hlp0 // hlp0 = H(acc3*p2)
|
||||
UMULH const2, acc3, hlp0 // hlp0 = H(acc3*p2)
|
||||
|
||||
MUL const1, acc3, t0 // t0 = L(acc3*p3)
|
||||
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
||||
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
||||
|
||||
UMULH const1, acc3, hlp1 // hlp1 = H(acc3*p3)
|
||||
UMULH const3, acc3, y1 // y1 = H(acc3*p3)
|
||||
ADC $0, acc7 // acc7 = carry3 + acc7
|
||||
|
||||
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
||||
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
||||
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
||||
ADC $0, hlp1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||
ADC $0, y1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||
|
||||
// Add bits [511:256] of the sqr result
|
||||
ADDS acc4, acc0, acc0
|
||||
@ -989,10 +989,10 @@ TEXT sm2P256SqrInternal<>(SB),NOSPLIT,$0
|
||||
ADCS acc7, acc3, acc3
|
||||
ADC $0, ZR, acc4
|
||||
|
||||
SUBS $-1, acc0, t0
|
||||
SBCS const0, acc1, t1
|
||||
SBCS $-1, acc2, t2
|
||||
SBCS const1, acc3, t3
|
||||
SUBS const0, acc0, t0
|
||||
SBCS const1, acc1, t1
|
||||
SBCS const2, acc2, t2
|
||||
SBCS const3, acc3, t3
|
||||
SBCS $0, acc4, acc4
|
||||
|
||||
CSEL CS, t0, acc0, y0
|
||||
@ -1019,24 +1019,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
||||
UMULH y0, x3, acc4
|
||||
ADC $0, acc4
|
||||
// First reduction step
|
||||
MUL const0, acc0, t0
|
||||
MUL const1, acc0, t0
|
||||
ADDS t0, acc1, acc1 // (carry1, acc1) = acc1 + L(acc0*p1)
|
||||
UMULH const0, acc0, y0 // y0 = H(acc0*p1)
|
||||
UMULH const1, acc0, y0 // y0 = H(acc0*p1)
|
||||
|
||||
MUL $-1, acc0, t0
|
||||
MUL const2, acc0, t0
|
||||
ADCS t0, acc2, acc2 // (carry2, acc2) = acc2 + L(acc0*p2)
|
||||
UMULH $-1, acc0, hlp0 // hlp0 = H(acc0*p2)
|
||||
UMULH const2, acc0, hlp0 // hlp0 = H(acc0*p2)
|
||||
|
||||
MUL const1, acc0, t0 // t0 = L(acc0*p3)
|
||||
MUL const3, acc0, t0 // t0 = L(acc0*p3)
|
||||
ADCS t0, acc3, acc3 // (carry3,acc3) = acc3 + L(acc0*p3)
|
||||
|
||||
UMULH const1, acc0, hlp1 // hlp1 = H(acc0*p3)
|
||||
UMULH const3, acc0, acc5 // acc5 = H(acc0*p3)
|
||||
ADC $0, acc4 // acc4 = carry3 + acc4
|
||||
|
||||
ADDS acc0, acc1, acc1 // (carry4, acc1) = acc0 + acc1 + L(acc0*p1)
|
||||
ADCS y0, acc2, acc2 // (carry5, acc2) = carry4 + acc2 + L(acc0*p2) + H(acc0*p1)
|
||||
ADCS hlp0, acc3, acc3 // (carry6, acc3) = carry5 + acc3 + L(acc0*p3) + H(acc0*p2)
|
||||
ADC $0, hlp1, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||
ADC $0, acc5, acc0 // acc0 = carry6 + H(acc0*p3)
|
||||
|
||||
// y[1] * x
|
||||
MUL y1, x0, t0
|
||||
@ -1061,24 +1061,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
||||
ADCS t3, acc4
|
||||
ADC hlp0, acc5
|
||||
// Second reduction step
|
||||
MUL const0, acc1, t0
|
||||
MUL const1, acc1, t0
|
||||
ADDS t0, acc2, acc2 // (carry1, acc2) = acc2 + L(acc1*p1)
|
||||
UMULH const0, acc1, y0 // y0 = H(acc1*p1)
|
||||
UMULH const1, acc1, y0 // y0 = H(acc1*p1)
|
||||
|
||||
MUL $-1, acc1, t0
|
||||
MUL const2, acc1, t0
|
||||
ADCS t0, acc3, acc3 // (carry2, acc3) = acc3 + L(acc1*p2)
|
||||
UMULH $-1, acc1, hlp0 // hlp0 = H(acc1*p2)
|
||||
UMULH const2, acc1, hlp0 // hlp0 = H(acc1*p2)
|
||||
|
||||
MUL const1, acc1, t0 // t0 = L(acc1*p3)
|
||||
MUL const3, acc1, t0 // t0 = L(acc1*p3)
|
||||
ADCS t0, acc0, acc0 // (carry3,acc0) = acc0 + L(acc1*p3)
|
||||
|
||||
UMULH const1, acc1, hlp1 // hlp1 = H(acc1*p3)
|
||||
UMULH const3, acc1, y1 // y1 = H(acc1*p3)
|
||||
ADC $0, acc5 // acc5 = carry3 + acc5
|
||||
|
||||
ADDS acc1, acc2, acc2 // (carry4, acc2) = acc1 + acc2 + L(acc1*p1)
|
||||
ADCS y0, acc3, acc3 // (carry5, acc3) = carry4 + acc3 + L(acc1*p2) + H(acc1*p1)
|
||||
ADCS hlp0, acc0, acc0 // (carry6, acc0) = carry5 + acc0 + L(acc1*p3) + H(acc1*p2)
|
||||
ADC $0, hlp1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||
ADC $0, y1, acc1 // acc1 = carry6 + H(acc1*p3)
|
||||
|
||||
// y[2] * x
|
||||
MUL y2, x0, t0
|
||||
@ -1103,24 +1103,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
||||
ADCS t3, acc5
|
||||
ADC hlp0, acc6
|
||||
// Third reduction step
|
||||
MUL const0, acc2, t0
|
||||
MUL const1, acc2, t0
|
||||
ADDS t0, acc3, acc3 // (carry1, acc3) = acc3 + L(acc2*p1)
|
||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
||||
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||
|
||||
MUL $-1, acc2, t0
|
||||
MUL const2, acc2, t0
|
||||
ADCS t0, acc0, acc0 // (carry2, acc0) = acc0 + L(acc2*p2)
|
||||
UMULH $-1, acc2, hlp0 // hlp0 = H(acc2*p2)
|
||||
UMULH const2, acc2, hlp0 // hlp0 = H(acc2*p2)
|
||||
|
||||
MUL const1, acc2, t0 // t0 = L(acc2*p3)
|
||||
MUL const3, acc2, t0 // t0 = L(acc2*p3)
|
||||
ADCS t0, acc1, acc1 // (carry3,acc1) = acc1 + L(acc2*p3)
|
||||
|
||||
UMULH const1, acc2, hlp1 // hlp1 = H(acc2*p3)
|
||||
UMULH const3, acc2, y1 // y1 = H(acc2*p3)
|
||||
ADC $0, acc6 // acc6 = carry3 + acc6
|
||||
|
||||
ADDS acc2, acc3, acc3 // (carry4, acc3) = acc2 + acc3 + L(acc2*p1)
|
||||
ADCS y0, acc0, acc0 // (carry5, acc0) = carry4 + acc0 + L(acc2*p2) + H(acc2*p1)
|
||||
ADCS hlp0, acc1, acc1 // (carry6, acc1) = carry5 + acc1 + L(acc2*p3) + H(acc2*p2)
|
||||
ADC $0, hlp1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||
ADC $0, y1, acc2 // acc2 = carry6 + H(acc2*p3)
|
||||
|
||||
// y[3] * x
|
||||
MUL y3, x0, t0
|
||||
@ -1145,24 +1145,24 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
||||
ADCS t3, acc6
|
||||
ADC hlp0, acc7
|
||||
// Last reduction step
|
||||
MUL const0, acc3, t0
|
||||
MUL const1, acc3, t0
|
||||
ADDS t0, acc0, acc0 // (carry1, acc0) = acc0 + L(acc3*p1)
|
||||
UMULH const0, acc1, y0 // y0 = H(acc2*p1)
|
||||
UMULH const1, acc1, y0 // y0 = H(acc2*p1)
|
||||
|
||||
MUL $-1, acc3, t0
|
||||
MUL const2, acc3, t0
|
||||
ADCS t0, acc1, acc1 // (carry2, acc1) = acc1 + L(acc3*p2)
|
||||
UMULH $-1, acc3, hlp0 // hlp0 = H(acc3*p2)
|
||||
UMULH const2, acc3, hlp0 // hlp0 = H(acc3*p2)
|
||||
|
||||
MUL const1, acc3, t0 // t0 = L(acc3*p3)
|
||||
MUL const3, acc3, t0 // t0 = L(acc3*p3)
|
||||
ADCS t0, acc2, acc2 // (carry3,acc2) = acc2 + L(acc3*p3)
|
||||
|
||||
UMULH const1, acc3, hlp1 // hlp1 = H(acc3*p3)
|
||||
UMULH const3, acc3, y1 // y1 = H(acc3*p3)
|
||||
ADC $0, acc7 // acc7 = carry3 + acc7
|
||||
|
||||
ADDS acc3, acc0, acc0 // (carry4, acc0) = acc3 + acc0 + L(acc3*p1)
|
||||
ADCS y0, acc1, acc1 // (carry5, acc1) = carry4 + acc1 + L(acc3*p2) + H(acc3*p1)
|
||||
ADCS hlp0, acc2, acc2 // (carry6, acc2) = carry5 + acc2 + L(acc3*p3) + H(acc3*p2)
|
||||
ADC $0, hlp1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||
ADC $0, y1, acc3 // acc3 = carry6 + H(acc3*p3)
|
||||
|
||||
// Add bits [511:256] of the mul result
|
||||
ADDS acc4, acc0, acc0
|
||||
@ -1171,10 +1171,10 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
||||
ADCS acc7, acc3, acc3
|
||||
ADC $0, ZR, acc4
|
||||
|
||||
SUBS $-1, acc0, t0
|
||||
SBCS const0, acc1, t1
|
||||
SBCS $-1, acc2, t2
|
||||
SBCS const1, acc3, t3
|
||||
SUBS const0, acc0, t0
|
||||
SBCS const1, acc1, t1
|
||||
SBCS const2, acc2, t2
|
||||
SBCS const3, acc3, t3
|
||||
SBCS $0, acc4, acc4
|
||||
|
||||
CSEL CS, t0, acc0, y0
|
||||
@ -1189,10 +1189,10 @@ TEXT sm2P256MulInternal<>(SB),NOSPLIT,$0
|
||||
ADCS y2, y2, x2; \
|
||||
ADCS y3, y3, x3; \
|
||||
ADC $0, ZR, hlp0; \
|
||||
SUBS $-1, x0, t0; \
|
||||
SBCS const0, x1, t1;\
|
||||
SBCS $-1, x2, t2; \
|
||||
SBCS const1, x3, t3;\
|
||||
SUBS const0, x0, t0; \
|
||||
SBCS const1, x1, t1;\
|
||||
SBCS const2, x2, t2; \
|
||||
SBCS const3, x3, t3;\
|
||||
SBCS $0, hlp0, hlp0;\
|
||||
CSEL CC, x0, t0, x0;\
|
||||
CSEL CC, x1, t1, x1;\
|
||||
@ -1240,25 +1240,24 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-96
|
||||
CMP $0, hlp1
|
||||
CSEL EQ, ZR, t0, hlp1
|
||||
|
||||
MOVD p256p<>+0x08(SB), const0
|
||||
MOVD p256p<>+0x18(SB), const1
|
||||
LDP p256p<>+0x00(SB), (const0, const1)
|
||||
LDP p256p<>+0x10(SB), (const2, const3)
|
||||
EOR t2<<1, hlp1
|
||||
|
||||
// Negate y2in based on sign
|
||||
LDP 2*16(b_ptr), (y0, y1)
|
||||
LDP 3*16(b_ptr), (y2, y3)
|
||||
|
||||
MOVD p256p<>+0x00(SB), acc4
|
||||
SUBS y0, acc4, acc0
|
||||
SBCS y1, const0, acc1
|
||||
SBCS y2, acc4, acc2
|
||||
SBCS y3, const1, acc3
|
||||
SUBS y0, const0, acc0
|
||||
SBCS y1, const1, acc1
|
||||
SBCS y2, const2, acc2
|
||||
SBCS y3, const3, acc3
|
||||
SBC $0, ZR, t0
|
||||
|
||||
ADDS $-1, acc0, acc4
|
||||
ADCS const0, acc1, acc5
|
||||
ADCS $-1, acc2, acc6
|
||||
ADCS const1, acc3, acc7
|
||||
ADDS const0, acc0, acc4
|
||||
ADCS const1, acc1, acc5
|
||||
ADCS const2, acc2, acc6
|
||||
ADCS const3, acc3, acc7
|
||||
ADC $0, t0, t0
|
||||
|
||||
CMP $0, t0
|
||||
@ -1408,10 +1407,10 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-96
|
||||
ADCS y2, x2, x2; \
|
||||
ADCS y3, x3, x3; \
|
||||
ADC $0, ZR, hlp0; \
|
||||
SUBS $-1, x0, t0; \
|
||||
SBCS const0, x1, t1;\
|
||||
SBCS $-1, x2, t2; \
|
||||
SBCS const1, x3, t3;\
|
||||
SUBS const0, x0, t0; \
|
||||
SBCS const1, x1, t1;\
|
||||
SBCS const2, x2, t2; \
|
||||
SBCS const3, x3, t3;\
|
||||
SBCS $0, hlp0, hlp0;\
|
||||
CSEL CC, x0, t0, x0;\
|
||||
CSEL CC, x1, t1, x1;\
|
||||
@ -1428,8 +1427,8 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-48
|
||||
MOVD res+0(FP), res_ptr
|
||||
MOVD in+24(FP), a_ptr
|
||||
|
||||
MOVD p256p<>+0x08(SB), const0
|
||||
MOVD p256p<>+0x18(SB), const1
|
||||
LDP p256p<>+0x00(SB), (const0, const1)
|
||||
LDP p256p<>+0x10(SB), (const2, const3)
|
||||
|
||||
// Begin point double
|
||||
LDP 4*16(a_ptr), (x0, x1)
|
||||
@ -1471,10 +1470,10 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-48
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
|
||||
// Divide by 2
|
||||
ADDS $-1, y0, t0
|
||||
ADCS const0, y1, t1
|
||||
ADCS $-1, y2, t2
|
||||
ADCS const1, y3, t3
|
||||
ADDS const0, y0, t0
|
||||
ADCS const1, y1, t1
|
||||
ADCS const2, y2, t2
|
||||
ADCS const3, y3, t3
|
||||
ADC $0, ZR, hlp0
|
||||
|
||||
ANDS $1, y0, ZR
|
||||
@ -1530,8 +1529,8 @@ TEXT ·p256PointAddAsm(SB),0,$392-80
|
||||
MOVD in1+24(FP), a_ptr
|
||||
MOVD in2+48(FP), b_ptr
|
||||
|
||||
MOVD p256p<>+0x08(SB), const0
|
||||
MOVD p256p<>+0x18(SB), const1
|
||||
LDP p256p<>+0x00(SB), (const0, const1)
|
||||
LDP p256p<>+0x10(SB), (const2, const3)
|
||||
|
||||
// Begin point add
|
||||
LDx(z2in)
|
||||
@ -1558,21 +1557,21 @@ TEXT ·p256PointAddAsm(SB),0,$392-80
|
||||
STx(r)
|
||||
|
||||
MOVD $1, acc1
|
||||
ORR x0, x1, t0 // Check if zero mod p256
|
||||
ORR x2, x3, t1
|
||||
ORR t1, t0, t0
|
||||
CMP $0, t0
|
||||
ORR x0, x1, acc2 // Check if zero mod p256
|
||||
ORR x2, x3, acc3
|
||||
ORR acc3, acc2, acc2
|
||||
CMP $0, acc2
|
||||
CSEL EQ, acc1, ZR, hlp1
|
||||
|
||||
EOR $-1, x0, t0
|
||||
EOR const0, x1, t1
|
||||
EOR $-1, x2, t2
|
||||
EOR const1, x3, t3
|
||||
EOR const0, x0, acc2
|
||||
EOR const1, x1, acc3
|
||||
EOR const2, x2, acc4
|
||||
EOR const3, x3, acc5
|
||||
|
||||
ORR t0, t1, t0
|
||||
ORR t2, t3, t1
|
||||
ORR t1, t0, t0
|
||||
CMP $0, t0
|
||||
ORR acc2, acc3, acc2
|
||||
ORR acc4, acc5, acc3
|
||||
ORR acc3, acc2, acc2
|
||||
CMP $0, acc2
|
||||
CSEL EQ, acc1, hlp1, hlp1
|
||||
|
||||
LDx(z2sqr)
|
||||
@ -1590,21 +1589,21 @@ TEXT ·p256PointAddAsm(SB),0,$392-80
|
||||
STx(h)
|
||||
|
||||
MOVD $1, acc1
|
||||
ORR x0, x1, t0 // Check if zero mod p256
|
||||
ORR x2, x3, t1
|
||||
ORR t1, t0, t0
|
||||
CMP $0, t0
|
||||
ORR x0, x1, acc2 // Check if zero mod p256
|
||||
ORR x2, x3, acc3
|
||||
ORR acc3, acc2, acc2
|
||||
CMP $0, acc2
|
||||
CSEL EQ, acc1, ZR, hlp0
|
||||
|
||||
EOR $-1, x0, t0
|
||||
EOR const0, x1, t1
|
||||
EOR $-1, x2, t2
|
||||
EOR const1, x3, t3
|
||||
EOR const0, x0, acc2
|
||||
EOR const1, x1, acc3
|
||||
EOR const2, x2, acc4
|
||||
EOR const3, x3, acc5
|
||||
|
||||
ORR t0, t1, t0
|
||||
ORR t2, t3, t1
|
||||
ORR t1, t0, t0
|
||||
CMP $0, t0
|
||||
ORR acc2, acc3, acc2
|
||||
ORR acc4, acc5, acc3
|
||||
ORR acc3, acc2, acc2
|
||||
CMP $0, acc2
|
||||
CSEL EQ, acc1, hlp0, hlp0
|
||||
|
||||
AND hlp0, hlp1, hlp1
|
||||
|
Loading…
x
Reference in New Issue
Block a user