mirror of
https://github.com/emmansun/gmsm.git
synced 2025-10-15 15:50:46 +08:00
internal/sm2ec: loong64 v1
This commit is contained in:
parent
31b941908a
commit
aa2ef453f1
@ -7,9 +7,12 @@
|
||||
#include "textflag.h"
|
||||
|
||||
#define ZERO R0
|
||||
#define RSP R3
|
||||
#define res_ptr R4
|
||||
#define x_ptr R5
|
||||
#define y_ptr R6
|
||||
#define a_ptr x_ptr
|
||||
#define b_ptr y_ptr
|
||||
|
||||
#define acc0 R7
|
||||
#define acc1 R8
|
||||
@ -528,13 +531,13 @@ TEXT ·p256Sqr(SB),NOSPLIT,$0
|
||||
ADDV $1, const0, const1
|
||||
|
||||
sqrLoop:
|
||||
SUBV $1, y_ptr
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
MOVV y0, x0
|
||||
MOVV y1, x1
|
||||
MOVV y2, x2
|
||||
MOVV y3, x3
|
||||
BNE y_ptr, sqrLoop
|
||||
SUBV $1, y_ptr
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
MOVV y0, x0
|
||||
MOVV y1, x1
|
||||
MOVV y2, x2
|
||||
MOVV y3, x3
|
||||
BNE y_ptr, sqrLoop
|
||||
|
||||
MOVV y0, (8*0)(res_ptr)
|
||||
MOVV y1, (8*1)(res_ptr)
|
||||
@ -1288,11 +1291,147 @@ TEXT ·p256OrdReduce(SB),NOSPLIT,$0
|
||||
/* ---------------------------------------*/
|
||||
// func p256Select(res *SM2P256Point, table *p256Table, idx, limit int)
//
// Copies table entry number idx (1-based) into res. Every one of the limit
// entries is loaded and masked rather than branched on, so the sequence of
// memory reads does not depend on idx. An idx that matches no entry (for
// example 0) leaves res all zero. Each entry is read as twelve 64-bit limbs
// (x, y, z = 96 bytes), matching the twelve limbs written to res.
TEXT ·p256Select(SB),NOSPLIT,$0
	MOVV	limit+24(FP), x_ptr
	MOVV	idx+16(FP), const0
	MOVV	table+8(FP), y_ptr
	MOVV	res+0(FP), res_ptr

	// Accumulators for the selected point: x -> x0..x3, y -> y0..y3, z -> t0..t3.
	MOVV	$0, x0
	MOVV	$0, x1
	MOVV	$0, x2
	MOVV	$0, x3
	MOVV	$0, y0
	MOVV	$0, y1
	MOVV	$0, y2
	MOVV	$0, y3
	MOVV	$0, t0
	MOVV	$0, t1
	MOVV	$0, t2
	MOVV	$0, t3

	MOVV	$0, const1		// running entry number

loop_select:
	ADDV	$1, const1, const1
	XOR	const1, const0, hlp0	// hlp0 == 0 only for the wanted entry

	// x coordinate: keep the limbs only when hlp0 == 0.
	MOVV	(8*0)(y_ptr), acc0
	MOVV	(8*1)(y_ptr), acc1
	MOVV	(8*2)(y_ptr), acc2
	MOVV	(8*3)(y_ptr), acc3
	MASKNEZ	hlp0, acc0, acc0
	MASKNEZ	hlp0, acc1, acc1
	MASKNEZ	hlp0, acc2, acc2
	MASKNEZ	hlp0, acc3, acc3
	OR	acc0, x0, x0
	OR	acc1, x1, x1
	OR	acc2, x2, x2
	OR	acc3, x3, x3

	// y coordinate
	MOVV	(8*4)(y_ptr), acc0
	MOVV	(8*5)(y_ptr), acc1
	MOVV	(8*6)(y_ptr), acc2
	MOVV	(8*7)(y_ptr), acc3
	MASKNEZ	hlp0, acc0, acc0
	MASKNEZ	hlp0, acc1, acc1
	MASKNEZ	hlp0, acc2, acc2
	MASKNEZ	hlp0, acc3, acc3
	OR	acc0, y0, y0
	OR	acc1, y1, y1
	OR	acc2, y2, y2
	OR	acc3, y3, y3

	// z coordinate
	MOVV	(8*8)(y_ptr), acc0
	MOVV	(8*9)(y_ptr), acc1
	MOVV	(8*10)(y_ptr), acc2
	MOVV	(8*11)(y_ptr), acc3
	MASKNEZ	hlp0, acc0, acc0
	MASKNEZ	hlp0, acc1, acc1
	MASKNEZ	hlp0, acc2, acc2
	MASKNEZ	hlp0, acc3, acc3
	OR	acc0, t0, t0
	OR	acc1, t1, t1
	OR	acc2, t2, t2
	OR	acc3, t3, t3

	// Step over the whole 96-byte entry. The previous code advanced only
	// 64 bytes per iteration, so every entry after the first was read from
	// a misaligned position.
	ADDVU	$96, y_ptr, y_ptr
	BNE	const1, x_ptr, loop_select

	MOVV	x0, (8*0)(res_ptr)
	MOVV	x1, (8*1)(res_ptr)
	MOVV	x2, (8*2)(res_ptr)
	MOVV	x3, (8*3)(res_ptr)
	MOVV	y0, (8*4)(res_ptr)
	MOVV	y1, (8*5)(res_ptr)
	MOVV	y2, (8*6)(res_ptr)
	MOVV	y3, (8*7)(res_ptr)
	MOVV	t0, (8*8)(res_ptr)
	MOVV	t1, (8*9)(res_ptr)
	MOVV	t2, (8*10)(res_ptr)
	MOVV	t3, (8*11)(res_ptr)

	RET
|
||||
|
||||
/* ---------------------------------------*/
|
||||
// func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
//
// Copies affine table entry number idx (1-based) into res. All 32 entries are
// loaded and masked rather than branched on, so the sequence of memory reads
// does not depend on idx. An idx that matches no entry (for example 0) leaves
// res all zero. Each entry is read as eight 64-bit limbs (x, y = 64 bytes).
TEXT ·p256SelectAffine(SB),NOSPLIT,$0
	// MOVV is the loong64 64-bit move; MOVD is not a loong64 mnemonic.
	MOVV	idx+16(FP), t0
	MOVV	table+8(FP), t1
	MOVV	res+0(FP), res_ptr

	// Accumulators for the selected point.
	XOR	x0, x0, x0
	XOR	x1, x1, x1
	XOR	x2, x2, x2
	XOR	x3, x3, x3
	XOR	y0, y0, y0
	XOR	y1, y1, y1
	XOR	y2, y2, y2
	XOR	y3, y3, y3

	MOVV	$0, t2			// running entry number
	MOVV	$32, const0		// number of table entries scanned

loop_select:
	ADDV	$1, t2, t2
	XOR	t2, t0, hlp0		// hlp0 == 0 only for the wanted entry

	// x coordinate: keep the limbs only when hlp0 == 0.
	MOVV	(8*0)(t1), acc0
	MOVV	(8*1)(t1), acc1
	MOVV	(8*2)(t1), acc2
	MOVV	(8*3)(t1), acc3
	MASKNEZ	hlp0, acc0, acc0
	MASKNEZ	hlp0, acc1, acc1
	MASKNEZ	hlp0, acc2, acc2
	MASKNEZ	hlp0, acc3, acc3
	OR	acc0, x0, x0
	OR	acc1, x1, x1
	OR	acc2, x2, x2
	OR	acc3, x3, x3

	// y coordinate
	MOVV	(8*4)(t1), acc0
	MOVV	(8*5)(t1), acc1
	MOVV	(8*6)(t1), acc2
	MOVV	(8*7)(t1), acc3
	MASKNEZ	hlp0, acc0, acc0
	MASKNEZ	hlp0, acc1, acc1
	MASKNEZ	hlp0, acc2, acc2
	MASKNEZ	hlp0, acc3, acc3
	OR	acc0, y0, y0
	OR	acc1, y1, y1
	OR	acc2, y2, y2
	OR	acc3, y3, y3

	// Step over the whole 64-byte entry. The previous code advanced only
	// 32 bytes per iteration, so every entry after the first was read from
	// a misaligned position.
	ADDVU	$64, t1, t1
	BNE	t2, const0, loop_select

	MOVV	x0, (8*0)(res_ptr)
	MOVV	x1, (8*1)(res_ptr)
	MOVV	x2, (8*2)(res_ptr)
	MOVV	x3, (8*3)(res_ptr)
	MOVV	y0, (8*4)(res_ptr)
	MOVV	y1, (8*5)(res_ptr)
	MOVV	y2, (8*6)(res_ptr)
	MOVV	y3, (8*7)(res_ptr)
	RET
|
||||
|
||||
/* ---------------------------------------*/
|
||||
@ -1420,9 +1559,304 @@ TEXT ·p256MulBy2(SB),NOSPLIT,$0
|
||||
MOVV x3, (8*3)(res_ptr)
|
||||
RET
|
||||
|
||||
/* ---------------------------------------*/
|
||||
#define x1in(off) (off)(a_ptr)
|
||||
#define y1in(off) (off + 32)(a_ptr)
|
||||
#define z1in(off) (off + 64)(a_ptr)
|
||||
#define x2in(off) (off)(b_ptr)
|
||||
#define z2in(off) (off + 64)(b_ptr)
|
||||
#define x3out(off) (off)(res_ptr)
|
||||
#define y3out(off) (off + 32)(res_ptr)
|
||||
#define z3out(off) (off + 64)(res_ptr)
|
||||
// LDx/LDy load four consecutive 64-bit limbs through the addressing macro
// src (e.g. x1in, z1sqr) into x0..x3 / y0..y3. STx/STy store them back
// through the addressing macro dst. The second MOVV of LDx and LDy was
// missing the comma between its operands.
#define LDx(src) MOVV src(0), x0; MOVV src(8), x1; MOVV src(16), x2; MOVV src(24), x3
#define LDy(src) MOVV src(0), y0; MOVV src(8), y1; MOVV src(16), y2; MOVV src(24), y3
#define STx(dst) MOVV x0, dst(0); MOVV x1, dst(8); MOVV x2, dst(16); MOVV x3, dst(24)
#define STy(dst) MOVV y0, dst(0); MOVV y1, dst(8); MOVV y2, dst(16); MOVV y3, dst(24)
|
||||
/* ---------------------------------------*/
|
||||
#define y2in(off) (32*0 + 8 + off)(RSP)
|
||||
#define s2(off) (32*1 + 8 + off)(RSP)
|
||||
#define z1sqr(off) (32*2 + 8 + off)(RSP)
|
||||
#define h(off) (32*3 + 8 + off)(RSP)
|
||||
#define r(off) (32*4 + 8 + off)(RSP)
|
||||
#define hsqr(off) (32*5 + 8 + off)(RSP)
|
||||
#define rsqr(off) (32*6 + 8 + off)(RSP)
|
||||
#define hcub(off) (32*7 + 8 + off)(RSP)
|
||||
|
||||
#define z2sqr(off) (32*8 + 8 + off)(RSP)
|
||||
#define s1(off) (32*9 + 8 + off)(RSP)
|
||||
#define u1(off) (32*10 + 8 + off)(RSP)
|
||||
#define u2(off) (32*11 + 8 + off)(RSP)
|
||||
|
||||
/* ---------------------------------------*/
|
||||
// func p256PointAddAffineAsm(res, in1 *SM2P256Point, in2 *p256AffinePoint, sign, sel, zero int)
//
// Mixed Jacobian + affine point addition, res = in1 + (±in2), with the
// special cases resolved by masking instead of branching:
//   - sign != 0: in2.y is negated mod p before the addition.
//   - sel  == 0: the result is replaced by in1.
//   - zero == 0: the result is replaced by (in2.x, ±in2.y, 1); this is
//     applied after the sel selection and therefore wins.
// The value used for "1" is (1, const0, 0, const1), presumably the
// Montgomery form of one (const0 is loaded from p256one<>+8).
//
// NOTE(review): hlp0, hlp1, a_ptr, b_ptr and res_ptr (which holds `zero`
// here) are assumed to survive the calls to the sm2P256*Internal helpers;
// those helpers are not visible in this chunk.
TEXT ·p256PointAddAffineAsm(SB),0,$264-48
	MOVV	in1+8(FP), a_ptr
	MOVV	in2+16(FP), b_ptr
	MOVV	sign+24(FP), hlp0
	MOVV	sel+32(FP), hlp1
	MOVV	zero+40(FP), res_ptr

	MOVV	p256one<>+0x08(SB), const0
	ADDV	$1, const0, const1

	// Negate y2in based on sign.
	// in2 is laid out as x (limbs 0..3) followed by y (limbs 4..7); the
	// previous code loaded limbs 0..3 here and so negated x2 instead of y2.
	MOVV	(8*4)(b_ptr), y0
	MOVV	(8*5)(b_ptr), y1
	MOVV	(8*6)(b_ptr), y2
	MOVV	(8*7)(b_ptr), y3
	// (acc3, acc2, acc1, acc0) = - (y3, y2, y1, y0)  (mod 2^256)
	SGTU	y0, ZERO, t3
	SUBV	y0, ZERO, acc0
	SGTU	y1, ZERO, t4
	SUBV	y1, ZERO, acc1
	SGTU	t3, acc1, t1
	SUBV	t3, acc1, acc1
	OR	t4, t1, t3
	SGTU	y2, ZERO, t4
	SUBV	y2, ZERO, acc2
	SGTU	t3, acc2, t1
	SUBV	t3, acc2, acc2
	OR	t4, t1, t3
	SGTU	y3, ZERO, t4
	SUBV	y3, ZERO, acc3
	SGTU	t3, acc3, t1
	SUBV	t3, acc3, acc3
	OR	t4, t1, t3

	// If the negation borrowed (y != 0), subtract (1, const0, 0, const1)
	// as well, which turns 2^256 - y into p - y.
	MOVV	$1, acc4
	MASKEQZ	t3, acc4, acc4
	MASKEQZ	t3, const0, acc5
	MASKEQZ	t3, const1, acc7

	SGTU	acc4, acc0, t3
	SUBV	acc4, acc0, acc0
	ADDV	t3, acc5, acc5		// no carry
	SGTU	acc5, acc1, t3
	SUBV	acc5, acc1, acc1
	SGTU	t3, acc2, t1
	SUBV	t3, acc2, acc2
	ADDV	t1, acc7, t3		// no carry
	SUBV	t3, acc3, acc3
	// If sign is 0, keep the original value; otherwise take the negation.
	MASKEQZ	hlp0, acc0, acc0
	MASKNEZ	hlp0, y0, y0
	MASKEQZ	hlp0, acc1, acc1
	MASKNEZ	hlp0, y1, y1
	MASKEQZ	hlp0, acc2, acc2
	MASKNEZ	hlp0, y2, y2
	MASKEQZ	hlp0, acc3, acc3
	MASKNEZ	hlp0, y3, y3
	OR	acc0, y0
	OR	acc1, y1
	OR	acc2, y2
	OR	acc3, y3
	// Store (possibly negated) y2 in its stack slot.
	STy(y2in)

	// Begin point add
	LDx(z1in)
	CALL	sm2P256SqrInternal<>(SB)	// z1^2
	STy(z1sqr)

	LDx(x2in)
	CALL	sm2P256MulInternal<>(SB)	// x2 * z1^2

	LDx(x1in)
	CALL	sm2P256Subinternal<>(SB)	// h = u2 - u1
	STx(h)

	LDy(z1in)
	CALL	sm2P256MulInternal<>(SB)	// z3 = h * z1

	// iff select == 0, z3 = z1
	MOVV	(8*8)(a_ptr), acc0
	MOVV	(8*9)(a_ptr), acc1
	MOVV	(8*10)(a_ptr), acc2
	MOVV	(8*11)(a_ptr), acc3
	MASKEQZ	hlp1, y0, y0
	MASKNEZ	hlp1, acc0, acc0
	MASKEQZ	hlp1, y1, y1
	MASKNEZ	hlp1, acc1, acc1
	MASKEQZ	hlp1, y2, y2
	MASKNEZ	hlp1, acc2, acc2
	MASKEQZ	hlp1, y3, y3
	MASKNEZ	hlp1, acc3, acc3
	OR	acc0, y0
	OR	acc1, y1
	OR	acc2, y2
	OR	acc3, y3
	// iff zero == 0, z3 = 1
	MOVV	$1, acc0
	MOVV	const0, acc1
	MOVV	$0, acc2
	MOVV	const1, acc3
	MASKEQZ	res_ptr, y0, y0
	MASKNEZ	res_ptr, acc0, acc0
	MASKEQZ	res_ptr, y1, y1
	MASKNEZ	res_ptr, acc1, acc1
	MASKEQZ	res_ptr, y2, y2
	MASKNEZ	res_ptr, acc2, acc2
	MASKEQZ	res_ptr, y3, y3
	MASKNEZ	res_ptr, acc3, acc3
	OR	acc0, y0
	OR	acc1, y1
	OR	acc2, y2
	OR	acc3, y3
	LDx(z1in)
	// store z3
	MOVV	res+0(FP), t0
	MOVV	y0, (8*8)(t0)
	MOVV	y1, (8*9)(t0)
	MOVV	y2, (8*10)(t0)
	MOVV	y3, (8*11)(t0)

	LDy(z1sqr)
	CALL	sm2P256MulInternal<>(SB)	// z1^3

	LDx(y2in)
	CALL	sm2P256MulInternal<>(SB)	// s2 = y2 * z1^3
	STy(s2)

	LDx(y1in)
	CALL	sm2P256Subinternal<>(SB)	// r = s2 - s1
	STx(r)

	CALL	sm2P256SqrInternal<>(SB)	// rsqr = r^2
	STy(rsqr)

	LDx(h)
	CALL	sm2P256SqrInternal<>(SB)	// hsqr = h^2
	STy(hsqr)

	CALL	sm2P256MulInternal<>(SB)	// hcub = h^3
	STy(hcub)

	LDx(y1in)
	CALL	sm2P256MulInternal<>(SB)	// y1 * h^3
	STy(s2)

	MOVV	hsqr(0*8), x0
	MOVV	hsqr(1*8), x1
	MOVV	hsqr(2*8), x2
	MOVV	hsqr(3*8), x3
	// u1 is x1 itself (in2 is affine). Without this reload y would still
	// hold y1 * h^3 from the previous multiplication.
	MOVV	(8*0)(a_ptr), y0
	MOVV	(8*1)(a_ptr), y1
	MOVV	(8*2)(a_ptr), y2
	MOVV	(8*3)(a_ptr), y3
	CALL	sm2P256MulInternal<>(SB)	// hsqr * u1
	MOVV	y0, h(0*8)			// the h slot now holds u1 * h^2
	MOVV	y1, h(1*8)
	MOVV	y2, h(2*8)
	MOVV	y3, h(3*8)

	p256MulBy2Inline			// u1 * h^2 * 2, inline

	LDy(rsqr)
	CALL	sm2P256Subinternal<>(SB)	// r^2 - u1 * h^2 * 2

	MOVV	x0, y0
	MOVV	x1, y1
	MOVV	x2, y2
	MOVV	x3, y3
	// Load h^3 into x (not y): y holds the minuend that was just moved.
	LDx(hcub)
	CALL	sm2P256Subinternal<>(SB)	// x3 = r^2 - 2*u1*h^2 - h^3

	MOVV	(8*0)(a_ptr), acc0
	MOVV	(8*1)(a_ptr), acc1
	MOVV	(8*2)(a_ptr), acc2
	MOVV	(8*3)(a_ptr), acc3
	// iff select == 0, x3 = x1
	MASKEQZ	hlp1, x0, x0
	MASKNEZ	hlp1, acc0, acc0
	MASKEQZ	hlp1, x1, x1
	MASKNEZ	hlp1, acc1, acc1
	MASKEQZ	hlp1, x2, x2
	MASKNEZ	hlp1, acc2, acc2
	MASKEQZ	hlp1, x3, x3
	MASKNEZ	hlp1, acc3, acc3
	OR	acc0, x0
	OR	acc1, x1
	OR	acc2, x2
	OR	acc3, x3
	MOVV	(8*0)(b_ptr), acc0
	MOVV	(8*1)(b_ptr), acc1
	MOVV	(8*2)(b_ptr), acc2
	MOVV	(8*3)(b_ptr), acc3
	// iff zero == 0, x3 = x2
	MASKEQZ	res_ptr, x0, x0
	MASKNEZ	res_ptr, acc0, acc0
	MASKEQZ	res_ptr, x1, x1
	MASKNEZ	res_ptr, acc1, acc1
	MASKEQZ	res_ptr, x2, x2
	MASKNEZ	res_ptr, acc2, acc2
	MASKEQZ	res_ptr, x3, x3
	MASKNEZ	res_ptr, acc3, acc3
	OR	acc0, x0
	OR	acc1, x1
	OR	acc2, x2
	OR	acc3, x3
	// store x3
	MOVV	res+0(FP), t0
	MOVV	x0, (8*0)(t0)
	MOVV	x1, (8*1)(t0)
	MOVV	x2, (8*2)(t0)
	MOVV	x3, (8*3)(t0)

	MOVV	h(0*8), y0			// u1 * h^2
	MOVV	h(1*8), y1
	MOVV	h(2*8), y2
	MOVV	h(3*8), y3
	CALL	sm2P256Subinternal<>(SB)

	MOVV	r(0*8), y0
	MOVV	r(1*8), y1
	MOVV	r(2*8), y2
	MOVV	r(3*8), y3
	CALL	sm2P256MulInternal<>(SB)

	MOVV	s2(0*8), x0
	MOVV	s2(1*8), x1
	MOVV	s2(2*8), x2
	MOVV	s2(3*8), x3
	CALL	sm2P256Subinternal<>(SB)

	MOVV	(8*4)(a_ptr), acc0
	MOVV	(8*5)(a_ptr), acc1
	MOVV	(8*6)(a_ptr), acc2
	MOVV	(8*7)(a_ptr), acc3
	// iff select == 0, y3 = y1
	MASKEQZ	hlp1, x0, x0
	MASKNEZ	hlp1, acc0, acc0
	MASKEQZ	hlp1, x1, x1
	MASKNEZ	hlp1, acc1, acc1
	MASKEQZ	hlp1, x2, x2
	MASKNEZ	hlp1, acc2, acc2
	MASKEQZ	hlp1, x3, x3
	MASKNEZ	hlp1, acc3, acc3
	OR	acc0, x0
	OR	acc1, x1
	OR	acc2, x2
	OR	acc3, x3
	MOVV	y2in(0*8), acc0
	MOVV	y2in(1*8), acc1
	MOVV	y2in(2*8), acc2
	MOVV	y2in(3*8), acc3
	// iff zero == 0, y3 = y2
	MASKEQZ	res_ptr, x0, x0
	MASKNEZ	res_ptr, acc0, acc0
	MASKEQZ	res_ptr, x1, x1
	MASKNEZ	res_ptr, acc1, acc1
	MASKEQZ	res_ptr, x2, x2
	MASKNEZ	res_ptr, acc2, acc2
	MASKEQZ	res_ptr, x3, x3
	MASKNEZ	res_ptr, acc3, acc3
	OR	acc0, x0
	OR	acc1, x1
	OR	acc2, x2
	OR	acc3, x3
	// store y3
	MOVV	res+0(FP), t0
	MOVV	x0, (8*4)(t0)
	MOVV	x1, (8*5)(t0)
	MOVV	x2, (8*6)(t0)
	MOVV	x3, (8*7)(t0)

	RET
|
||||
|
||||
// (x3, x2, x1, x0) = (x3, x2, x1, x0) + (y3, y2, y1, y0)
|
||||
@ -1497,17 +1931,450 @@ TEXT ·p256Add(SB),NOSPLIT,$0
|
||||
MOVV x3, (8*3)(res_ptr)
|
||||
RET
|
||||
|
||||
// (y3, y2, y1, y0) = (y3, y2, y1, y0) / 2  (mod p)
//
// If the input is odd, p is added first so that the shift is exact. Adding p
// is done as subtracting 2^256 - p = (1, const0, 0, const1) limb by limb; the
// 257th bit of the true sum y + p is then "no borrow out of the top limb",
// and it is shifted into bit 63 of y3.
// Clobbers t0, t1, t2, acc1, acc2, acc3. Expects const0/const1 to be set up.
//
// Fixes relative to the previous revision:
//   - the third line was missing its "; \" continuation, which ended the
//     macro after three instructions;
//   - the parity test read t1 before it was written; it now masks with acc1,
//     which holds 1 at that point;
//   - the top carry used a strict "y3 > acc3", which is wrong when
//     y3 == acc3 (no borrow, so the carry must be 1); it is now computed as
//     the complement of the borrow "acc3 > y3".
#define p256DivideBy2 \
	MOVV $1, acc1; \
	AND acc1, y0, t0; \
	MASKEQZ t0, acc1, acc1; \
	MASKEQZ t0, const0, acc2; \
	MASKEQZ t0, const1, acc3; \
	SGTU acc1, y0, t1; \
	SUBV acc1, y0, y0; \
	ADDV t1, acc2, acc2; \
	SRLV $1, y0, y0; \
	SGTU acc2, y1, t1; \
	SUBV acc2, y1, y1; \
	SGTU t1, y2, t2; \
	SUBV t1, y2, y2; \
	BSTRINSV $63, y1, $63, y0; \
	SRLV $1, y1, y1; \
	ADDV t2, acc3, acc3; \
	BSTRINSV $63, y2, $63, y1; \
	SRLV $1, y2, y2; \
	SGTU acc3, y3, t2; \
	SUBV acc3, y3, t1; \
	XOR $1, t2, t2; \
	BSTRINSV $63, t1, $63, y2; \
	SRLV $1, t1, y3; \
	MASKEQZ t0, t2, t2; \
	BSTRINSV $63, t2, $63, y3
|
||||
|
||||
/* ---------------------------------------*/
|
||||
// func p256DivBy2(res, in *p256Element)
//
// Loads the four limbs of *in into y0..y3, applies the p256DivideBy2 macro
// and writes the four resulting limbs to *res.
TEXT ·p256DivBy2(SB),NOSPLIT,$0
	// Constants the macro expects: const0 comes from p256one<>+8 and
	// const1 = const0 + 1.
	MOVV	p256one<>+0x08(SB), const0
	ADDV	$1, const0, const1

	MOVV	in+8(FP), x_ptr
	MOVV	res+0(FP), res_ptr

	// Operand limbs, least significant first.
	MOVV	(8*0)(x_ptr), y0
	MOVV	(8*1)(x_ptr), y1
	MOVV	(8*2)(x_ptr), y2
	MOVV	(8*3)(x_ptr), y3

	p256DivideBy2

	MOVV	y0, (8*0)(res_ptr)
	MOVV	y1, (8*1)(res_ptr)
	MOVV	y2, (8*2)(res_ptr)
	MOVV	y3, (8*3)(res_ptr)
	RET
|
||||
|
||||
#define s(off) (32*0 + 8 + off)(RSP)
|
||||
#define m(off) (32*1 + 8 + off)(RSP)
|
||||
#define zsqr(off) (32*2 + 8 + off)(RSP)
|
||||
#define tmp(off) (32*3 + 8 + off)(RSP)
|
||||
|
||||
//func p256PointDoubleAsm(res, in *SM2P256Point)
|
||||
TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-16
|
||||
MOVV res+0(FP), res_ptr
|
||||
MOVV in+8(FP), a_ptr
|
||||
|
||||
MOVV p256one<>+0x08(SB), const0
|
||||
ADDV $1, const0, const1
|
||||
|
||||
// Begin point double
|
||||
MOVV (8*8)(a_ptr), x0
|
||||
MOVV (8*9)(a_ptr), x1
|
||||
MOVV (8*10)(a_ptr), x2
|
||||
MOVV (8*11)(a_ptr), x3
|
||||
CALL sm2P256SqrInternal<>(SB) // z1ˆ2
|
||||
MOVV y0, zsqr(0*8) // store z^2
|
||||
MOVV y1, zsqr(1*8)
|
||||
MOVV y2, zsqr(2*8)
|
||||
MOVV y3, zsqr(3*8)
|
||||
|
||||
MOVV (8*0)(a_ptr), x0 // load x
|
||||
MOVV (8*1)(a_ptr), x1
|
||||
MOVV (8*2)(a_ptr), x2
|
||||
MOVV (8*3)(a_ptr), x3
|
||||
p256AddInline
|
||||
STx(m)
|
||||
|
||||
LDx(z1in)
|
||||
LDy(y1in)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
p256MulBy2Inline
|
||||
STx(z3out)
|
||||
|
||||
LDy(x1in)
|
||||
LDx(zsqr)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
LDy(m)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
|
||||
// Multiply by 3
|
||||
p256MulBy2Inline
|
||||
p256AddInline
|
||||
STx(m)
|
||||
|
||||
LDy(y1in)
|
||||
p256MulBy2Inline
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
STy(s)
|
||||
MOVV y0, x0
|
||||
MOVV y1, x1
|
||||
MOVV y2, x2
|
||||
MOVV y3, x3
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
|
||||
// Divide by 2
|
||||
p256DivideBy2
|
||||
|
||||
STy(y3out)
|
||||
|
||||
LDx(x1in)
|
||||
LDy(s)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
STy(s)
|
||||
p256MulBy2Inline
|
||||
STx(tmp)
|
||||
|
||||
LDx(m)
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
LDx(tmp)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
|
||||
STx(x3out)
|
||||
|
||||
LDy(s)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
|
||||
LDy(m)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
|
||||
LDx(y3out)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
STx(y3out)
|
||||
|
||||
RET
|
||||
|
||||
#define p256PointDoubleRound() \
|
||||
LDx(z3out) \ // load z
|
||||
CALL sm2P256SqrInternal<>(SB) \
|
||||
MOVV y0, zsqr(0*8) \ // store z^2
|
||||
MOVV y1, zsqr(1*8) \
|
||||
MOVV y2, zsqr(2*8) \
|
||||
MOVV y3, zsqr(3*8) \
|
||||
\
|
||||
LDx(x3out) \// load x
|
||||
p256AddInline \
|
||||
STx(m) \
|
||||
\
|
||||
LDx(z3out) \ // load z
|
||||
LDy(y3out) \ // load y
|
||||
CALL sm2P256MulInternal<>(SB) \
|
||||
p256MulBy2Inline \
|
||||
STx(z3out) \ // store result z
|
||||
\
|
||||
LDy(x3out) \ // load x
|
||||
LDx(zsqr) \
|
||||
CALL sm2P256Subinternal<>(SB) \
|
||||
LDy(m) \
|
||||
CALL sm2P256MulInternal<>(SB) \
|
||||
\
|
||||
\// Multiply by 3
|
||||
p256MulBy2Inline \
|
||||
p256AddInline \
|
||||
STx(m) \
|
||||
\
|
||||
LDy(y3out) \ // load y
|
||||
p256MulBy2Inline \
|
||||
CALL sm2P256SqrInternal<>(SB) \
|
||||
STy(s) \
|
||||
MOVV y0, x0 \
|
||||
MOVV y1, x1 \
|
||||
MOVV y2, x2 \
|
||||
MOVV y3, x3 \
|
||||
CALL sm2P256SqrInternal<>(SB) \
|
||||
\
|
||||
\// Divide by 2
|
||||
p256DivideBy2 \
|
||||
STy(y3out) \
|
||||
\
|
||||
LDx(x3out) \ // load x
|
||||
LDy(s) \
|
||||
CALL sm2P256MulInternal<>(SB) \
|
||||
STy(s) \
|
||||
p256MulBy2Inline \
|
||||
STx(tmp) \
|
||||
\
|
||||
LDx(m) \
|
||||
CALL sm2P256SqrInternal<>(SB) \
|
||||
LDx(tmp) \
|
||||
CALL sm2P256Subinternal<>(SB) \
|
||||
\
|
||||
STx(x3out) \
|
||||
\
|
||||
LDy(s) \
|
||||
CALL sm2P256Subinternal<>(SB) \
|
||||
\
|
||||
LDy(m) \
|
||||
CALL sm2P256MulInternal<>(SB) \
|
||||
\
|
||||
LDx(y3out) \
|
||||
CALL sm2P256Subinternal<>(SB) \
|
||||
STx(y3out) \
|
||||
|
||||
|
||||
/* ---------------------------------------*/
|
||||
//func p256PointDouble6TimesAsm(res, in *SM2P256Point)
|
||||
TEXT ·p256PointDouble6TimesAsm(SB),NOSPLIT,$136-16
|
||||
MOVV res+0(FP), res_ptr
|
||||
MOVV in+8(FP), a_ptr
|
||||
|
||||
MOVV p256one<>+0x08(SB), const0
|
||||
ADDV $1, const0, const1
|
||||
|
||||
// Begin point double
|
||||
MOVV (8*8)(a_ptr), x0
|
||||
MOVV (8*9)(a_ptr), x1
|
||||
MOVV (8*10)(a_ptr), x2
|
||||
MOVV (8*11)(a_ptr), x3
|
||||
CALL sm2P256SqrInternal<>(SB) // z1ˆ2
|
||||
MOVV y0, zsqr(0*8) // store z^2
|
||||
MOVV y1, zsqr(1*8)
|
||||
MOVV y2, zsqr(2*8)
|
||||
MOVV y3, zsqr(3*8)
|
||||
|
||||
MOVV (8*0)(a_ptr), x0 // load x
|
||||
MOVV (8*1)(a_ptr), x1
|
||||
MOVV (8*2)(a_ptr), x2
|
||||
MOVV (8*3)(a_ptr), x3
|
||||
p256AddInline
|
||||
STx(m)
|
||||
|
||||
LDx(z1in)
|
||||
LDy(y1in)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
p256MulBy2Inline
|
||||
STx(z3out)
|
||||
|
||||
LDy(x1in)
|
||||
LDx(zsqr)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
LDy(m)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
|
||||
// Multiply by 3
|
||||
p256MulBy2Inline
|
||||
p256AddInline
|
||||
STx(m)
|
||||
|
||||
LDy(y1in)
|
||||
p256MulBy2Inline
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
STy(s)
|
||||
MOVV y0, x0
|
||||
MOVV y1, x1
|
||||
MOVV y2, x2
|
||||
MOVV y3, x3
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
|
||||
// Divide by 2
|
||||
p256DivideBy2
|
||||
|
||||
STy(y3out)
|
||||
|
||||
LDx(x1in)
|
||||
LDy(s)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
STy(s)
|
||||
p256MulBy2Inline
|
||||
STx(tmp)
|
||||
|
||||
LDx(m)
|
||||
CALL sm2P256SqrInternal<>(SB)
|
||||
LDx(tmp)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
|
||||
STx(x3out)
|
||||
|
||||
LDy(s)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
|
||||
LDy(m)
|
||||
CALL sm2P256MulInternal<>(SB)
|
||||
|
||||
LDx(y3out)
|
||||
CALL sm2P256Subinternal<>(SB)
|
||||
STx(y3out)
|
||||
|
||||
// Begin point double rounds 2 - 6
|
||||
p256PointDoubleRound()
|
||||
p256PointDoubleRound()
|
||||
p256PointDoubleRound()
|
||||
p256PointDoubleRound()
|
||||
p256PointDoubleRound()
|
||||
|
||||
RET
|
||||
|
||||
/* ---------------------------------------*/
|
||||
#undef y2in
|
||||
#undef x3out
|
||||
#undef y3out
|
||||
#undef z3out
|
||||
#define y2in(off) (off + 32)(b_ptr)
|
||||
#define x3out(off) (off)(b_ptr)
|
||||
#define y3out(off) (off + 32)(b_ptr)
|
||||
#define z3out(off) (off + 64)(b_ptr)
|
||||
// func p256PointAddAsm(res, in1, in2 *SM2P256Point) int
//
// Jacobian point addition res = in1 + in2 using the add-2007-bl formulas.
// The return value is 1 when both r = s2 - s1 and h = u2 - u1 are zero mod p
// (each may be represented as 0 or as p), and 0 otherwise. In the former case
// the written result is not a valid sum.
//
// NOTE(review): a_ptr, b_ptr and res_ptr (which carries the return flag
// between the two zero tests and the final store) are assumed to survive the
// calls to the sm2P256*Internal helpers; those helpers are not visible in
// this chunk.
TEXT ·p256PointAddAsm(SB),0,$392-32
	// See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl
	MOVV	in1+8(FP), a_ptr
	MOVV	in2+16(FP), b_ptr

	MOVV	p256one<>+0x08(SB), const0
	ADDV	$1, const0, const1

	// Begin point add
	LDx(z2in)
	CALL	sm2P256SqrInternal<>(SB)	// z2^2
	STy(z2sqr)

	CALL	sm2P256MulInternal<>(SB)	// z2^3

	LDx(y1in)
	CALL	sm2P256MulInternal<>(SB)	// s1 = z2^3 * y1
	STy(s1)

	LDx(z1in)
	CALL	sm2P256SqrInternal<>(SB)	// z1^2
	STy(z1sqr)

	CALL	sm2P256MulInternal<>(SB)	// z1^3

	LDx(y2in)
	CALL	sm2P256MulInternal<>(SB)	// s2 = z1^3 * y2

	LDx(s1)
	CALL	sm2P256Subinternal<>(SB)	// r = s2 - s1
	STx(r)

	// res_ptr = 1 iff r == 0 mod p, i.e. r is 0 or r is p.
	// The two "differs" flags must be combined with AND and then inverted;
	// OR-ing them (as before) is always 1, because r cannot equal both 0
	// and p at the same time.
	OR	x0, x1, acc0
	OR	x2, x3, acc1
	OR	acc0, acc1, acc1
	SGTU	acc1, ZERO, hlp0		// hlp0 = (r != 0)

	MOVV	$-1, acc0
	MOVV	p256p<>+0x08(SB), acc1
	MOVV	p256p<>+0x18(SB), acc3

	XOR	acc0, x0, acc4
	XOR	acc1, x1, acc5
	XOR	acc0, x2, acc6
	XOR	acc3, x3, acc7
	OR	acc4, acc5, acc4
	OR	acc6, acc7, acc7
	OR	acc4, acc7, acc7
	SGTU	acc7, ZERO, res_ptr		// res_ptr = (r != p)
	AND	hlp0, res_ptr, res_ptr		// 1 iff r is neither 0 nor p
	XOR	$1, res_ptr, res_ptr		// 1 iff r == 0 mod p

	LDx(z2sqr)
	LDy(x1in)
	CALL	sm2P256MulInternal<>(SB)	// u1 = x1 * z2^2
	STy(u1)

	LDx(z1sqr)
	LDy(x2in)
	CALL	sm2P256MulInternal<>(SB)	// u2 = x2 * z1^2
	STy(u2)

	LDx(u1)
	CALL	sm2P256Subinternal<>(SB)	// h = u2 - u1
	STx(h)

	// hlp0 = 1 iff h == 0 mod p (same test as for r above).
	OR	x0, x1, acc0
	OR	x2, x3, acc1
	OR	acc0, acc1, acc1
	SGTU	acc1, ZERO, hlp0		// hlp0 = (h != 0)

	MOVV	$-1, acc0
	MOVV	p256p<>+0x08(SB), acc1
	MOVV	p256p<>+0x18(SB), acc3

	XOR	acc0, x0, acc4
	XOR	acc1, x1, acc5
	XOR	acc0, x2, acc6
	XOR	acc3, x3, acc7
	OR	acc4, acc5, acc4
	OR	acc6, acc7, acc7
	OR	acc4, acc7, acc7
	SGTU	acc7, ZERO, t0			// t0 = (h != p)
	AND	hlp0, t0, hlp0			// 1 iff h is neither 0 nor p
	XOR	$1, hlp0, hlp0			// 1 iff h == 0 mod p

	AND	hlp0, res_ptr, res_ptr		// return flag: r == 0 and h == 0

	LDx(r)
	CALL	sm2P256SqrInternal<>(SB)	// rsqr = r^2
	STy(rsqr)

	LDx(h)
	CALL	sm2P256SqrInternal<>(SB)	// hsqr = h^2
	STy(hsqr)

	LDx(h)
	CALL	sm2P256MulInternal<>(SB)	// hcub = h^3
	STy(hcub)

	LDx(s1)
	CALL	sm2P256MulInternal<>(SB)	// s1 * h^3
	STy(s2)

	LDx(z1in)
	LDy(z2in)
	CALL	sm2P256MulInternal<>(SB)	// z1 * z2
	LDx(h)
	CALL	sm2P256MulInternal<>(SB)	// z1 * z2 * h
	// From here on b_ptr addresses the result (x3out/y3out/z3out); in2 is
	// no longer needed.
	MOVV	res+0(FP), b_ptr
	STy(z3out)

	LDx(hsqr)
	LDy(u1)
	CALL	sm2P256MulInternal<>(SB)	// h^2 * u1
	STy(u2)

	p256MulBy2Inline			// u1 * h^2 * 2, inline
	LDy(rsqr)
	CALL	sm2P256Subinternal<>(SB)	// r^2 - u1 * h^2 * 2

	MOVV	x0, y0
	MOVV	x1, y1
	MOVV	x2, y2
	MOVV	x3, y3
	LDx(hcub)
	CALL	sm2P256Subinternal<>(SB)	// x3 = r^2 - 2*u1*h^2 - h^3
	STx(x3out)

	LDy(u2)
	CALL	sm2P256Subinternal<>(SB)	// u1*h^2 - x3

	LDy(r)
	CALL	sm2P256MulInternal<>(SB)	// r * (u1*h^2 - x3)

	LDx(s2)
	CALL	sm2P256Subinternal<>(SB)	// y3 = r*(u1*h^2 - x3) - s1*h^3
	STx(y3out)

	MOVV	res_ptr, ret+24(FP)

	RET
|
||||
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build purego || !(amd64 || arm64 || s390x || ppc64le)
|
||||
//go:build purego || !(amd64 || arm64 || loong64 || s390x || ppc64le)
|
||||
|
||||
package sm2ec
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
// 256-bit primes"
|
||||
// https://link.springer.com/article/10.1007%2Fs13389-014-0090-x
|
||||
// https://eprint.iacr.org/2013/816.pdf
|
||||
//go:build (amd64 || arm64 || s390x || ppc64le) && !purego
|
||||
//go:build (amd64 || arm64 || loong64 || s390x || ppc64le) && !purego
|
||||
|
||||
package sm2ec
|
||||
|
||||
|
@ -1,64 +0,0 @@
|
||||
package sm2ec
|
||||
|
||||
import (
|
||||
"github.com/emmansun/gmsm/internal/deps/cpu"
|
||||
)
|
||||
|
||||
// p256Element is a P-256 base field element in [0, P-1] in the Montgomery
|
||||
// domain (with R = 2²⁵⁶) as four limbs in little-endian order.
|
||||
type p256Element [4]uint64
|
||||
|
||||
type SM2P256Point1 struct {
|
||||
// (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point
|
||||
// at infinity can be represented by any set of coordinates with Z = 0.
|
||||
x, y, z p256Element
|
||||
}
|
||||
|
||||
var supportLSX = cpu.Loong64.HasLSX
|
||||
var supportLASX = cpu.Loong64.HasLASX
|
||||
|
||||
//go:noescape
|
||||
func p256BigToLittle(res *p256Element, in *[32]byte)
|
||||
|
||||
//go:noescape
|
||||
func p256LittleToBig(res *[32]byte, in *p256Element)
|
||||
|
||||
// If cond is 0, sets res = b, otherwise sets res = a.
|
||||
//
|
||||
//go:noescape
|
||||
func p256MovCond(res, a, b *SM2P256Point1, cond int)
|
||||
|
||||
// If cond is not 0, sets val = -val mod p.
|
||||
//
|
||||
//go:noescape
|
||||
func p256NegCond(val *p256Element, cond int)
|
||||
|
||||
// Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p.
|
||||
//
|
||||
//go:noescape
|
||||
func p256Mul(res, in1, in2 *p256Element)
|
||||
|
||||
// Montgomery square, repeated n times (n >= 1).
|
||||
//
|
||||
//go:noescape
|
||||
func p256Sqr(res, in *p256Element, n int)
|
||||
|
||||
// Montgomery multiplication by R⁻¹, or 1 outside the domain.
|
||||
// Sets res = in * R⁻¹, bringing res out of the Montgomery domain.
|
||||
//
|
||||
//go:noescape
|
||||
func p256FromMont(res, in *p256Element)
|
||||
|
||||
// p256OrdReduce ensures s is in the range [0, ord(G)-1].
|
||||
//
|
||||
//go:noescape
|
||||
func p256OrdReduce(s *p256OrdElement)
|
||||
|
||||
//go:noescape
|
||||
func p256Add(res, in1, in2 *p256Element)
|
||||
|
||||
//go:noescape
|
||||
func p256Sub(res, in1, in2 *p256Element)
|
||||
|
||||
//go:noescape
|
||||
func p256MulBy2(res, in *p256Element)
|
@ -1,376 +0,0 @@
|
||||
//go:build loong64 && !purego
|
||||
|
||||
package sm2ec
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/big"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestP256BigToLittle(t *testing.T) {
|
||||
// 构造一个已知的 32 字节大端输入
|
||||
var in [32]byte
|
||||
for i := 0; i < 32; i++ {
|
||||
in[i] = byte(i + 1)
|
||||
}
|
||||
var out p256Element
|
||||
|
||||
p256BigToLittle(&out, &in)
|
||||
|
||||
// 检查每个 limb 是否为小端解包
|
||||
for i := 0; i < 4; i++ {
|
||||
expected := binary.BigEndian.Uint64(in[i*8 : (i+1)*8])
|
||||
k := 3 - i // 逆序存储
|
||||
if out[k] != expected {
|
||||
t.Errorf("limb %d: got 0x%x, want 0x%x", k, out[k], expected)
|
||||
}
|
||||
}
|
||||
|
||||
// 逆操作测试
|
||||
var back [32]byte
|
||||
p256LittleToBig(&back, &out)
|
||||
if !bytes.Equal(in[:], back[:]) {
|
||||
t.Errorf("p256LittleToBig(p256BigToLittle(...)) mismatch\nin: %x\nback: %x", in, back)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256NegCond(t *testing.T) {
|
||||
var tests = []struct {
|
||||
input p256Element
|
||||
cond int
|
||||
expected p256Element
|
||||
}{
|
||||
{
|
||||
input: p256Element{1, 0, 0, 0},
|
||||
cond: 1,
|
||||
expected: p256Element{0xfffffffffffffffe, 0xffffffff00000000, 0xffffffffffffffff, 0xfffffffeffffffff},
|
||||
},
|
||||
{
|
||||
input: p256Element{1, 0, 0, 0},
|
||||
cond: 0,
|
||||
expected: p256Element{1, 0, 0, 0},
|
||||
},
|
||||
{
|
||||
input: p256Element{0x1, 0xffffffff00000001, 0xfffffffffffffffe, 0xfffffffeffffffff},
|
||||
cond: 1,
|
||||
expected: p256Element{0xfffffffffffffffe, 0xffffffffffffffff, 0, 0},
|
||||
},
|
||||
}
|
||||
|
||||
for i, test := range tests {
|
||||
var result p256Element
|
||||
copy(result[:], test.input[:])
|
||||
p256NegCond(&result, test.cond)
|
||||
if result != test.expected {
|
||||
t.Errorf("test %d: got %x, want %x", i, result, test.expected)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newPoint(x, y, z uint64) *SM2P256Point1 {
|
||||
return &SM2P256Point1{
|
||||
x: p256Element{x, x + 1, x + 2, x + 3},
|
||||
y: p256Element{y, y + 1, y + 2, y + 3},
|
||||
z: p256Element{z, z + 1, z + 2, z + 3},
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256MovCond(t *testing.T) {
|
||||
fmt.Printf("supportLSX=%v, supportLASX=%v\n", supportLSX, supportLASX)
|
||||
a := newPoint(10, 20, 30)
|
||||
b := newPoint(100, 200, 300)
|
||||
var res SM2P256Point1
|
||||
|
||||
// cond == 0: res = b
|
||||
p256MovCond(&res, a, b, 0)
|
||||
if !reflect.DeepEqual(res, *b) {
|
||||
t.Errorf("cond=0: got %+v, want %+v", res, *b)
|
||||
}
|
||||
|
||||
// cond != 0: res = a
|
||||
p256MovCond(&res, a, b, 1)
|
||||
if !reflect.DeepEqual(res, *a) {
|
||||
t.Errorf("cond=1: got %+v, want %+v", res, *a)
|
||||
}
|
||||
|
||||
// cond < 0: res = a (should treat any nonzero as true)
|
||||
p256MovCond(&res, a, b, -123)
|
||||
if !reflect.DeepEqual(res, *a) {
|
||||
t.Errorf("cond=-123: got %+v, want %+v", res, *a)
|
||||
}
|
||||
}
|
||||
|
||||
// fromBig converts a *big.Int into a format used by this code.
|
||||
func fromBig(out *p256Element, big *big.Int) {
|
||||
for i := range out {
|
||||
out[i] = 0
|
||||
}
|
||||
|
||||
for i, v := range big.Bits() {
|
||||
out[i] = uint64(v)
|
||||
}
|
||||
}
|
||||
|
||||
func toBigInt(in *p256Element) *big.Int {
|
||||
var valBytes [32]byte
|
||||
p256LittleToBig(&valBytes, in)
|
||||
return new(big.Int).SetBytes(valBytes[:])
|
||||
}
|
||||
|
||||
func p256MulTest(t *testing.T, x, y, p, r *big.Int) {
|
||||
x1 := new(big.Int).Mul(x, r)
|
||||
x1 = x1.Mod(x1, p)
|
||||
y1 := new(big.Int).Mul(y, r)
|
||||
y1 = y1.Mod(y1, p)
|
||||
ax := new(p256Element)
|
||||
ay := new(p256Element)
|
||||
res := new(p256Element)
|
||||
res2 := new(p256Element)
|
||||
fromBig(ax, x1)
|
||||
fromBig(ay, y1)
|
||||
p256Mul(res2, ax, ay)
|
||||
p256FromMont(res, res2)
|
||||
resInt := toBigInt(res)
|
||||
|
||||
expected := new(big.Int).Mul(x, y)
|
||||
expected = expected.Mod(expected, p)
|
||||
if resInt.Cmp(expected) != 0 {
|
||||
t.Fatalf("p256Mul(%x, %x) = %x, want %x", x, y, resInt, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256MulPMinus1(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
pMinus1 := new(big.Int).Sub(p, big.NewInt(1))
|
||||
p256MulTest(t, pMinus1, pMinus1, p, r)
|
||||
}
|
||||
|
||||
func TestFuzzyP256Mul(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
var scalar1 [32]byte
|
||||
var scalar2 [32]byte
|
||||
var timeout *time.Timer
|
||||
|
||||
if testing.Short() {
|
||||
timeout = time.NewTimer(10 * time.Millisecond)
|
||||
} else {
|
||||
timeout = time.NewTimer(2 * time.Second)
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-timeout.C:
|
||||
return
|
||||
default:
|
||||
}
|
||||
io.ReadFull(rand.Reader, scalar1[:])
|
||||
io.ReadFull(rand.Reader, scalar2[:])
|
||||
x := new(big.Int).SetBytes(scalar1[:])
|
||||
y := new(big.Int).SetBytes(scalar2[:])
|
||||
p256MulTest(t, x, y, p, r)
|
||||
}
|
||||
}
|
||||
|
||||
func p256SqrTest(t *testing.T, x, p, r *big.Int) {
|
||||
x1 := new(big.Int).Mul(x, r)
|
||||
x1 = x1.Mod(x1, p)
|
||||
ax := new(p256Element)
|
||||
res := new(p256Element)
|
||||
res2 := new(p256Element)
|
||||
fromBig(ax, x1)
|
||||
p256Sqr(res2, ax, 1)
|
||||
p256FromMont(res, res2)
|
||||
resInt := toBigInt(res)
|
||||
|
||||
expected := new(big.Int).Mul(x, x)
|
||||
expected = expected.Mod(expected, p)
|
||||
if resInt.Cmp(expected) != 0 {
|
||||
t.Fatalf("p256Sqr(%x) = %x, want %x", x, resInt, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256SqrPMinus1(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
pMinus1 := new(big.Int).Sub(p, big.NewInt(1))
|
||||
p256SqrTest(t, pMinus1, p, r)
|
||||
}
|
||||
|
||||
func TestFuzzyP256Sqr(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
var scalar1 [32]byte
|
||||
var timeout *time.Timer
|
||||
|
||||
if testing.Short() {
|
||||
timeout = time.NewTimer(10 * time.Millisecond)
|
||||
} else {
|
||||
timeout = time.NewTimer(2 * time.Second)
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-timeout.C:
|
||||
return
|
||||
default:
|
||||
}
|
||||
io.ReadFull(rand.Reader, scalar1[:])
|
||||
x := new(big.Int).SetBytes(scalar1[:])
|
||||
p256SqrTest(t, x, p, r)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256OrdReduce(t *testing.T) {
|
||||
p256Ord := &p256OrdElement{0x53bbf40939d54123, 0x7203df6b21c6052b, 0xffffffffffffffff, 0xfffffffeffffffff}
|
||||
// s < p256Ord
|
||||
var s1 p256OrdElement
|
||||
copy(s1[:], p256Ord[:])
|
||||
s1[0] -= 1 // s1 = p256Ord - 1
|
||||
s1Orig := s1
|
||||
p256OrdReduce(&s1)
|
||||
if s1 != s1Orig {
|
||||
t.Errorf("p256OrdReduce changed s when s < p256Ord: got %x, want %x", s1, s1Orig)
|
||||
}
|
||||
|
||||
// s >= p256Ord
|
||||
var s2 p256OrdElement
|
||||
copy(s2[:], p256Ord[:])
|
||||
// s2 = p256Ord
|
||||
p256OrdReduce(&s2)
|
||||
zero := p256OrdElement{}
|
||||
if s2 != zero {
|
||||
t.Errorf("p256OrdReduce failed for s == p256Ord: got %x, want 0", s2)
|
||||
}
|
||||
|
||||
// s2 = p256Ord + 1
|
||||
copy(s2[:], p256Ord[:])
|
||||
s2[0] += 1
|
||||
p256OrdReduce(&s2)
|
||||
one := p256OrdElement{1, 0, 0, 0}
|
||||
if s2 != one {
|
||||
t.Errorf("p256OrdReduce failed for s == p256Ord+1: got %x, want %x", s2, one)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256Sub(t *testing.T) {
|
||||
// in1 > in2
|
||||
in1 := p256Element{5, 0, 0, 0}
|
||||
in2 := p256Element{3, 0, 0, 0}
|
||||
var res p256Element
|
||||
p256Sub(&res, &in1, &in2)
|
||||
want := p256Element{2, 0, 0, 0}
|
||||
if !reflect.DeepEqual(res, want) {
|
||||
t.Errorf("in1 > in2: got %v, want %v", res, want)
|
||||
}
|
||||
|
||||
// in1 == in2
|
||||
in1 = p256Element{7, 8, 9, 10}
|
||||
in2 = p256Element{7, 8, 9, 10}
|
||||
p256Sub(&res, &in1, &in2)
|
||||
want = p256Element{0, 0, 0, 0}
|
||||
if !reflect.DeepEqual(res, want) {
|
||||
t.Errorf("in1 == in2: got %v, want %v", res, want)
|
||||
}
|
||||
|
||||
// in1 < in2
|
||||
in1 = p256Element{1, 0, 0, 0}
|
||||
in2 = p256Element{2, 0, 0, 0}
|
||||
p256Sub(&res, &in1, &in2)
|
||||
// 1 - 2 mod 2^64 = 0xFFFFFFFFFFFFFFFF
|
||||
want = p256Element{0xfffffffffffffffe, 0xffffffff00000000,
|
||||
0xffffffffffffffff, 0xfffffffeffffffff}
|
||||
if !reflect.DeepEqual(res, want) {
|
||||
t.Errorf("in1 < in2: got %v, want %v", res, want)
|
||||
}
|
||||
}
|
||||
|
||||
func p256MulBy2Test(t *testing.T, x, p, r *big.Int) {
|
||||
x1 := new(big.Int).Mul(x, r)
|
||||
x1 = x1.Mod(x1, p)
|
||||
y1 := new(big.Int).Mul(big.NewInt(2), r)
|
||||
y1 = y1.Mod(y1, p)
|
||||
ax := new(p256Element)
|
||||
res := new(p256Element)
|
||||
res2 := new(p256Element)
|
||||
fromBig(ax, x1)
|
||||
p256MulBy2(res2, ax)
|
||||
p256FromMont(res, res2)
|
||||
resInt := toBigInt(res)
|
||||
|
||||
expected := new(big.Int).Mul(x, big.NewInt(2))
|
||||
expected = expected.Mod(expected, p)
|
||||
if resInt.Cmp(expected) != 0 {
|
||||
t.Fatalf("p256MulBy2(%x) = %x, want %x", x, resInt, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256MulBy2(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
pMinus1 := new(big.Int).Sub(p, big.NewInt(1))
|
||||
p256MulBy2Test(t, pMinus1, p, r)
|
||||
p256MulBy2Test(t, big.NewInt(0), p, r)
|
||||
p256MulBy2Test(t, big.NewInt(1), p, r)
|
||||
}
|
||||
|
||||
func p256AddTest(t *testing.T, x, y, p, r *big.Int) {
|
||||
x1 := new(big.Int).Mul(x, r)
|
||||
x1 = x1.Mod(x1, p)
|
||||
y1 := new(big.Int).Mul(y, r)
|
||||
y1 = y1.Mod(y1, p)
|
||||
ax := new(p256Element)
|
||||
ay := new(p256Element)
|
||||
res := new(p256Element)
|
||||
res2 := new(p256Element)
|
||||
fromBig(ax, x1)
|
||||
fromBig(ay, y1)
|
||||
p256Add(res2, ax, ay)
|
||||
p256FromMont(res, res2)
|
||||
resInt := toBigInt(res)
|
||||
|
||||
expected := new(big.Int).Add(x, y)
|
||||
expected = expected.Mod(expected, p)
|
||||
if resInt.Cmp(expected) != 0 {
|
||||
t.Fatalf("p256Add(%x, %x) = %x, want %x", x, y, resInt, expected)
|
||||
}
|
||||
}
|
||||
|
||||
func TestP256AddPMinus1(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
pMinus1 := new(big.Int).Sub(p, big.NewInt(1))
|
||||
p256AddTest(t, pMinus1, pMinus1, p, r)
|
||||
}
|
||||
|
||||
func TestFuzzyP256Add(t *testing.T) {
|
||||
p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
|
||||
r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)
|
||||
var scalar1 [32]byte
|
||||
var scalar2 [32]byte
|
||||
var timeout *time.Timer
|
||||
|
||||
if testing.Short() {
|
||||
timeout = time.NewTimer(10 * time.Millisecond)
|
||||
} else {
|
||||
timeout = time.NewTimer(2 * time.Second)
|
||||
}
|
||||
for {
|
||||
select {
|
||||
case <-timeout.C:
|
||||
return
|
||||
default:
|
||||
}
|
||||
io.ReadFull(rand.Reader, scalar1[:])
|
||||
io.ReadFull(rand.Reader, scalar2[:])
|
||||
x := new(big.Int).SetBytes(scalar1[:])
|
||||
y := new(big.Int).SetBytes(scalar2[:])
|
||||
p256AddTest(t, x, y, p, r)
|
||||
}
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
//go:build (amd64 || arm64 || s390x || ppc64le) && !purego
|
||||
//go:build (amd64 || arm64 || loong64 || s390x || ppc64le) && !purego
|
||||
|
||||
package sm2ec
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user