From aa2ef453f170cc3675ad222b0ee66be98792df9c Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Wed, 15 Oct 2025 13:37:10 +0800 Subject: [PATCH] internal/sm2ec: loong64 v1 --- internal/sm2ec/p256_asm_loong64.s | 881 ++++++++++++++++++++- internal/sm2ec/sm2p256.go | 2 +- internal/sm2ec/sm2p256_asm.go | 2 +- internal/sm2ec/sm2p256_asm_loong64.go | 64 -- internal/sm2ec/sm2p256_asm_loong64_test.go | 376 --------- internal/sm2ec/sm2p256_asm_test.go | 2 +- 6 files changed, 877 insertions(+), 450 deletions(-) delete mode 100644 internal/sm2ec/sm2p256_asm_loong64.go delete mode 100644 internal/sm2ec/sm2p256_asm_loong64_test.go diff --git a/internal/sm2ec/p256_asm_loong64.s b/internal/sm2ec/p256_asm_loong64.s index 0d1e7b5..c186191 100644 --- a/internal/sm2ec/p256_asm_loong64.s +++ b/internal/sm2ec/p256_asm_loong64.s @@ -7,9 +7,12 @@ #include "textflag.h" #define ZERO R0 +#define RSP R3 #define res_ptr R4 #define x_ptr R5 #define y_ptr R6 +#define a_ptr x_ptr +#define b_ptr y_ptr #define acc0 R7 #define acc1 R8 @@ -528,13 +531,13 @@ TEXT ·p256Sqr(SB),NOSPLIT,$0 ADDV $1, const0, const1 sqrLoop: - SUBV $1, y_ptr - CALL sm2P256SqrInternal<>(SB) - MOVV y0, x0 - MOVV y1, x1 - MOVV y2, x2 - MOVV y3, x3 - BNE y_ptr, sqrLoop + SUBV $1, y_ptr + CALL sm2P256SqrInternal<>(SB) + MOVV y0, x0 + MOVV y1, x1 + MOVV y2, x2 + MOVV y3, x3 + BNE y_ptr, sqrLoop MOVV y0, (8*0)(res_ptr) MOVV y1, (8*1)(res_ptr) @@ -1288,11 +1291,147 @@ TEXT ·p256OrdReduce(SB),NOSPLIT,$0 /* ---------------------------------------*/ // func p256Select(res *SM2P256Point, table *p256Table, idx, limit int) TEXT ·p256Select(SB),NOSPLIT,$0 + MOVV limit+24(FP), x_ptr + MOVV idx+16(FP), const0 + MOVV table+8(FP), y_ptr + MOVV res+0(FP), res_ptr + + MOVV $0, x0 + MOVV $0, x1 + MOVV $0, x2 + MOVV $0, x3 + MOVV $0, y0 + MOVV $0, y1 + MOVV $0, y2 + MOVV $0, y3 + MOVV $0, t0 + MOVV $0, t1 + MOVV $0, t2 + MOVV $0, t3 + + MOVV $0, const1 + +loop_select: + ADDV $1, const1, const1 + XOR const1, const0, hlp0 + + MOVV (8*0)(y_ptr), acc0 
+	MOVV (8*1)(y_ptr), acc1
+	MOVV (8*2)(y_ptr), acc2
+	MOVV (8*3)(y_ptr), acc3
+	MASKNEZ hlp0, acc0, acc0	// hlp0 = counter XOR idx: limbs survive only for the matching entry
+	MASKNEZ hlp0, acc1, acc1
+	MASKNEZ hlp0, acc2, acc2
+	MASKNEZ hlp0, acc3, acc3
+	OR acc0, x0, x0
+	OR acc1, x1, x1
+	OR acc2, x2, x2
+	OR acc3, x3, x3
+
+	ADDVU $32, y_ptr, y_ptr	// second 32-byte coordinate of the entry
+	MOVV (8*0)(y_ptr), acc0
+	MOVV (8*1)(y_ptr), acc1
+	MOVV (8*2)(y_ptr), acc2
+	MOVV (8*3)(y_ptr), acc3
+	MASKNEZ hlp0, acc0, acc0
+	MASKNEZ hlp0, acc1, acc1
+	MASKNEZ hlp0, acc2, acc2
+	MASKNEZ hlp0, acc3, acc3
+	OR acc0, y0, y0
+	OR acc1, y1, y1
+	OR acc2, y2, y2
+	OR acc3, y3, y3
+
+	ADDVU $32, y_ptr, y_ptr	// third 32-byte coordinate of the entry
+	MOVV (8*0)(y_ptr), acc0
+	MOVV (8*1)(y_ptr), acc1
+	MOVV (8*2)(y_ptr), acc2
+	MOVV (8*3)(y_ptr), acc3
+	MASKNEZ hlp0, acc0, acc0
+	MASKNEZ hlp0, acc1, acc1
+	MASKNEZ hlp0, acc2, acc2
+	MASKNEZ hlp0, acc3, acc3
+	OR acc0, t0, t0
+	OR acc1, t1, t1
+	OR acc2, t2, t2
+	OR acc3, t3, t3
+	ADDVU $32, y_ptr, y_ptr	// step to the next entry: 3*32 bytes are consumed per iteration, not 2*32
+	BNE const1, x_ptr, loop_select
+
+	MOVV x0, (8*0)(res_ptr)
+	MOVV x1, (8*1)(res_ptr)
+	MOVV x2, (8*2)(res_ptr)
+	MOVV x3, (8*3)(res_ptr)
+	MOVV y0, (8*4)(res_ptr)
+	MOVV y1, (8*5)(res_ptr)
+	MOVV y2, (8*6)(res_ptr)
+	MOVV y3, (8*7)(res_ptr)
+	MOVV t0, (8*8)(res_ptr)
+	MOVV t1, (8*9)(res_ptr)
+	MOVV t2, (8*10)(res_ptr)
+	MOVV t3, (8*11)(res_ptr)
+
 	RET
 
 /* ---------------------------------------*/
 // func p256SelectAffine(res *p256AffinePoint, table *p256AffineTable, idx int)
 TEXT ·p256SelectAffine(SB),NOSPLIT,$0
+	MOVV idx+16(FP), t0	// scans all 32 entries and keeps the one whose 1-based position equals idx; MOVV is the 64-bit integer move (MOVD is the FP double move on loong64)
+	MOVV table+8(FP), t1
+	MOVV res+0(FP), res_ptr
+
+	XOR x0, x0, x0	// accumulators start at zero, so an idx matching no entry yields all-zero output
+	XOR x1, x1, x1
+	XOR x2, x2, x2
+	XOR x3, x3, x3
+	XOR y0, y0, y0
+	XOR y1, y1, y1
+	XOR y2, y2, y2
+	XOR y3, y3, y3
+
+	MOVV $0, t2
+	MOVV $32, const0	// number of table entries visited
+
+loop_select:
+	ADDV $1, t2, t2
+	XOR t2, t0, hlp0	// zero only when the counter equals idx
+
+	MOVV (8*0)(t1), acc0
+	MOVV (8*1)(t1), acc1
+	MOVV (8*2)(t1), acc2
+	MOVV (8*3)(t1), acc3
+	MASKNEZ hlp0, acc0, acc0
+	MASKNEZ hlp0, acc1, acc1
+	MASKNEZ hlp0, acc2, acc2
+	MASKNEZ hlp0, acc3, acc3
+	OR acc0, x0, x0
+	OR acc1, x1, x1
+	OR acc2, x2, x2
+	OR acc3, x3, x3
+
+	ADDVU $32, t1, t1	// second 32-byte coordinate of the entry
+	MOVV (8*0)(t1), acc0
+	MOVV (8*1)(t1), acc1
+	MOVV (8*2)(t1), acc2
+	MOVV (8*3)(t1), acc3
+	MASKNEZ hlp0, acc0, acc0
+	MASKNEZ hlp0, acc1, acc1
+	MASKNEZ hlp0, acc2, acc2
+	MASKNEZ hlp0, acc3, acc3
+	OR acc0, y0, y0
+	OR acc1, y1, y1
+	OR acc2, y2, y2
+	OR acc3, y3, y3
+	ADDVU $32, t1, t1	// step to the next entry: 2*32 bytes are consumed per iteration
+	BNE t2, const0, loop_select
+	MOVV x0, (8*0)(res_ptr)
+	MOVV x1, (8*1)(res_ptr)
+	MOVV x2, (8*2)(res_ptr)
+	MOVV x3, (8*3)(res_ptr)
+	MOVV y0, (8*4)(res_ptr)
+	MOVV y1, (8*5)(res_ptr)
+	MOVV y2, (8*6)(res_ptr)
+	MOVV y3, (8*7)(res_ptr)
 	RET
 
 /* ---------------------------------------*/
@@ -1420,9 +1559,304 @@ TEXT ·p256MulBy2(SB),NOSPLIT,$0
 	MOVV x3, (8*3)(res_ptr)
 	RET
 
+/* ---------------------------------------*/
+#define x1in(off) (off)(a_ptr)
+#define y1in(off) (off + 32)(a_ptr)
+#define z1in(off) (off + 64)(a_ptr)
+#define x2in(off) (off)(b_ptr)
+#define z2in(off) (off + 64)(b_ptr)
+#define x3out(off) (off)(res_ptr)
+#define y3out(off) (off + 32)(res_ptr)
+#define z3out(off) (off + 64)(res_ptr)
+#define LDx(src) MOVV src(0), x0; MOVV src(8), x1; MOVV src(16), x2; MOVV src(24), x3
+#define LDy(src) MOVV src(0), y0; MOVV src(8), y1; MOVV src(16), y2; MOVV src(24), y3
+#define STx(src) MOVV x0, src(0); MOVV x1, src(8); MOVV x2, src(16); MOVV x3, src(24)
+#define STy(src) MOVV y0, src(0); MOVV y1, src(8); MOVV y2, src(16); MOVV y3, src(24)
+/* ---------------------------------------*/
+#define y2in(off) (32*0 + 8 + off)(RSP)
+#define s2(off) (32*1 + 8 + off)(RSP)
+#define z1sqr(off) (32*2 + 8 + off)(RSP)
+#define h(off) (32*3 + 8 + off)(RSP)
+#define r(off) (32*4 + 8 + off)(RSP)
+#define hsqr(off) (32*5 + 8 + off)(RSP)
+#define rsqr(off) (32*6 + 8 + off)(RSP)
+#define hcub(off) (32*7 + 8 + off)(RSP)
+
+#define z2sqr(off) (32*8 + 8 + off)(RSP)
+#define s1(off) (32*9 + 8 + off)(RSP)
+#define u1(off) (32*10 + 8 + off)(RSP)
+#define u2(off) (32*11 + 8 + off)(RSP)
+
 /* ---------------------------------------*/
 // func p256PointAddAffineAsm(res, in1 *SM2P256Point, in2 *p256AffinePoint, sign, sel, zero int)
TEXT ·p256PointAddAffineAsm(SB),0,$264-48 + MOVV in1+8(FP), a_ptr + MOVV in2+16(FP), b_ptr + MOVV sign+24(FP), hlp0 + MOVV sel+32(FP), hlp1 + MOVV zero+40(FP), res_ptr + + MOVV p256one<>+0x08(SB), const0 + ADDV $1, const0, const1 + + // Negate y2in based on sign + MOVV (8*0)(b_ptr), y0 + MOVV (8*1)(b_ptr), y1 + MOVV (8*2)(b_ptr), y2 + MOVV (8*3)(b_ptr), y3 + // (acc0, acc1, acc2, acc3) = - (y3, y2, y1, y0) + SGTU y0, ZERO, t3 + SUBV y0, ZERO, acc0 + SGTU y1, ZERO, t4 + SUBV y1, ZERO, acc1 + SGTU t3, acc1, t1 + SUBV t3, acc1, acc1 + OR t4, t1, t3 + SGTU y2, ZERO, t4 + SUBV y2, ZERO, acc2 + SGTU t3, acc2, t1 + SUBV t3, acc2, acc2 + OR t4, t1, t3 + SGTU y3, ZERO, t4 + SUBV y3, ZERO, acc3 + SGTU t3, acc3, t1 + SUBV t3, acc3, acc3 + OR t4, t1, t3 + + MOVV $1, acc4 + MASKEQZ t3, acc4, acc4 + MASKEQZ t3, const0, acc5 + MASKEQZ t3, const1, acc7 + + SGTU acc4, acc0, t3 + SUBV acc4, acc0, acc0 + ADDV t3, acc5, acc5 // no carry + SGTU acc5, acc1, t3 + SUBV acc5, acc1, acc1 + SGTU t3, acc2, t1 + SUBV t3, acc2, acc2 + ADDV t1, acc7, t3 // no carry + SUBV t3, acc3, acc3 + // If condition is 0, keep original value + MASKEQZ hlp0, acc0, acc0 + MASKNEZ hlp0, y0, y0 + MASKEQZ hlp0, acc1, acc1 + MASKNEZ hlp0, y1, y1 + MASKEQZ hlp0, acc2, acc2 + MASKNEZ hlp0, y2, y2 + MASKEQZ hlp0, acc3, acc3 + MASKNEZ hlp0, y3, y3 + OR acc0, y0 + OR acc1, y1 + OR acc2, y2 + OR acc3, y3 + // Store result + STy(y2in) + + // Begin point add + LDx(z1in) + CALL sm2P256SqrInternal<>(SB) // z1ˆ2 + STy(z1sqr) + + LDx(x2in) + CALL sm2P256MulInternal<>(SB) // x2 * z1ˆ2 + + LDx(x1in) + CALL sm2P256Subinternal<>(SB) // h = u2 - u1 + STx(h) + + LDy(z1in) + CALL p256MulInternal<>(SB) // z3 = h * z1 + + // iff select == 0, z3 = z1 + MOVV (8*8)(a_ptr), acc0 + MOVV (8*9)(a_ptr), acc1 + MOVV (8*10)(a_ptr), acc2 + MOVV (8*11)(a_ptr), acc3 + MASKEQZ hlp1, y0, y0 + MASKNEZ hlp1, acc0, acc0 + MASKEQZ hlp1, y1, y1 + MASKNEZ hlp1, acc1, acc1 + MASKEQZ hlp1, y2, y2 + MASKNEZ hlp1, acc2, acc2 + MASKEQZ hlp1, y3, y3 + MASKNEZ 
hlp1, acc3, acc3 + OR acc0, y0 + OR acc1, y1 + OR acc2, y2 + OR acc3, y3 + // iff zero == 0, z3 = 1 + MOVV $1, acc0 + MOVV const0, acc1 + MOVV $0, acc2 + MOVV const1, acc3 + MASKEQZ res_ptr, y0, y0 + MASKNEZ res_ptr, acc0, acc0 + MASKEQZ res_ptr, y1, y1 + MASKNEZ res_ptr, acc1, acc1 + MASKEQZ res_ptr, y2, y2 + MASKNEZ res_ptr, acc2, acc2 + MASKEQZ res_ptr, y3, y3 + MASKNEZ res_ptr, acc3, acc3 + OR acc0, y0 + OR acc1, y1 + OR acc2, y2 + OR acc3, y3 + LDx(z1in) + // store z3 + MOVV res+0(FP), t0 + MOVV y0, (8*8)(t0) + MOVV y1, (8*9)(t0) + MOVV y2, (8*10)(t0) + MOVV y3, (8*11)(t0) + + LDy(z1sqr) + CALL p256MulInternal<>(SB) // z1 ^ 3 + + LDx(y2in) + CALL p256MulInternal<>(SB) // s2 = y2 * z1ˆ3 + STy(s2) + + LDx(y1in) + CALL p256SubInternal<>(SB) // r = s2 - s1 + STx(r) + + CALL p256SqrInternal<>(SB) // rsqr = rˆ2 + STy (rsqr) + + LDx(h) + CALL p256SqrInternal<>(SB) // hsqr = hˆ2 + STy(hsqr) + + CALL p256MulInternal<>(SB) // hcub = hˆ3 + STy(hcub) + + LDx(y1in) + CALL p256MulInternal<>(SB) // y1 * hˆ3 + STy(s2) + + MOVV hsqr(0*8), x0 + MOVV hsqr(1*8), x1 + MOVV hsqr(2*8), x2 + MOVV hsqr(3*8), x3 + CALL p256MulInternal<>(SB) // hsqr * u1 + MOVV y0, h(0*8) + MOVV y1, h(1*8) + MOVV y2, h(2*8) + MOVV y3, h(3*8) + + p256MulBy2Inline // u1 * hˆ2 * 2, inline + + LDy(rsqr) + CALL p256SubInternal<>(SB) // rˆ2 - u1 * hˆ2 * 2 + + MOVV x0, y0 + MOVV x1, y1 + MOVV x2, y2 + MOVV x3, y3 + LDy(hcub) + CALL p256SubInternal<>(SB) + + MOVV (8*0)(a_ptr), acc0 + MOVV (8*1)(a_ptr), acc1 + MOVV (8*2)(a_ptr), acc2 + MOVV (8*3)(a_ptr), acc3 + // iff select == 0, x3 = x1 + MASKEQZ hlp1, x0, x0 + MASKNEZ hlp1, acc0, acc0 + MASKEQZ hlp1, x1, x1 + MASKNEZ hlp1, acc1, acc1 + MASKEQZ hlp1, x2, x2 + MASKNEZ hlp1, acc2, acc2 + MASKEQZ hlp1, x3, x3 + MASKNEZ hlp1, acc3, acc3 + OR acc0, x0 + OR acc1, x1 + OR acc2, x2 + OR acc3, x3 + MOVV (8*0)(b_ptr), acc0 + MOVV (8*1)(b_ptr), acc1 + MOVV (8*2)(b_ptr), acc2 + MOVV (8*3)(b_ptr), acc3 + // iff zero == 0, x3 = x2 + MASKEQZ res_ptr, x0, x0 + MASKNEZ 
res_ptr, acc0, acc0 + MASKEQZ res_ptr, x1, x1 + MASKNEZ res_ptr, acc1, acc1 + MASKEQZ res_ptr, x2, x2 + MASKNEZ res_ptr, acc2, acc2 + MASKEQZ res_ptr, x3, x3 + MASKNEZ res_ptr, acc3, acc3 + OR acc0, x0 + OR acc1, x1 + OR acc2, x2 + OR acc3, x3 + // store x3 + MOVV res+0(FP), t0 + MOVV x0, (8*0)(t0) + MOVV x1, (8*1)(t0) + MOVV x2, (8*2)(t0) + MOVV x3, (8*3)(t0) + + MOVV h(0*8), y0 + MOVV h(1*8), y1 + MOVV h(2*8), y2 + MOVV h(3*8), y3 + CALL p256SubInternal<>(SB) + + MOVV r(0*8), y0 + MOVV r(1*8), y1 + MOVV r(2*8), y2 + MOVV r(3*8), y3 + CALL p256MulInternal<>(SB) + + MOVV s2(0*8), x0 + MOVV s2(1*8), x1 + MOVV s2(2*8), x2 + MOVV s2(3*8), x3 + CALL p256SubInternal<>(SB) + + MOVV (8*4)(a_ptr), acc0 + MOVV (8*5)(a_ptr), acc1 + MOVV (8*6)(a_ptr), acc2 + MOVV (8*7)(a_ptr), acc3 + // iff select == 0, y3 = y1 + MASKEQZ hlp1, x0, x0 + MASKNEZ hlp1, acc0, acc0 + MASKEQZ hlp1, x1, x1 + MASKNEZ hlp1, acc1, acc1 + MASKEQZ hlp1, x2, x2 + MASKNEZ hlp1, acc2, acc2 + MASKEQZ hlp1, x3, x3 + MASKNEZ hlp1, acc3, acc3 + OR acc0, x0 + OR acc1, x1 + OR acc2, x2 + OR acc3, x3 + MOVV y2in(0*8), acc0 + MOVV y2in(1*8), acc1 + MOVV y2in(2*8), acc2 + MOVV y2in(3*8), acc3 + // iff zero == 0, y3 = y2 + MASKEQZ res_ptr, x0, x0 + MASKNEZ res_ptr, acc0, acc0 + MASKEQZ res_ptr, x1, x1 + MASKNEZ res_ptr, acc1, acc1 + MASKEQZ res_ptr, x2, x2 + MASKNEZ res_ptr, acc2, acc2 + MASKEQZ res_ptr, x3, x3 + MASKNEZ res_ptr, acc3, acc3 + OR acc0, x0 + OR acc1, x1 + OR acc2, x2 + OR acc3, x3 + // store y3 + MOVV res+0(FP), t0 + MOVV x0, (8*4)(t0) + MOVV x1, (8*5)(t0) + MOVV x2, (8*6)(t0) + MOVV x3, (8*7)(t0) + RET // (x3, x2, x1, x0) = (x3, x2, x1, x0) + (y3, y2, y1, y0) @@ -1497,17 +1931,450 @@ TEXT ·p256Add(SB),NOSPLIT,$0 MOVV x3, (8*3)(res_ptr) RET +// (y3, y2, y1, y0) = (y3, y2, y1, y0) / 2 +#define p256DivideBy2 \ + MOVV $1, acc1; \ + AND t1, y0, t0; \ + MASKEQZ t0, acc1, acc1 + MASKEQZ t0, const0, acc2; \ + MASKEQZ t0, const1, acc3; \ + SGTU acc1, y0, t1; \ + SUBV acc1, y0, y0; \ + ADDV t1, acc2, acc2; \ + 
SRLV $1, y0, y0; \ + SGTU acc2, y1, t1; \ + SUBV acc2, y1, y1; \ + SGTU t1, y2, t2; \ + SUBV t1, y2, y2; \ + BSTRINSV $63, y1, $63, y0; \ + SRLV $1, y1, y1; \ + ADDV t2, acc3, acc3; \ + BSTRINSV $63, y2, $63, y1; \ + SRLV $1, y2, y2; \ + SUBV acc3, y3, t1; \ + SGTU y3, acc3, t2; \ + BSTRINSV $63, t1, $63, y2; \ + SRLV $1, t1, y3; \ + MASKEQZ t0, t2, t2; \ + BSTRINSV $63, t2, $63, y3 + /* ---------------------------------------*/ +// func p256DivBy2(res, in *p256Element) +TEXT ·p256DivBy2(SB),NOSPLIT,$0 + MOVV res+0(FP), res_ptr + MOVV in+8(FP), x_ptr + MOVV (8*0)(x_ptr), y0 + MOVV (8*1)(x_ptr), y1 + MOVV (8*2)(x_ptr), y2 + MOVV (8*3)(x_ptr), y3 + MOVV p256one<>+0x08(SB), const0 + ADDV $1, const0, const1 + p256DivideBy2 + MOVV y0, (8*0)(res_ptr) + MOVV y1, (8*1)(res_ptr) + MOVV y2, (8*2)(res_ptr) + MOVV y3, (8*3)(res_ptr) + RET + +#define s(off) (32*0 + 8 + off)(RSP) +#define m(off) (32*1 + 8 + off)(RSP) +#define zsqr(off) (32*2 + 8 + off)(RSP) +#define tmp(off) (32*3 + 8 + off)(RSP) + //func p256PointDoubleAsm(res, in *SM2P256Point) TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$136-16 + MOVV res+0(FP), res_ptr + MOVV in+8(FP), a_ptr + + MOVV p256one<>+0x08(SB), const0 + ADDV $1, const0, const1 + + // Begin point double + MOVV (8*8)(a_ptr), x0 + MOVV (8*9)(a_ptr), x1 + MOVV (8*10)(a_ptr), x2 + MOVV (8*11)(a_ptr), x3 + CALL sm2P256SqrInternal<>(SB) // z1ˆ2 + MOVV y0, zsqr(0*8) // store z^2 + MOVV y1, zsqr(1*8) + MOVV y2, zsqr(2*8) + MOVV y3, zsqr(3*8) + + MOVV (8*0)(a_ptr), x0 // load x + MOVV (8*1)(a_ptr), x1 + MOVV (8*2)(a_ptr), x2 + MOVV (8*3)(a_ptr), x3 + p256AddInline + STx(m) + + LDx(z1in) + LDy(y1in) + CALL sm2P256MulInternal<>(SB) + p256MulBy2Inline + STx(z3out) + + LDy(x1in) + LDx(zsqr) + CALL sm2P256Subinternal<>(SB) + LDy(m) + CALL sm2P256MulInternal<>(SB) + + // Multiply by 3 + p256MulBy2Inline + p256AddInline + STx(m) + + LDy(y1in) + p256MulBy2Inline + CALL sm2P256SqrInternal<>(SB) + STy(s) + MOVV y0, x0 + MOVV y1, x1 + MOVV y2, x2 + MOVV y3, x3 + CALL 
sm2P256SqrInternal<>(SB) + + // Divide by 2 + p256DivideBy2 + + STy(y3out) + + LDx(x1in) + LDy(s) + CALL sm2P256MulInternal<>(SB) + STy(s) + p256MulBy2Inline + STx(tmp) + + LDx(m) + CALL sm2P256SqrInternal<>(SB) + LDx(tmp) + CALL sm2P256Subinternal<>(SB) + + STx(x3out) + + LDy(s) + CALL sm2P256Subinternal<>(SB) + + LDy(m) + CALL sm2P256MulInternal<>(SB) + + LDx(y3out) + CALL sm2P256Subinternal<>(SB) + STx(y3out) + RET +#define p256PointDoubleRound() \ + LDx(z3out) \ // load z + CALL sm2P256SqrInternal<>(SB) \ + MOVV y0, zsqr(0*8) \ // store z^2 + MOVV y1, zsqr(1*8) \ + MOVV y2, zsqr(2*8) \ + MOVV y3, zsqr(3*8) \ + \ + LDx(x3out) \// load x + p256AddInline \ + STx(m) \ + \ + LDx(z3out) \ // load z + LDy(y3out) \ // load y + CALL sm2P256MulInternal<>(SB) \ + p256MulBy2Inline \ + STx(z3out) \ // store result z + \ + LDy(x3out) \ // load x + LDx(zsqr) \ + CALL sm2P256Subinternal<>(SB) \ + LDy(m) \ + CALL sm2P256MulInternal<>(SB) \ + \ + \// Multiply by 3 + p256MulBy2Inline \ + p256AddInline \ + STx(m) \ + \ + LDy(y3out) \ // load y + p256MulBy2Inline \ + CALL sm2P256SqrInternal<>(SB) \ + STy(s) \ + MOVV y0, x0 \ + MOVV y1, x1 \ + MOVV y2, x2 \ + MOVV y3, x3 \ + CALL sm2P256SqrInternal<>(SB) \ + \ + \// Divide by 2 + p256DivideBy2 \ + STy(y3out) \ + \ + LDx(x3out) \ // load x + LDy(s) \ + CALL sm2P256MulInternal<>(SB) \ + STy(s) \ + p256MulBy2Inline \ + STx(tmp) \ + \ + LDx(m) \ + CALL sm2P256SqrInternal<>(SB) \ + LDx(tmp) \ + CALL sm2P256Subinternal<>(SB) \ + \ + STx(x3out) \ + \ + LDy(s) \ + CALL sm2P256Subinternal<>(SB) \ + \ + LDy(m) \ + CALL sm2P256MulInternal<>(SB) \ + \ + LDx(y3out) \ + CALL sm2P256Subinternal<>(SB) \ + STx(y3out) \ + + /* ---------------------------------------*/ //func p256PointDouble6TimesAsm(res, in *SM2P256Point) TEXT ·p256PointDouble6TimesAsm(SB),NOSPLIT,$136-16 + MOVV res+0(FP), res_ptr + MOVV in+8(FP), a_ptr + + MOVV p256one<>+0x08(SB), const0 + ADDV $1, const0, const1 + + // Begin point double + MOVV (8*8)(a_ptr), x0 + MOVV (8*9)(a_ptr), 
x1 + MOVV (8*10)(a_ptr), x2 + MOVV (8*11)(a_ptr), x3 + CALL sm2P256SqrInternal<>(SB) // z1ˆ2 + MOVV y0, zsqr(0*8) // store z^2 + MOVV y1, zsqr(1*8) + MOVV y2, zsqr(2*8) + MOVV y3, zsqr(3*8) + + MOVV (8*0)(a_ptr), x0 // load x + MOVV (8*1)(a_ptr), x1 + MOVV (8*2)(a_ptr), x2 + MOVV (8*3)(a_ptr), x3 + p256AddInline + STx(m) + + LDx(z1in) + LDy(y1in) + CALL sm2P256MulInternal<>(SB) + p256MulBy2Inline + STx(z3out) + + LDy(x1in) + LDx(zsqr) + CALL sm2P256Subinternal<>(SB) + LDy(m) + CALL sm2P256MulInternal<>(SB) + + // Multiply by 3 + p256MulBy2Inline + p256AddInline + STx(m) + + LDy(y1in) + p256MulBy2Inline + CALL sm2P256SqrInternal<>(SB) + STy(s) + MOVV y0, x0 + MOVV y1, x1 + MOVV y2, x2 + MOVV y3, x3 + CALL sm2P256SqrInternal<>(SB) + + // Divide by 2 + p256DivideBy2 + + STy(y3out) + + LDx(x1in) + LDy(s) + CALL sm2P256MulInternal<>(SB) + STy(s) + p256MulBy2Inline + STx(tmp) + + LDx(m) + CALL sm2P256SqrInternal<>(SB) + LDx(tmp) + CALL sm2P256Subinternal<>(SB) + + STx(x3out) + + LDy(s) + CALL sm2P256Subinternal<>(SB) + + LDy(m) + CALL sm2P256MulInternal<>(SB) + + LDx(y3out) + CALL sm2P256Subinternal<>(SB) + STx(y3out) + + // Begin point double rounds 2 - 6 + p256PointDoubleRound() + p256PointDoubleRound() + p256PointDoubleRound() + p256PointDoubleRound() + p256PointDoubleRound() + RET /* ---------------------------------------*/ +#undef y2in +#undef x3out +#undef y3out +#undef z3out +#define y2in(off) (off + 32)(b_ptr) +#define x3out(off) (off)(b_ptr) +#define y3out(off) (off + 32)(b_ptr) +#define z3out(off) (off + 64)(b_ptr) // func p256PointAddAsm(res, in1, in2 *SM2P256Point) int TEXT ·p256PointAddAsm(SB),0,$392-32 + // See https://hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-3.html#addition-add-2007-bl + // Move input to stack in order to free registers + MOVV in1+8(FP), a_ptr + MOVV in2+16(FP), b_ptr + + MOVV p256one<>+0x08(SB), const0 + ADDV $1, const0, const1 + + // Begin point add + LDx(z2in) + CALL sm2P256SqrInternal<>(SB) // z2^2 + STy(z2sqr) + + CALL 
sm2P256MulInternal<>(SB) // z2^3
+
+	LDx(y1in)
+	CALL sm2P256MulInternal<>(SB) // s1 = z2ˆ3*y1
+	STy(s1)
+
+	LDx(z1in)
+	CALL sm2P256SqrInternal<>(SB) // z1^2
+	STy(z1sqr)
+
+	CALL sm2P256MulInternal<>(SB) // z1^3
+
+	LDx(y2in)
+	CALL sm2P256MulInternal<>(SB) // s2 = z1ˆ3*y2
+
+	LDx(s1)
+	CALL sm2P256Subinternal<>(SB) // r = s2 - s1
+	STx(r)
+
+	// Check if zero mod p256
+	OR x0, x1, acc0
+	OR x2, x3, acc1
+	OR acc0, acc1, acc1
+	SGTU $1, acc1, hlp0	// hlp0 = (r == 0): 1 > acc1 holds only when every limb is zero
+
+	MOVV $-1, acc0
+	MOVV p256p<>+0x08(SB), acc1
+	MOVV p256p<>+0x18(SB), acc3
+
+	XOR acc0, x0, acc4
+	XOR acc1, x1, acc5
+	XOR acc0, x2, acc6
+	XOR acc3, x3, acc7
+	OR acc4, acc5, acc4
+	OR acc6, acc7, acc7
+	OR acc4, acc7, acc7
+	SGTU $1, acc7, res_ptr	// res_ptr = (r == p)
+	OR hlp0, res_ptr, res_ptr	// res_ptr = (r is zero mod p); OR-ing the "!= 0" and "!= p" flags would always give 1
+
+	LDx(z2sqr)
+	LDy(x1in)
+	CALL sm2P256MulInternal<>(SB) // u1 = x1 * z2ˆ2
+	STy(u1)
+
+	LDx(z1sqr)
+	LDy(x2in)
+	CALL sm2P256MulInternal<>(SB) // u2 = x2 * z1ˆ2
+	STy(u2)
+
+	LDx(u1)
+	CALL sm2P256Subinternal<>(SB) // h = u2 - u1
+	STx(h)
+
+	// Check if zero mod p256
+	OR x0, x1, acc0
+	OR x2, x3, acc1
+	OR acc0, acc1, acc1
+	SGTU $1, acc1, hlp0	// hlp0 = (h == 0)
+
+	MOVV $-1, acc0
+	MOVV p256p<>+0x08(SB), acc1
+	MOVV p256p<>+0x18(SB), acc3
+
+	XOR acc0, x0, acc4
+	XOR acc1, x1, acc5
+	XOR acc0, x2, acc6
+	XOR acc3, x3, acc7
+	OR acc4, acc5, acc4
+	OR acc6, acc7, acc7
+	OR acc4, acc7, acc7
+	SGTU $1, acc7, t0	// t0 = (h == p)
+	OR hlp0, t0, hlp0	// hlp0 = (h is zero mod p)
+
+	AND hlp0, res_ptr, res_ptr	// flag returned in ret+24(FP): 1 only when both r and h are zero mod p
+
+	LDx(r)
+	CALL sm2P256SqrInternal<>(SB) // rsqr = rˆ2
+	STy(rsqr)
+
+	LDx(h)
+	CALL sm2P256SqrInternal<>(SB) // hsqr = hˆ2
+	STy(hsqr)
+
+	LDx(h)
+	CALL sm2P256MulInternal<>(SB) // hcub = hˆ3
+	STy(hcub)
+
+	LDx(s1)
+	CALL sm2P256MulInternal<>(SB)
+	STy(s2)
+
+	LDx(z1in)
+	LDy(z2in)
+	CALL sm2P256MulInternal<>(SB) // z1 * z2
+	LDx(h)
+	CALL sm2P256MulInternal<>(SB) // z1 * z2 * h
+	MOVV res+0(FP), b_ptr
+	STy(z3out)
+
+	LDx(hsqr)
+	LDy(u1)
+	CALL sm2P256MulInternal<>(SB) // hˆ2 * u1
+	STy(u2)
+
+	p256MulBy2Inline // u1 * hˆ2 * 2, inline
+	LDy(rsqr)
+	CALL sm2P256Subinternal<>(SB) // rˆ2 - 
u1 * hˆ2 * 2 + + MOVV x0, y0 + MOVV x1, y1 + MOVV x2, y2 + MOVV x3, y3 + LDx(hcub) + CALL sm2P256Subinternal<>(SB) + STx(x3out) + + LDy(u2) + CALL sm2P256Subinternal<>(SB) + + LDy(r) + CALL sm2P256MulInternal<>(SB) + + LDx(s2) + CALL sm2P256Subinternal<>(SB) + STx(y3out) + + MOVV res_ptr, ret+24(FP) + RET diff --git a/internal/sm2ec/sm2p256.go b/internal/sm2ec/sm2p256.go index bf7db8c..2b06117 100644 --- a/internal/sm2ec/sm2p256.go +++ b/internal/sm2ec/sm2p256.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -//go:build purego || !(amd64 || arm64 || s390x || ppc64le) +//go:build purego || !(amd64 || arm64 || loong64 || s390x || ppc64le) package sm2ec diff --git a/internal/sm2ec/sm2p256_asm.go b/internal/sm2ec/sm2p256_asm.go index cbdca71..4e85dbf 100644 --- a/internal/sm2ec/sm2p256_asm.go +++ b/internal/sm2ec/sm2p256_asm.go @@ -7,7 +7,7 @@ // 256-bit primes" // https://link.springer.com/article/10.1007%2Fs13389-014-0090-x // https://eprint.iacr.org/2013/816.pdf -//go:build (amd64 || arm64 || s390x || ppc64le) && !purego +//go:build (amd64 || arm64 || loong64 || s390x || ppc64le) && !purego package sm2ec diff --git a/internal/sm2ec/sm2p256_asm_loong64.go b/internal/sm2ec/sm2p256_asm_loong64.go deleted file mode 100644 index 16a0c19..0000000 --- a/internal/sm2ec/sm2p256_asm_loong64.go +++ /dev/null @@ -1,64 +0,0 @@ -package sm2ec - -import ( - "github.com/emmansun/gmsm/internal/deps/cpu" -) - -// p256Element is a P-256 base field element in [0, P-1] in the Montgomery -// domain (with R 2²⁵⁶) as four limbs in little-endian order value. -type p256Element [4]uint64 - -type SM2P256Point1 struct { - // (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point - // at infinity can be represented by any set of coordinates with Z = 0. 
- x, y, z p256Element -} - -var supportLSX = cpu.Loong64.HasLSX -var supportLASX = cpu.Loong64.HasLASX - -//go:noescape -func p256BigToLittle(res *p256Element, in *[32]byte) - -//go:noescape -func p256LittleToBig(res *[32]byte, in *p256Element) - -// If cond is 0, sets res = b, otherwise sets res = a. -// -//go:noescape -func p256MovCond(res, a, b *SM2P256Point1, cond int) - -// If cond is not 0, sets val = -val mod p. -// -//go:noescape -func p256NegCond(val *p256Element, cond int) - -// Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p. -// -//go:noescape -func p256Mul(res, in1, in2 *p256Element) - -// Montgomery square, repeated n times (n >= 1). -// -//go:noescape -func p256Sqr(res, in *p256Element, n int) - -// Montgomery multiplication by R⁻¹, or 1 outside the domain. -// Sets res = in * R⁻¹, bringing res out of the Montgomery domain. -// -//go:noescape -func p256FromMont(res, in *p256Element) - -// p256OrdReduce ensures s is in the range [0, ord(G)-1]. -// -//go:noescape -func p256OrdReduce(s *p256OrdElement) - -//go:noescape -func p256Add(res, in1, in2 *p256Element) - -//go:noescape -func p256Sub(res, in1, in2 *p256Element) - -//go:noescape -func p256MulBy2(res, in *p256Element) diff --git a/internal/sm2ec/sm2p256_asm_loong64_test.go b/internal/sm2ec/sm2p256_asm_loong64_test.go deleted file mode 100644 index eeefba9..0000000 --- a/internal/sm2ec/sm2p256_asm_loong64_test.go +++ /dev/null @@ -1,376 +0,0 @@ -//go:build loong64 && !purego - -package sm2ec - -import ( - "bytes" - "crypto/rand" - "encoding/binary" - "fmt" - "io" - "math/big" - "reflect" - "testing" - "time" -) - -func TestP256BigToLittle(t *testing.T) { - // 构造一个已知的 32 字节大端输入 - var in [32]byte - for i := 0; i < 32; i++ { - in[i] = byte(i + 1) - } - var out p256Element - - p256BigToLittle(&out, &in) - - // 检查每个 limb 是否为小端解包 - for i := 0; i < 4; i++ { - expected := binary.BigEndian.Uint64(in[i*8 : (i+1)*8]) - k := 3 - i // 逆序存储 - if out[k] != expected { - t.Errorf("limb %d: got 0x%x, want 
0x%x", k, out[k], expected) - } - } - - // 逆操作测试 - var back [32]byte - p256LittleToBig(&back, &out) - if !bytes.Equal(in[:], back[:]) { - t.Errorf("p256LittleToBig(p256BigToLittle(...)) mismatch\nin: %x\nback: %x", in, back) - } -} - -func TestP256NegCond(t *testing.T) { - var tests = []struct { - input p256Element - cond int - expected p256Element - }{ - { - input: p256Element{1, 0, 0, 0}, - cond: 1, - expected: p256Element{0xfffffffffffffffe, 0xffffffff00000000, 0xffffffffffffffff, 0xfffffffeffffffff}, - }, - { - input: p256Element{1, 0, 0, 0}, - cond: 0, - expected: p256Element{1, 0, 0, 0}, - }, - { - input: p256Element{0x1, 0xffffffff00000001, 0xfffffffffffffffe, 0xfffffffeffffffff}, - cond: 1, - expected: p256Element{0xfffffffffffffffe, 0xffffffffffffffff, 0, 0}, - }, - } - - for i, test := range tests { - var result p256Element - copy(result[:], test.input[:]) - p256NegCond(&result, test.cond) - if result != test.expected { - t.Errorf("test %d: got %x, want %x", i, result, test.expected) - } - } -} - -func newPoint(x, y, z uint64) *SM2P256Point1 { - return &SM2P256Point1{ - x: p256Element{x, x + 1, x + 2, x + 3}, - y: p256Element{y, y + 1, y + 2, y + 3}, - z: p256Element{z, z + 1, z + 2, z + 3}, - } -} - -func TestP256MovCond(t *testing.T) { - fmt.Printf("supportLSX=%v, supportLASX=%v\n", supportLSX, supportLASX) - a := newPoint(10, 20, 30) - b := newPoint(100, 200, 300) - var res SM2P256Point1 - - // cond == 0: res = b - p256MovCond(&res, a, b, 0) - if !reflect.DeepEqual(res, *b) { - t.Errorf("cond=0: got %+v, want %+v", res, *b) - } - - // cond != 0: res = a - p256MovCond(&res, a, b, 1) - if !reflect.DeepEqual(res, *a) { - t.Errorf("cond=1: got %+v, want %+v", res, *a) - } - - // cond < 0: res = a (should treat any nonzero as true) - p256MovCond(&res, a, b, -123) - if !reflect.DeepEqual(res, *a) { - t.Errorf("cond=-123: got %+v, want %+v", res, *a) - } -} - -// fromBig converts a *big.Int into a format used by this code. 
-func fromBig(out *p256Element, big *big.Int) { - for i := range out { - out[i] = 0 - } - - for i, v := range big.Bits() { - out[i] = uint64(v) - } -} - -func toBigInt(in *p256Element) *big.Int { - var valBytes [32]byte - p256LittleToBig(&valBytes, in) - return new(big.Int).SetBytes(valBytes[:]) -} - -func p256MulTest(t *testing.T, x, y, p, r *big.Int) { - x1 := new(big.Int).Mul(x, r) - x1 = x1.Mod(x1, p) - y1 := new(big.Int).Mul(y, r) - y1 = y1.Mod(y1, p) - ax := new(p256Element) - ay := new(p256Element) - res := new(p256Element) - res2 := new(p256Element) - fromBig(ax, x1) - fromBig(ay, y1) - p256Mul(res2, ax, ay) - p256FromMont(res, res2) - resInt := toBigInt(res) - - expected := new(big.Int).Mul(x, y) - expected = expected.Mod(expected, p) - if resInt.Cmp(expected) != 0 { - t.Fatalf("p256Mul(%x, %x) = %x, want %x", x, y, resInt, expected) - } -} - -func TestP256MulPMinus1(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - pMinus1 := new(big.Int).Sub(p, big.NewInt(1)) - p256MulTest(t, pMinus1, pMinus1, p, r) -} - -func TestFuzzyP256Mul(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - var scalar1 [32]byte - var scalar2 [32]byte - var timeout *time.Timer - - if testing.Short() { - timeout = time.NewTimer(10 * time.Millisecond) - } else { - timeout = time.NewTimer(2 * time.Second) - } - for { - select { - case <-timeout.C: - return - default: - } - io.ReadFull(rand.Reader, scalar1[:]) - io.ReadFull(rand.Reader, scalar2[:]) - x := new(big.Int).SetBytes(scalar1[:]) - y := new(big.Int).SetBytes(scalar2[:]) - p256MulTest(t, x, y, p, r) - } -} - -func p256SqrTest(t *testing.T, x, p, r *big.Int) { - x1 := 
new(big.Int).Mul(x, r) - x1 = x1.Mod(x1, p) - ax := new(p256Element) - res := new(p256Element) - res2 := new(p256Element) - fromBig(ax, x1) - p256Sqr(res2, ax, 1) - p256FromMont(res, res2) - resInt := toBigInt(res) - - expected := new(big.Int).Mul(x, x) - expected = expected.Mod(expected, p) - if resInt.Cmp(expected) != 0 { - t.Fatalf("p256Sqr(%x) = %x, want %x", x, resInt, expected) - } -} - -func TestP256SqrPMinus1(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - pMinus1 := new(big.Int).Sub(p, big.NewInt(1)) - p256SqrTest(t, pMinus1, p, r) -} - -func TestFuzzyP256Sqr(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - var scalar1 [32]byte - var timeout *time.Timer - - if testing.Short() { - timeout = time.NewTimer(10 * time.Millisecond) - } else { - timeout = time.NewTimer(2 * time.Second) - } - for { - select { - case <-timeout.C: - return - default: - } - io.ReadFull(rand.Reader, scalar1[:]) - x := new(big.Int).SetBytes(scalar1[:]) - p256SqrTest(t, x, p, r) - } -} - -func TestP256OrdReduce(t *testing.T) { - p256Ord := &p256OrdElement{0x53bbf40939d54123, 0x7203df6b21c6052b, 0xffffffffffffffff, 0xfffffffeffffffff} - // s < p256Ord - var s1 p256OrdElement - copy(s1[:], p256Ord[:]) - s1[0] -= 1 // s1 = p256Ord - 1 - s1Orig := s1 - p256OrdReduce(&s1) - if s1 != s1Orig { - t.Errorf("p256OrdReduce changed s when s < p256Ord: got %x, want %x", s1, s1Orig) - } - - // s >= p256Ord - var s2 p256OrdElement - copy(s2[:], p256Ord[:]) - // s2 = p256Ord - p256OrdReduce(&s2) - zero := p256OrdElement{} - if s2 != zero { - t.Errorf("p256OrdReduce failed for s == p256Ord: got %x, want 0", s2) - } - - // s2 = p256Ord + 1 - 
copy(s2[:], p256Ord[:]) - s2[0] += 1 - p256OrdReduce(&s2) - one := p256OrdElement{1, 0, 0, 0} - if s2 != one { - t.Errorf("p256OrdReduce failed for s == p256Ord+1: got %x, want %x", s2, one) - } -} - -func TestP256Sub(t *testing.T) { - // in1 > in2 - in1 := p256Element{5, 0, 0, 0} - in2 := p256Element{3, 0, 0, 0} - var res p256Element - p256Sub(&res, &in1, &in2) - want := p256Element{2, 0, 0, 0} - if !reflect.DeepEqual(res, want) { - t.Errorf("in1 > in2: got %v, want %v", res, want) - } - - // in1 == in2 - in1 = p256Element{7, 8, 9, 10} - in2 = p256Element{7, 8, 9, 10} - p256Sub(&res, &in1, &in2) - want = p256Element{0, 0, 0, 0} - if !reflect.DeepEqual(res, want) { - t.Errorf("in1 == in2: got %v, want %v", res, want) - } - - // in1 < in2 - in1 = p256Element{1, 0, 0, 0} - in2 = p256Element{2, 0, 0, 0} - p256Sub(&res, &in1, &in2) - // 1 - 2 mod 2^64 = 0xFFFFFFFFFFFFFFFF - want = p256Element{0xfffffffffffffffe, 0xffffffff00000000, - 0xffffffffffffffff, 0xfffffffeffffffff} - if !reflect.DeepEqual(res, want) { - t.Errorf("in1 < in2: got %v, want %v", res, want) - } -} - -func p256MulBy2Test(t *testing.T, x, p, r *big.Int) { - x1 := new(big.Int).Mul(x, r) - x1 = x1.Mod(x1, p) - y1 := new(big.Int).Mul(big.NewInt(2), r) - y1 = y1.Mod(y1, p) - ax := new(p256Element) - res := new(p256Element) - res2 := new(p256Element) - fromBig(ax, x1) - p256MulBy2(res2, ax) - p256FromMont(res, res2) - resInt := toBigInt(res) - - expected := new(big.Int).Mul(x, big.NewInt(2)) - expected = expected.Mod(expected, p) - if resInt.Cmp(expected) != 0 { - t.Fatalf("p256MulBy2(%x) = %x, want %x", x, resInt, expected) - } -} - -func TestP256MulBy2(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - pMinus1 := new(big.Int).Sub(p, big.NewInt(1)) - p256MulBy2Test(t, pMinus1, p, r) - p256MulBy2Test(t, big.NewInt(0), p, r) - 
p256MulBy2Test(t, big.NewInt(1), p, r) -} - -func p256AddTest(t *testing.T, x, y, p, r *big.Int) { - x1 := new(big.Int).Mul(x, r) - x1 = x1.Mod(x1, p) - y1 := new(big.Int).Mul(y, r) - y1 = y1.Mod(y1, p) - ax := new(p256Element) - ay := new(p256Element) - res := new(p256Element) - res2 := new(p256Element) - fromBig(ax, x1) - fromBig(ay, y1) - p256Add(res2, ax, ay) - p256FromMont(res, res2) - resInt := toBigInt(res) - - expected := new(big.Int).Add(x, y) - expected = expected.Mod(expected, p) - if resInt.Cmp(expected) != 0 { - t.Fatalf("p256Add(%x, %x) = %x, want %x", x, y, resInt, expected) - } -} - -func TestP256AddPMinus1(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - pMinus1 := new(big.Int).Sub(p, big.NewInt(1)) - p256AddTest(t, pMinus1, pMinus1, p, r) -} - -func TestFuzzyP256Add(t *testing.T) { - p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) - r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) - var scalar1 [32]byte - var scalar2 [32]byte - var timeout *time.Timer - - if testing.Short() { - timeout = time.NewTimer(10 * time.Millisecond) - } else { - timeout = time.NewTimer(2 * time.Second) - } - for { - select { - case <-timeout.C: - return - default: - } - io.ReadFull(rand.Reader, scalar1[:]) - io.ReadFull(rand.Reader, scalar2[:]) - x := new(big.Int).SetBytes(scalar1[:]) - y := new(big.Int).SetBytes(scalar2[:]) - p256AddTest(t, x, y, p, r) - } -} diff --git a/internal/sm2ec/sm2p256_asm_test.go b/internal/sm2ec/sm2p256_asm_test.go index 7f82692..559f63c 100644 --- a/internal/sm2ec/sm2p256_asm_test.go +++ b/internal/sm2ec/sm2p256_asm_test.go @@ -1,4 +1,4 @@ -//go:build (amd64 || arm64 || s390x || ppc64le) && !purego +//go:build (amd64 || arm64 || loong64 || s390x || ppc64le) 
&& !purego package sm2ec