diff --git a/sm2/p256_asm.go b/sm2/p256_asm.go
index 7457d03..3bbdc83 100644
--- a/sm2/p256_asm.go
+++ b/sm2/p256_asm.go
@@ -1,3 +1,5 @@
+// This implementation stands on the shoulders of giants.
+
 // This file contains the Go wrapper for the constant-time, 64-bit assembly
 // implementation of P256. The optimizations performed here are described in
 // detail in:
@@ -107,6 +109,7 @@ func p256PointAddAsm(res, in1, in2 []uint64) int
 //go:noescape
 func p256PointDoubleAsm(res, in []uint64)
 
+// Inverse implements the invertible interface. TODO: add tests covering this function's correctness.
 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
 	if k.Sign() < 0 {
 		// This should never happen.
@@ -342,7 +345,7 @@ func (p *p256Point) CopyConditional(src *p256Point, v int) {
 
 // p256Inverse sets out to in^-1 mod p.
 func p256Inverse(out, in []uint64) {
-	var stack [9 * 4]uint64
+	var stack [8 * 4]uint64
 	p2 := stack[4*0 : 4*0+4]
 	p4 := stack[4*1 : 4*1+4]
 	p8 := stack[4*2 : 4*2+4]
@@ -352,12 +355,12 @@ func p256Inverse(out, in []uint64) {
 	p32m2 := stack[4*6 : 4*6+4]
 	ptmp := stack[4*7 : 4*7+4]
 
-	p256Sqr(out, in, 1)  // 2^1
-	p256Mul(p2, out, in) // 2^2 - 2^0
+	p256Sqr(ptmp, in, 1)  // 2^1
+	p256Mul(p2, ptmp, in) // 2^2 - 2^0
 
-	p256Sqr(out, p2, 2)    // 2^4 - 2^2
-	p256Mul(ptmp, out, in) // 2^4 - 2^1
-	p256Mul(p4, out, p2)   // 2^4 - 2^0
+	p256Sqr(out, p2, 2)      // 2^4 - 2^2
+	p256Mul(ptmp, out, ptmp) // 2^4 - 2^1
+	p256Mul(p4, out, p2)     // 2^4 - 2^0
 
 	p256Sqr(out, p4, 4)      // 2^8 - 2^4
 	p256Mul(ptmp, out, ptmp) // 2^8 - 2^1
@@ -380,8 +383,8 @@ func p256Inverse(out, in []uint64) {
 	p256Mul(out, out, p32m2) //2^160 - 2^1
 
 	p256Sqr(ptmp, out, 95)   //2^255 - 2^96
-	p256Sqr(out, p32m2, 223) //2^225 - 2^224
-	p256Mul(ptmp, ptmp, out) //2^226 - 2^224 - 2^96
+	p256Sqr(out, p32m2, 223) //2^255 - 2^224
+	p256Mul(ptmp, ptmp, out) //2^256 - 2^224 - 2^96
 
 	p256Sqr(out, p32, 16)  // 2^48 - 2^16
 	p256Mul(out, out, p16) // 2^48 - 2^0
@@ -396,7 +399,7 @@ func p256Inverse(out, in []uint64) {
 	p256Mul(out, out, p2) // 2^62 - 2^0
 	p256Sqr(out, out, 2)  // 2^64 - 2^2
 
-	p256Mul(out, out, in) //2^64 - 2^2 + 2^1
+	p256Mul(out, out, in) //2^64 - 2^2 + 2^0
 
 	p256Mul(out, out, ptmp) //2^256 - 2^224 - 2^96 + 2^64 - 3
 }
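Note on the comment corrections above: p256Inverse raises in to the power p-2 (Fermat), and for the SM2 prime p = 2^256 - 2^224 - 2^96 + 2^64 - 1 the exponent named in the final comment, 2^256 - 2^224 - 2^96 + 2^64 - 3, is exactly p - 2. A minimal standalone sketch of that identity check, not part of the patch; package main and the pow helper are illustrative only:

package main

import (
	"fmt"
	"math/big"
)

func main() {
	one := big.NewInt(1)
	pow := func(k uint) *big.Int { return new(big.Int).Lsh(one, k) }

	// SM2 prime, the same constant the tests below use.
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)

	// Exponent built by the addition chain: 2^256 - 2^224 - 2^96 + 2^64 - 3.
	e := new(big.Int).Sub(pow(256), pow(224))
	e.Sub(e, pow(96))
	e.Add(e, pow(64))
	e.Sub(e, big.NewInt(3))

	// Fermat requires the chain to compute in^(p-2) mod p.
	fmt.Println(e.Cmp(new(big.Int).Sub(p, big.NewInt(2))) == 0) // expect true
}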
diff --git a/sm2/p256_asm_amd64.s b/sm2/p256_asm_amd64.s
index eea7168..d361b75 100644
--- a/sm2/p256_asm_amd64.s
+++ b/sm2/p256_asm_amd64.s
@@ -408,17 +408,29 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
 	MOVQ DX, acc4
 	XORQ acc5, acc5
 	// First reduction step
-	MOVQ acc0, AX
-	MOVQ acc0, t1
-	SHLQ $32, acc0
-	MULQ p256const1<>(SB)
-	SHRQ $32, t1
-	ADDQ acc0, acc1
-	ADCQ t1, acc2
-	ADCQ AX, acc3
+	MOVQ p256p<>+0x08(SB), AX
+	MULQ acc0
+	ADDQ acc0, acc1
+	ADCQ $0, DX
+	ADDQ AX, acc1
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x010(SB), AX
+	MULQ acc0
+	ADDQ t1, acc2
+	ADCQ $0, DX
+	ADDQ AX, acc2
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x018(SB), AX
+	MULQ acc0
+	ADDQ t1, acc3
+	ADCQ $0, DX
+	ADDQ AX, acc3
 	ADCQ DX, acc4
 	ADCQ $0, acc5
 	XORQ acc0, acc0
+
 	// x * y[1]
 	MOVQ (8*1)(y_ptr), t0
@@ -452,17 +464,29 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
 	ADCQ DX, acc5
 	ADCQ $0, acc0
 	// Second reduction step
-	MOVQ acc1, AX
-	MOVQ acc1, t1
-	SHLQ $32, acc1
-	MULQ p256const1<>(SB)
-	SHRQ $32, t1
-	ADDQ acc1, acc2
-	ADCQ t1, acc3
-	ADCQ AX, acc4
+	MOVQ p256p<>+0x08(SB), AX
+	MULQ acc1
+	ADDQ acc1, acc2
+	ADCQ $0, DX
+	ADDQ AX, acc2
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x010(SB), AX
+	MULQ acc1
+	ADDQ t1, acc3
+	ADCQ $0, DX
+	ADDQ AX, acc3
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x018(SB), AX
+	MULQ acc1
+	ADDQ t1, acc4
+	ADCQ $0, DX
+	ADDQ AX, acc4
 	ADCQ DX, acc5
 	ADCQ $0, acc0
 	XORQ acc1, acc1
+
 	// x * y[2]
 	MOVQ (8*2)(y_ptr), t0
@@ -496,14 +520,25 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
 	ADCQ DX, acc0
 	ADCQ $0, acc1
 	// Third reduction step
-	MOVQ acc2, AX
-	MOVQ acc2, t1
-	SHLQ $32, acc2
-	MULQ p256const1<>(SB)
-	SHRQ $32, t1
-	ADDQ acc2, acc3
-	ADCQ t1, acc4
-	ADCQ AX, acc5
+	MOVQ p256p<>+0x08(SB), AX
+	MULQ acc2
+	ADDQ acc2, acc3
+	ADCQ $0, DX
+	ADDQ AX, acc3
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x010(SB), AX
+	MULQ acc2
+	ADDQ t1, acc4
+	ADCQ $0, DX
+	ADDQ AX, acc4
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x018(SB), AX
+	MULQ acc2
+	ADDQ t1, acc5
+	ADCQ $0, DX
+	ADDQ AX, acc5
 	ADCQ DX, acc0
 	ADCQ $0, acc1
 	XORQ acc2, acc2
@@ -540,14 +575,25 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
 	ADCQ DX, acc1
 	ADCQ $0, acc2
 	// Last reduction step
-	MOVQ acc3, AX
-	MOVQ acc3, t1
-	SHLQ $32, acc3
-	MULQ p256const1<>(SB)
-	SHRQ $32, t1
-	ADDQ acc3, acc4
-	ADCQ t1, acc5
-	ADCQ AX, acc0
+	MOVQ p256p<>+0x08(SB), AX
+	MULQ acc3
+	ADDQ acc3, acc4
+	ADCQ $0, DX
+	ADDQ AX, acc4
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x010(SB), AX
+	MULQ acc3
+	ADDQ t1, acc5
+	ADCQ $0, DX
+	ADDQ AX, acc5
+	ADCQ $0, DX
+	MOVQ DX, t1
+	MOVQ p256p<>+0x018(SB), AX
+	MULQ acc3
+	ADDQ t1, acc0
+	ADCQ $0, DX
+	ADDQ AX, acc0
 	ADCQ DX, acc1
 	ADCQ $0, acc2
 	// Copy result [255:0]
@@ -558,7 +604,7 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
 	// Subtract p256
 	SUBQ $-1, acc4
 	SBBQ p256const0<>(SB) ,acc5
-	SBBQ $0, acc0
+	SBBQ $-1, acc0
 	SBBQ p256const1<>(SB), acc1
 	SBBQ $0, acc2
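Why the reduction steps above changed: the removed code relied on the NIST P-256 limb pattern (p[1] = 2^32 - 1, p[2] = 0) to fold the modulus in with SHLQ/SHRQ and a single MULQ, which does not hold for the SM2 prime, so each step now multiplies the folded limb against p256p<>+0x08/0x10/0x018 directly (assuming p256p<> stores the four little-endian limbs of the SM2 prime). Because p[0] = 2^64 - 1, the Montgomery multiplier is simply the low accumulator word, which is why each step starts with a plain ADDQ of that word into the next limb. A standalone big.Int sketch of the word-by-word Montgomery reduction these steps implement, checked against x*y*R^-1 mod p (illustrative only, not part of the patch):

package main

import (
	"fmt"
	"math/big"
)

func main() {
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16)
	mask := new(big.Int).Sub(new(big.Int).Lsh(big.NewInt(1), 64), big.NewInt(1))

	// Operands taken from the affine base point used in the tests below.
	x, _ := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16)
	y, _ := new(big.Int).SetString("BC3736A2F4F6779C59BDCEE36B692153D0A9877CC62A474002DF32E52139F0A0", 16)
	acc := new(big.Int).Mul(x, y) // full 512-bit product

	// Four word-steps of Montgomery reduction. Since p ends in FFFFFFFFFFFFFFFF,
	// -p^-1 mod 2^64 is 1, so the multiplier m is just the low limb; the assembly
	// folds m*p[0] with the leading ADDQ and only needs real MULQs against
	// p[1], p[2], p[3].
	for i := 0; i < 4; i++ {
		m := new(big.Int).And(acc, mask)
		acc.Add(acc, new(big.Int).Mul(m, p))
		acc.Rsh(acc, 64)
	}
	acc.Mod(acc, p) // stands in for the final conditional subtraction

	// Reference value x*y*R^-1 mod p with R = 2^256.
	rInv := new(big.Int).ModInverse(new(big.Int).Lsh(big.NewInt(1), 256), p)
	ref := new(big.Int).Mul(x, y)
	ref.Mul(ref, rInv).Mod(ref, p)

	fmt.Println(acc.Cmp(ref) == 0) // expect true
}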
@@ -1241,6 +1287,8 @@ ordSqrLoop:
 	ADCQ $0, acc0
 	SUBQ AX, acc3
 	SBBQ DX, acc0
+	SUBQ t0, acc3
+	SBBQ $0, acc0
 	// Second reduction step
 	MOVQ acc1, AX
 	MULQ p256ordK0<>(SB)
@@ -1274,6 +1322,8 @@ ordSqrLoop:
 	ADCQ $0, acc1
 	SUBQ AX, acc0
 	SBBQ DX, acc1
+	SUBQ t0, acc0
+	SBBQ $0, acc1
 	// Third reduction step
 	MOVQ acc2, AX
 	MULQ p256ordK0<>(SB)
@@ -1307,6 +1357,8 @@ ordSqrLoop:
 	ADCQ $0, acc2
 	SUBQ AX, acc1
 	SBBQ DX, acc2
+	SUBQ t0, acc1
+	SBBQ $0, acc2
 	// Last reduction step
 	MOVQ acc3, AX
 	MULQ p256ordK0<>(SB)
@@ -1342,6 +1394,8 @@ ordSqrLoop:
 	ADCQ $0, acc3
 	SUBQ AX, acc2
 	SBBQ DX, acc3
+	SUBQ t0, acc2
+	SBBQ $0, acc3
 	XORQ t0, t0
 	// Add bits [511:256] of the sqr result
 	ADCQ acc4, acc0
@@ -1420,7 +1474,7 @@ TEXT sm2P256SubInternal(SB),NOSPLIT,$0
 	ADDQ $-1, acc4
 	ADCQ p256const0<>(SB), acc5
-	ADCQ $0, acc6
+	ADCQ $-1, acc6
 	ADCQ p256const1<>(SB), acc7
 	ANDQ $1, mul0
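The extra SUBQ t0 / SBBQ $0 pairs added in ordSqrLoop account for the top limb of the SM2 group order: 0xFFFFFFFEFFFFFFFF = 2^64 - 2^32 - 1, one less than the 2^64 - 2^32 top limb of the NIST P-256 order that the surrounding shift-based code appears to assume, so each reduction step must subtract t0 once more. A quick standalone identity check with math/bits (illustrative only; the choice of t0 is arbitrary):

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	const ord3 = uint64(0xFFFFFFFEFFFFFFFF) // top limb of the SM2 order
	t0 := uint64(0x123456789ABCDEF0)        // an arbitrary reduction multiplier

	hi, lo := bits.Mul64(t0, ord3) // reference 128-bit product

	// Decomposition the shift-based code uses: t0*2^64 - t0*2^32 - t0.
	lo2, b := bits.Sub64(0, t0<<32, 0)
	hi2, _ := bits.Sub64(t0, t0>>32, b)
	lo2, b = bits.Sub64(lo2, t0, 0) // the extra subtraction added by this patch
	hi2, _ = bits.Sub64(hi2, 0, b)

	fmt.Println(hi == hi2 && lo == lo2) // expect true
}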
@@ -1545,47 +1599,91 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$0
 	ADCQ $0, mul1
 	MOVQ mul1, acc7
 	// First reduction step
-	MOVQ acc0, mul0
-	MOVQ acc0, hlp
-	SHLQ $32, acc0
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc0
 	ADDQ acc0, acc1
-	ADCQ hlp, acc2
-	ADCQ mul0, acc3
+	ADCQ $0, mul1
+	ADDQ mul0, acc1
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc0
+	ADDQ hlp, acc2
+	ADCQ $0, mul1
+	ADDQ mul0, acc2
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc0
+	ADDQ hlp, acc3
+	ADCQ $0, mul1
+	ADDQ mul0, acc3
 	ADCQ $0, mul1
 	MOVQ mul1, acc0
 	// Second reduction step
-	MOVQ acc1, mul0
-	MOVQ acc1, hlp
-	SHLQ $32, acc1
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc1
 	ADDQ acc1, acc2
-	ADCQ hlp, acc3
-	ADCQ mul0, acc0
+	ADCQ $0, mul1
+	ADDQ mul0, acc2
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc1
+	ADDQ hlp, acc3
+	ADCQ $0, mul1
+	ADDQ mul0, acc3
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc1
+	ADDQ hlp, acc0
+	ADCQ $0, mul1
+	ADDQ mul0, acc0
 	ADCQ $0, mul1
 	MOVQ mul1, acc1
 	// Third reduction step
-	MOVQ acc2, mul0
-	MOVQ acc2, hlp
-	SHLQ $32, acc2
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc2
 	ADDQ acc2, acc3
-	ADCQ hlp, acc0
-	ADCQ mul0, acc1
+	ADCQ $0, mul1
+	ADDQ mul0, acc3
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc2
+	ADDQ hlp, acc0
+	ADCQ $0, mul1
+	ADDQ mul0, acc0
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc2
+	ADDQ hlp, acc1
+	ADCQ $0, mul1
+	ADDQ mul0, acc1
 	ADCQ $0, mul1
 	MOVQ mul1, acc2
 	// Last reduction step
-	MOVQ acc3, mul0
-	MOVQ acc3, hlp
-	SHLQ $32, acc3
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc3
 	ADDQ acc3, acc0
-	ADCQ hlp, acc1
-	ADCQ mul0, acc2
+	ADCQ $0, mul1
+	ADDQ mul0, acc0
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc3
+	ADDQ hlp, acc1
+	ADCQ $0, mul1
+	ADDQ mul0, acc1
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc3
+	ADDQ hlp, acc2
+	ADCQ $0, mul1
+	ADDQ mul0, acc2
 	ADCQ $0, mul1
 	MOVQ mul1, acc3
 	MOVQ $0, BP
@@ -1603,7 +1701,7 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$0
 	// Subtract p256
 	SUBQ $-1, acc4
 	SBBQ p256const0<>(SB) ,acc5
-	SBBQ $0, acc6
+	SBBQ $-1, acc6
 	SBBQ p256const1<>(SB), acc7
 	SBBQ $0, hlp
 	// If the result of the subtraction is negative, restore the previous result
@@ -1687,47 +1785,91 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$0
 	ADCQ mul0, t2
 	ADCQ DX, t3
 	// First reduction step
-	MOVQ acc0, mul0
-	MOVQ acc0, hlp
-	SHLQ $32, acc0
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc0
 	ADDQ acc0, acc1
-	ADCQ hlp, acc2
-	ADCQ mul0, acc3
+	ADCQ $0, mul1
+	ADDQ mul0, acc1
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc0
+	ADDQ hlp, acc2
+	ADCQ $0, mul1
+	ADDQ mul0, acc2
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc0
+	ADDQ hlp, acc3
+	ADCQ $0, mul1
+	ADDQ mul0, acc3
 	ADCQ $0, mul1
 	MOVQ mul1, acc0
 	// Second reduction step
-	MOVQ acc1, mul0
-	MOVQ acc1, hlp
-	SHLQ $32, acc1
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc1
 	ADDQ acc1, acc2
-	ADCQ hlp, acc3
-	ADCQ mul0, acc0
+	ADCQ $0, mul1
+	ADDQ mul0, acc2
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc1
+	ADDQ hlp, acc3
+	ADCQ $0, mul1
+	ADDQ mul0, acc3
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc1
+	ADDQ hlp, acc0
+	ADCQ $0, mul1
+	ADDQ mul0, acc0
 	ADCQ $0, mul1
 	MOVQ mul1, acc1
 	// Third reduction step
-	MOVQ acc2, mul0
-	MOVQ acc2, hlp
-	SHLQ $32, acc2
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc2
 	ADDQ acc2, acc3
-	ADCQ hlp, acc0
-	ADCQ mul0, acc1
+	ADCQ $0, mul1
+	ADDQ mul0, acc3
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc2
+	ADDQ hlp, acc0
+	ADCQ $0, mul1
+	ADDQ mul0, acc0
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc2
+	ADDQ hlp, acc1
+	ADCQ $0, mul1
+	ADDQ mul0, acc1
 	ADCQ $0, mul1
 	MOVQ mul1, acc2
 	// Last reduction step
-	MOVQ acc3, mul0
-	MOVQ acc3, hlp
-	SHLQ $32, acc3
-	MULQ p256const1<>(SB)
-	SHRQ $32, hlp
+	MOVQ p256p<>+0x08(SB), mul0
+	MULQ acc3
 	ADDQ acc3, acc0
-	ADCQ hlp, acc1
-	ADCQ mul0, acc2
+	ADCQ $0, mul1
+	ADDQ mul0, acc0
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x010(SB), mul0
+	MULQ acc3
+	ADDQ hlp, acc1
+	ADCQ $0, mul1
+	ADDQ mul0, acc1
+	ADCQ $0, mul1
+	MOVQ mul1, hlp
+	MOVQ p256p<>+0x018(SB), mul0
+	MULQ acc3
+	ADDQ hlp, acc2
+	ADCQ $0, mul1
+	ADDQ mul0, acc2
 	ADCQ $0, mul1
 	MOVQ mul1, acc3
 	MOVQ $0, BP
@@ -1745,7 +1887,7 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$0
 	// Subtract p256
 	SUBQ $-1, acc4
 	SBBQ p256const0<>(SB) ,acc5
-	SBBQ $0, acc6
+	SBBQ $-1, acc6
 	SBBQ p256const1<>(SB), acc7
 	SBBQ $0, hlp
 	// If the result of the subtraction is negative, restore the previous result
@@ -1769,7 +1911,7 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$0
 	MOVQ acc7, t3;\
 	SUBQ $-1, t0;\
 	SBBQ p256const0<>(SB), t1;\
-	SBBQ $0, t2;\
+	SBBQ $-1, t2;\
 	SBBQ p256const1<>(SB), t3;\
 	SBBQ $0, mul0;\
 	CMOVQCS acc4, t0;\
@@ -1790,7 +1932,7 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$0
 	MOVQ acc7, t3;\
 	SUBQ $-1, t0;\
 	SBBQ p256const0<>(SB), t1;\
-	SBBQ $0, t2;\
+	SBBQ $-1, t2;\
 	SBBQ p256const1<>(SB), t3;\
 	SBBQ $0, mul0;\
 	CMOVQCS acc4, t0;\
@@ -1864,7 +2006,7 @@ TEXT ·p256PointAddAffineAsm(SB),0,$512-96
 	MOVQ (16*2 + 8*3)(CX), acc7
 	MOVQ $-1, acc0
 	MOVQ p256const0<>(SB), acc1
-	MOVQ $0, acc2
+	MOVQ $-1, acc2
 	MOVQ p256const1<>(SB), acc3
 	XORQ mul0, mul0
 	// Speculatively subtract
@@ -1880,7 +2022,7 @@ TEXT ·p256PointAddAffineAsm(SB),0,$512-96
 	// Add in case the operand was > p256
 	ADDQ $-1, acc0
 	ADCQ p256const0<>(SB), acc1
-	ADCQ $0, acc2
+	ADCQ $-1, acc2
 	ADCQ p256const1<>(SB), acc3
 	ADCQ $0, mul0
 	CMOVQNE t0, acc0
@@ -2101,6 +2243,7 @@ TEXT sm2P256IsZero(SB),NOSPLIT,$0
 	// XOR [acc4..acc7] with P and compare with zero again.
 	XORQ $-1, acc4
 	XORQ p256const0<>(SB), acc5
+	XORQ $-1, acc6
 	XORQ p256const1<>(SB), acc7
 	ORQ acc5, acc4
 	ORQ acc6, acc4
@@ -2384,7 +2527,7 @@ TEXT ·p256PointDoubleAsm(SB),NOSPLIT,$256-48
 	ADDQ $-1, acc4
 	ADCQ p256const0<>(SB), acc5
-	ADCQ $0, acc6
+	ADCQ $-1, acc6
 	ADCQ p256const1<>(SB), acc7
 	ADCQ $0, mul0
 	TESTQ $1, t0
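All of the $0 to $-1 immediate changes above, and the added XORQ $-1, acc6 in sm2P256IsZero, come from the same fact: limb 2 of the SM2 prime is all ones, whereas it is zero for NIST P-256, so wherever the code adds, subtracts, or XORs the modulus limb by limb, the third word needs -1 rather than 0. A small standalone reference sketch (the SM2 limbs follow from the p constant used in the tests; the remark about p256const0<>/p256const1<> holding limbs 1 and 3 is an assumption about this fork):

package main

import "fmt"

func main() {
	// Little-endian 64-bit limbs of the two primes.
	sm2P := [4]uint64{0xFFFFFFFFFFFFFFFF, 0xFFFFFFFF00000000, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFEFFFFFFFF}
	nistP256 := [4]uint64{0xFFFFFFFFFFFFFFFF, 0x00000000FFFFFFFF, 0x0000000000000000, 0xFFFFFFFF00000001}

	// Limb 2 is all ones for SM2 but zero for NIST P-256; limbs 1 and 3 are
	// what p256const0<> and p256const1<> are expected to hold in this fork.
	fmt.Printf("SM2 p[2]=%#x, NIST P-256 p[2]=%#x\n", sm2P[2], nistP256[2])
}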
new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) + x, _ := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16) + res := make([]uint64, 4) + gx := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05} + + p256Sqr(res, gx, 32) + resInt := toBigInt(res) + fmt.Printf("0=%s\n", hex.EncodeToString(resInt.Bytes())) + + p256Mul(res, gx, gx) + for i := 0; i < 31; i++ { + p256Mul(res, res, res) + } + resInt1 := toBigInt(res) + fmt.Printf("1=%s\n", hex.EncodeToString(resInt1.Bytes())) + + resInt2 := new(big.Int).Mod(x, p) + + for i := 0; i < 32; i++ { + resInt2 = new(big.Int).Mul(resInt2, resInt2) + resInt2 = new(big.Int).Mod(resInt2, p) + } + resInt2 = new(big.Int).Mul(resInt2, r) + resInt2 = new(big.Int).Mod(resInt2, p) + fmt.Printf("2=%s\n", hex.EncodeToString(resInt2.Bytes())) + + if resInt.Cmp(resInt2) != 0 || resInt1.Cmp(resInt2) != 0 { + t.FailNow() + } +} + +func Test_p256OrdSqr(t *testing.T) { + r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) + n, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16) + x, _ := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16) + gx := make([]uint64, 4) + res := make([]uint64, 4) + xm := new(big.Int).Mul(x, r) + xm = new(big.Int).Mod(xm, n) + p256BigToLittle(gx, xm.Bytes()) + p256OrdMul(res, gx, gx) + resInt := toBigInt(res) + fmt.Printf("p256OrdMul=%s\n", hex.EncodeToString(resInt.Bytes())) + gxsqr := new(big.Int).Mul(x, x) + gxsqr = new(big.Int).Mod(gxsqr, n) + gxsqr = new(big.Int).Mul(gxsqr, r) + gxsqr = new(big.Int).Mod(gxsqr, n) + fmt.Printf("2=%s\n", hex.EncodeToString(gxsqr.Bytes())) + if resInt.Cmp(gxsqr) != 0 { + t.FailNow() + } + p256OrdSqr(res, gx, 1) + resInt = toBigInt(res) + fmt.Printf("p256OrdSqr=%s\n", hex.EncodeToString(resInt.Bytes())) + if resInt.Cmp(gxsqr) != 0 { + t.FailNow() + } +} + +func Test_p256Inverse(t *testing.T) { + r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16) + p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF00000000FFFFFFFFFFFFFFFF", 16) + x, _ := new(big.Int).SetString("32C4AE2C1F1981195F9904466A39C9948FE30BBFF2660BE1715A4589334C74C7", 16) + gx := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05} + res := make([]uint64, 4) + p256Inverse(res, gx) + resInt := toBigInt(res) + fmt.Printf("p256Inverse=%s\n", hex.EncodeToString(resInt.Bytes())) + xInv := new(big.Int).ModInverse(x, p) + xInv = new(big.Int).Mul(xInv, r) + xInv = new(big.Int).Mod(xInv, p) + fmt.Printf("expected=%s\n", hex.EncodeToString(xInv.Bytes())) + if resInt.Cmp(xInv) != 0 { + t.FailNow() + } +} + +func Test_p256PointAddAsm_basepoint(t *testing.T) { + curve1 := P256() + params := curve1.Params() + basePoint := []uint64{ + 0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05, + 0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd, + 0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000, + } + in := make([]uint64, 12) + res := make([]uint64, 12) + copy(in, basePoint) + p256PointDoubleAsm(res, in) + n := p256PointAddAsm(res, res, in) + fmt.Printf("n=%d\n", n) + var r p256Point + copy(r.xyz[:], res) + x1, y1 := r.p256PointToAffine() + fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x1.Bytes()), 
+func Test_p256PointAddAsm_basepoint(t *testing.T) {
+	curve1 := P256()
+	params := curve1.Params()
+	basePoint := []uint64{
+		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
+		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
+		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
+	}
+	in := make([]uint64, 12)
+	res := make([]uint64, 12)
+	copy(in, basePoint)
+	p256PointDoubleAsm(res, in)
+	n := p256PointAddAsm(res, res, in)
+	fmt.Printf("n=%d\n", n)
+	var r p256Point
+	copy(r.xyz[:], res)
+	x1, y1 := r.p256PointToAffine()
+	fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x1.Bytes()), hex.EncodeToString(y1.Bytes()))
+
+	x2, y2 := params.Double(params.Gx, params.Gy)
+	x2, y2 = params.Add(params.Gx, params.Gy, x2, y2)
+	fmt.Printf("x2=%s, y2=%s\n", hex.EncodeToString(x2.Bytes()), hex.EncodeToString(y2.Bytes()))
+	if x1.Cmp(x2) != 0 || y1.Cmp(y2) != 0 {
+		t.FailNow()
+	}
+}
+
+func Test_p256PointDoubleAsm(t *testing.T) {
+	basePoint := []uint64{
+		0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05,
+		0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd,
+		0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000,
+	}
+	t1 := make([]uint64, 12)
+	copy(t1, basePoint)
+	for i := 0; i < 16; i++ {
+		p256PointDoubleAsm(t1, t1)
+	}
+	var r p256Point
+	copy(r.xyz[:], t1)
+	x1, y1 := r.p256PointToAffine()
+	fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x1.Bytes()), hex.EncodeToString(y1.Bytes()))
+	curve1 := P256()
+	params := curve1.Params()
+	x2, y2 := params.Double(params.Gx, params.Gy)
+	for i := 0; i < 15; i++ {
+		x2, y2 = params.Double(x2, y2)
+	}
+	fmt.Printf("x2=%s, y2=%s\n", hex.EncodeToString(x2.Bytes()), hex.EncodeToString(y2.Bytes()))
+	if x1.Cmp(x2) != 0 || y1.Cmp(y2) != 0 {
+		t.FailNow()
+	}
+}
+
+func Test_ScalarBaseMult(t *testing.T) {
+	scalar := big.NewInt(0xffffffff)
+	curve1 := P256()
+	x1, y1 := curve1.ScalarBaseMult(scalar.Bytes())
+	fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x1.Bytes()), hex.EncodeToString(y1.Bytes()))
+	params := curve1.Params()
+	x2, y2 := params.ScalarBaseMult(scalar.Bytes())
+	fmt.Printf("x2=%s, y2=%s\n", hex.EncodeToString(x2.Bytes()), hex.EncodeToString(y2.Bytes()))
+	if x1.Cmp(x2) != 0 || y1.Cmp(y2) != 0 {
+		t.FailNow()
+	}
+}
+
+func Test_p256PointAddAsm(t *testing.T) {
+	curve1 := P256()
+	params := curve1.Params()
+	k1, _ := randFieldElement(params, rand.Reader)
+	x1, y1 := params.ScalarBaseMult(k1.Bytes())
+	k2, _ := randFieldElement(params, rand.Reader)
+	x2, y2 := params.ScalarBaseMult(k2.Bytes())
+	x3, y3 := params.Add(x1, y1, x2, y2)
+	fmt.Printf("x3=%s, y3=%s\n", hex.EncodeToString(x3.Bytes()), hex.EncodeToString(y3.Bytes()))
+	var in1, in2, r p256Point
+	fromBig(in1.xyz[0:4], maybeReduceModP(x1))
+	fromBig(in1.xyz[4:8], maybeReduceModP(y1))
+	fromBig(in2.xyz[0:4], maybeReduceModP(x2))
+	fromBig(in2.xyz[4:8], maybeReduceModP(y2))
+	in1.xyz[8] = 0x0000000000000001
+	in1.xyz[9] = 0x00000000ffffffff
+	in1.xyz[10] = 0x0000000000000000
+	in1.xyz[11] = 0x0000000100000000
+	in2.xyz[8] = 0x0000000000000001
+	in2.xyz[9] = 0x00000000ffffffff
+	in2.xyz[10] = 0x0000000000000000
+	in2.xyz[11] = 0x0000000100000000
+	res := make([]uint64, 12)
+	n := p256PointAddAsm(res, in1.xyz[:], in2.xyz[:])
+	fmt.Printf("n=%d\n", n)
+	copy(r.xyz[:], res)
+	x4, y4 := r.p256PointToAffine()
+	fmt.Printf("x4=%s, y4=%s\n", hex.EncodeToString(x4.Bytes()), hex.EncodeToString(y4.Bytes()))
+	if x3.Cmp(x4) != 0 || y3.Cmp(y4) != 0 {
+		t.FailNow()
+	}
+}
+
+func Test_ScalarMult_basepoint(t *testing.T) {
+	scalar := big.NewInt(0xffffffff)
+	curve1 := P256()
+	x1, y1 := curve1.ScalarMult(curve1.Params().Gx, curve1.Params().Gy, scalar.Bytes())
+	fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x1.Bytes()), hex.EncodeToString(y1.Bytes()))
+	params := curve1.Params()
+	x2, y2 := params.ScalarMult(curve1.Params().Gx, curve1.Params().Gy, scalar.Bytes())
+	fmt.Printf("x2=%s, y2=%s\n", hex.EncodeToString(x2.Bytes()), hex.EncodeToString(y2.Bytes()))
+	if x1.Cmp(x2) != 0 || y1.Cmp(y2) != 0 {
+		t.FailNow()
+	}
 }
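A possible follow-up to the new tests, not part of this patch: benchmarks for the rewritten field primitives, placed alongside the tests in p256_asm_test.go (which already has the testing package available) and reusing the Montgomery-form base-point limbs from above. Running them with something like `go test -bench . ./sm2` would show the cost of the generic reduction.

// Sketch only; function names are suggestions, signatures of p256Mul and
// p256Inverse are taken from the wrapper file in this patch.
func Benchmark_p256Mul(b *testing.B) {
	res := make([]uint64, 4)
	gx := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05}
	gy := []uint64{0xc1354e593c2d0ddd, 0xc1f5e5788d3295fa, 0x8d4cfb066e2a48f8, 0x63cd65d481d735bd}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		p256Mul(res, gx, gy)
	}
}

func Benchmark_p256Inverse(b *testing.B) {
	res := make([]uint64, 4)
	gx := []uint64{0x61328990f418029e, 0x3e7981eddca6c050, 0xd6a1ed99ac24c3c3, 0x91167a5ee1c13b05}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		p256Inverse(res, gx)
	}
}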