diff --git a/README.md b/README.md index d35171c..44abd38 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,13 @@ PASS ok gmsm/sm2 1.564s + goos: windows (with amd64 curve implementation, first version) + goarch: amd64 + pkg: github.com/emmansun/gmsm/sm2 + BenchmarkLessThan32_P256SM2-6 10447 115618 ns/op 2357 B/op 46 allocs/op + PASS + ok github.com/emmansun/gmsm/sm2 2.199s + P-256 goos: windows goarch: amd64 diff --git a/sm2/p256_asm.go b/sm2/p256_asm.go index 3bbdc83..0ab8e13 100644 --- a/sm2/p256_asm.go +++ b/sm2/p256_asm.go @@ -109,6 +109,10 @@ func p256PointAddAsm(res, in1, in2 []uint64) int //go:noescape func p256PointDoubleAsm(res, in []uint64) +var ( + p256one = []uint64{0x0000000000000001, 0x00000000ffffffff, 0x0000000000000000, 0x0000000100000000} +) + // Inverse, implements invertible interface, need to test this function's correctness func (curve p256Curve) Inverse(k *big.Int) *big.Int { if k.Sign() < 0 { @@ -224,7 +228,7 @@ func p256GetScalar(out []uint64, in []byte) { // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the // underlying field of the curve. (See initP256 for the value.) Thus rr here is // R×R mod p. See comment in Inverse about how this is used. -var rr = []uint64{0x0000000000000003, 0x00000002ffffffff, 0x0000000100000001, 0x0000000400000002} +var rr = []uint64{0x200000003, 0x2ffffffff, 0x100000001, 0x400000002} func maybeReduceModP(in *big.Int) *big.Int { if in.Cmp(p256.P) < 0 { @@ -248,10 +252,10 @@ func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []by p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:]) // This sets r2's Z value to 1, in the Montgomery domain. 
- r2.xyz[8] = 0x0000000000000001 - r2.xyz[9] = 0x00000000ffffffff - r2.xyz[10] = 0x0000000000000000 - r2.xyz[11] = 0x0000000100000000 + r2.xyz[8] = p256one[0] + r2.xyz[9] = p256one[1] + r2.xyz[10] = p256one[2] + r2.xyz[11] = p256one[3] r2.p256ScalarMult(scalarReversed) @@ -284,10 +288,10 @@ func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:]) p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:]) // This sets r2's Z value to 1, in the Montgomery domain. - r.xyz[8] = 0x0000000000000001 - r.xyz[9] = 0x00000000ffffffff - r.xyz[10] = 0x0000000000000000 - r.xyz[11] = 0x0000000100000000 + r.xyz[8] = p256one[0] + r.xyz[9] = p256one[1] + r.xyz[10] = p256one[2] + r.xyz[11] = p256one[3] r.p256ScalarMult(scalarReversed) return r.p256PointToAffine() @@ -477,17 +481,17 @@ func (p *p256Point) p256BaseMult(scalar []uint64) { p256NegCond(p.xyz[4:8], sign) // (This is one, in the Montgomery domain.) - p.xyz[8] = 0x0000000000000001 - p.xyz[9] = 0x00000000ffffffff - p.xyz[10] = 0x0000000000000000 - p.xyz[11] = 0x0000000100000000 + p.xyz[8] = p256one[0] + p.xyz[9] = p256one[1] + p.xyz[10] = p256one[2] + p.xyz[11] = p256one[3] var t0 p256Point // (This is one, in the Montgomery domain.) 
- t0.xyz[8] = 0x0000000000000001 - t0.xyz[9] = 0x00000000ffffffff - t0.xyz[10] = 0x0000000000000000 - t0.xyz[11] = 0x0000000100000000 + t0.xyz[8] = p256one[0] + t0.xyz[9] = p256one[1] + t0.xyz[10] = p256one[2] + t0.xyz[11] = p256one[3] index := uint(5) zero := sel diff --git a/sm2/p256_asm_test.go b/sm2/p256_asm_test.go index ab24b52..86e2425 100644 --- a/sm2/p256_asm_test.go +++ b/sm2/p256_asm_test.go @@ -257,7 +257,7 @@ func Test_p256PointAddAsm(t *testing.T) { x2, y2 := params.ScalarBaseMult(k2.Bytes()) x3, y3 := params.Add(x1, y1, x2, y2) fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x3.Bytes()), hex.EncodeToString(y3.Bytes())) - var in1, in2, r p256Point + var in1, in2, rp p256Point fromBig(in1.xyz[0:4], maybeReduceModP(x1)) fromBig(in1.xyz[4:8], maybeReduceModP(y1)) fromBig(in2.xyz[0:4], maybeReduceModP(x2)) @@ -270,13 +270,17 @@ func Test_p256PointAddAsm(t *testing.T) { in2.xyz[9] = 0x00000000ffffffff in2.xyz[10] = 0x0000000000000000 in2.xyz[11] = 0x0000000100000000 + p256Mul(in1.xyz[0:4], in1.xyz[0:4], rr[:]) + p256Mul(in1.xyz[4:8], in1.xyz[4:8], rr[:]) + p256Mul(in2.xyz[0:4], in2.xyz[0:4], rr[:]) + p256Mul(in2.xyz[4:8], in2.xyz[4:8], rr[:]) res := make([]uint64, 12) n := p256PointAddAsm(res, in1.xyz[:], in2.xyz[:]) fmt.Printf("n=%d\n", n) - copy(r.xyz[:], res) - x4, y4 := r.p256PointToAffine() + copy(rp.xyz[:], res) + x4, y4 := rp.p256PointToAffine() fmt.Printf("x1=%s, y1=%s\n", hex.EncodeToString(x4.Bytes()), hex.EncodeToString(y4.Bytes())) - if x3.Cmp(x4) != 0 || y3.Cmp(y4) != 0 { + if n == 0 && (x3.Cmp(x4) != 0 || y3.Cmp(y4) != 0) { t.FailNow() } }