From 5f72151e74a22b62fae4b880722f09827121c51a Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Wed, 5 Jul 2023 17:58:19 +0800 Subject: [PATCH] sm9/bn256: special square for final exp & optimize gfp2/gfp12 square #137 #139 --- sm9/bn256/bn_pair.go | 69 +++++----- sm9/bn256/gfp.go | 1 + sm9/bn256/gfp12.go | 276 ++++++++++++++++++++++++++------------- sm9/bn256/gfp12_exp_u.go | 17 +-- sm9/bn256/gfp12_test.go | 131 +++++++++++++++++++ sm9/bn256/gfp2.go | 78 +++-------- sm9/bn256/gfp2_test.go | 12 ++ sm9/bn256/gfp4.go | 57 +------- sm9/bn256/gfp4_test.go | 19 +++ sm9/bn256/gfp_decl.go | 1 + 10 files changed, 420 insertions(+), 241 deletions(-) diff --git a/sm9/bn256/bn_pair.go b/sm9/bn256/bn_pair.go index fe79b77..0435f8c 100644 --- a/sm9/bn256/bn_pair.go +++ b/sm9/bn256/bn_pair.go @@ -10,7 +10,7 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) { D.Square(D).Sub(D, r2).Sub(D, &r.t).Mul(D, &r.t) // D = ((Yp + Zr)^2 - Zr^2 - Yp^2)*Zr^2 = 2Yp*Zr^3 H := (&gfP2{}).Sub(B, &r.x) // H = Xp * Zr^2 - Xr - I := (&gfP2{}).SquareNC(H) // I = (Xp * Zr^2 - Xr)^2 = Xp^2*Zr^4 + Xr^2 - 2Xr*Xp*Zr^2 + I := (&gfP2{}).SquareNC(H) // I = (Xp * Zr^2 - Xr)^2 = Xp^2*Zr^4 + Xr^2 - 2Xr*Xp*Zr^2 E := (&gfP2{}).Add(I, I) // E = 2*(Xp * Zr^2 - Xr)^2 E.Add(E, E) // E = 4*(Xp * Zr^2 - Xr)^2 @@ -37,8 +37,8 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) { t.Add(&p.y, &rOut.z).Square(t).Sub(t, r2).Sub(t, &rOut.t) // t = (Yp + rOut.Z)^2 - Yp^2 - rOut.Z^2 = 2Yp*rOut.Z t2.Mul(L1, &p.x) - t2.Add(t2, t2) // t2 = 2 L1 * Xp - a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z + t2.Add(t2, t2) // t2 = 2 L1 * Xp + a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z c.MulScalar(&rOut.z, &q.y) c.Add(c, c) @@ -139,9 +139,9 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 { ret.Square(ret) } mulLine(ret, a, b, c) - tmpR= r + tmpR = r r = newR - newR= tmpR + newR = tmpR switch sixUPlus2NAF[i-1] { case 1: lineFunctionAdd(r, aAffine, newR, bAffine, r2, a, b, c) @@ -152,9 +152,9 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 { } mulLine(ret, a, b, c) - tmpR= r + tmpR = r r = newR - newR= tmpR + newR = tmpR } // In order to calculate Q1 we have to convert q from the sextic twist @@ -187,9 +187,9 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 { r2.Square(&q1.y) lineFunctionAdd(r, q1, newR, bAffine, r2, a, b, c) mulLine(ret, a, b, c) - tmpR= r + tmpR = r r = newR - newR= tmpR + newR = tmpR r2.Square(&minusQ2.y) lineFunctionAdd(r, minusQ2, newR, bAffine, r2, a, b, c) @@ -202,51 +202,50 @@ func miller(q *twistPoint, p *curvePoint) *gfP12 { // GF(p¹²) to obtain an element of GT. https://eprint.iacr.org/2007/390.pdf // http://cryptojedi.org/papers/dclxvi-20100714.pdf func finalExponentiation(in *gfP12) *gfP12 { - t1 := &gfP12{} - // This is the p^6-Frobenius - t1.FrobeniusP6(in) + t1 := (&gfP12{}).FrobeniusP6(in) - inv := &gfP12{} - inv.Invert(in) + inv := (&gfP12{}).Invert(in) t1.Mul(t1, inv) - t2 := (&gfP12{}).FrobeniusP2(t1) - t1.Mul(t1, t2) + t2 := inv.FrobeniusP2(t1) // reuse inv + t1.Mul(t1, t2) // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation fp := (&gfP12{}).Frobenius(t1) fp2 := (&gfP12{}).FrobeniusP2(t1) fp3 := (&gfP12{}).Frobenius(fp2) - fu := (&gfP12{}).gfP12ExpU(t1) - fu2 := (&gfP12{}).gfP12ExpU(fu) - fu3 := (&gfP12{}).gfP12ExpU(fu2) + y0 := &gfP12{} + y0.MulNC(fp, fp2).Mul(y0, fp3) // y0 = (t1^p) * (t1^(p^2)) * (t1^(p^3)) + + // reuse fp, fp2, fp3 local variables + // [gfP12ExpU] is most time consuming operation + fu := fp.gfP12ExpU(t1) + fu2 := fp2.gfP12ExpU(fu) + fu3 := fp3.gfP12ExpU(fu2) - y3 := (&gfP12{}).Frobenius(fu) fu2p := (&gfP12{}).Frobenius(fu2) fu3p := (&gfP12{}).Frobenius(fu3) - y2 := (&gfP12{}).FrobeniusP2(fu2) - y0 := &gfP12{} - y0.MulNC(fp, fp2).Mul(y0, fp3) + y1 := (&gfP12{}).Conjugate(t1) // y1 = 1 / t1 + y2 := (&gfP12{}).FrobeniusP2(fu2) // y2 = (t1^(u^2))^(p^2) + y3 := (&gfP12{}).Frobenius(fu) // y3 = (t1^u)^p + y3.Conjugate(y3) // y3 = 1 / (t1^u)^p + y4 := (&gfP12{}).MulNC(fu, fu2p) // y4 = (t1^u) * ((t1^(u^2))^p) + y4.Conjugate(y4) // y4 = 1 / ((t1^u) * ((t1^(u^2))^p)) + y5 := fu2p.Conjugate(fu2) // y5 = 1 / t1^(u^2), reuse fu2p + y6 := (&gfP12{}).MulNC(fu3, fu3p) // y6 = t1^(u^3) * (t1^(u^3))^p + y6.Conjugate(y6) // y6 = 1 / (t1^(u^3) * (t1^(u^3))^p) - y1 := (&gfP12{}).Conjugate(t1) - y5 := (&gfP12{}).Conjugate(fu2) - y3.Conjugate(y3) - y4 := (&gfP12{}).MulNC(fu, fu2p) - y4.Conjugate(y4) - - y6 := (&gfP12{}).MulNC(fu3, fu3p) - y6.Conjugate(y6) - - t0 := (&gfP12{}).SquareNC(y6) + // https://eprint.iacr.org/2008/490.pdf + t0 := (&gfP12{}).SpecialSquareNC(y6) t0.Mul(t0, y4).Mul(t0, y5) t1.Mul(y3, y5).Mul(t1, t0) t0.Mul(t0, y2) - t1.Square(t1).Mul(t1, t0).Square(t1) + t1.SpecialSquare(t1).Mul(t1, t0).SpecialSquare(t1) t0.Mul(t1, y1) t1.Mul(t1, y0) - t0.Square(t0).Mul(t0, t1) + t0.SpecialSquare(t0).Mul(t0, t1) return t0 } diff --git a/sm9/bn256/gfp.go b/sm9/bn256/gfp.go index f834a07..9e0e5d7 100644 --- a/sm9/bn256/gfp.go +++ b/sm9/bn256/gfp.go @@ -9,6 +9,7 @@ import ( type gfP [4]uint64 +var genericZero = &gfP{0} var zero = newGFp(0) var one = newGFp(1) var two = newGFp(2) diff --git a/sm9/bn256/gfp12.go b/sm9/bn256/gfp12.go index 8070fd0..50f2344 100644 --- a/sm9/bn256/gfp12.go +++ b/sm9/bn256/gfp12.go @@ -140,36 +140,7 @@ func (e *gfP12) Mul(a, b *gfP12) *gfP12 { // +x0*z1*w^2 + x0*y1*v + x0*x1*v*w //=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2 tmp := &gfP12{} - tx := &tmp.x - ty := &tmp.y - tz := &tmp.z - t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} - v0.MulNC(&a.z, &b.z) - v1.MulNC(&a.y, &b.y) - v2.MulNC(&a.x, &b.x) - - t.Add(&a.y, &a.x) - tz.Add(&b.y, &b.x) - t.Mul(t, tz) - t.Sub(t, v1) - t.Sub(t, v2) - t.MulV1(t) - tz.Add(t, v0) - - t.Add(&a.z, &a.y) - ty.Add(&b.z, &b.y) - ty.Mul(t, ty) - ty.Sub(ty, v0) - ty.Sub(ty, v1) - t.MulV1(v2) - ty.Add(ty, t) - - t.Add(&a.z, &a.x) - tx.Add(&b.z, &b.x) - tx.Mul(tx, t) - tx.Sub(tx, v0) - tx.Add(tx, v1) - tx.Sub(tx, v2) + tmp.MulNC(a, b) gfp12Copy(e, tmp) return e } @@ -180,6 +151,7 @@ func (e *gfP12) MulNC(a, b *gfP12) *gfP12 { // +y0*z1*w + y0*y1*w^2 + y0*x1*v // +x0*z1*w^2 + x0*y1*v + x0*x1*v*w //=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2 + // Karatsuba method tx := &e.x ty := &e.y tz := &e.z @@ -219,25 +191,7 @@ func (e *gfP12) Square(a *gfP12) *gfP12 { // (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2 // (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2 tmp := &gfP12{} - tx := &tmp.x - ty := &tmp.y - tz := &tmp.z - t := &gfP4{} - - tz.SquareNC(&a.z) - t.MulV(&a.x, &a.y) - t.Add(t, t) - tz.Add(tz, t) - - ty.SquareVNC(&a.x) - t.Mul(&a.y, &a.z) - t.Add(t, t) - ty.Add(ty, t) - - tx.SquareNC(&a.y) - t.Mul(&a.x, &a.z) - t.Add(t, t) - tx.Add(tx, t) + tmp.SquareNC(a) gfp12Copy(e, tmp) return e } @@ -247,25 +201,148 @@ func (e *gfP12) SquareNC(a *gfP12) *gfP12 { // z^2 + z*y*w + z*x*w^2 + y*z*w + y^2*w^2 + y*x*v + x*z*w^2 + x*y*v + x^2 *v *w // (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2 // (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2 + // Karatsuba method tx := &e.x ty := &e.y tz := &e.z - t := &gfP4{} + t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} + v0.SquareNC(&a.z) + v1.SquareNC(&a.y) + v2.SquareNC(&a.x) - tz.SquareNC(&a.z) - t.MulV(&a.x, &a.y) - t.Add(t, t) - tz.Add(tz, t) + t.Add(&a.y, &a.x) + tz.SquareNC(t) + tz.Sub(tz, v1) + tz.Sub(tz, v2) + tz.MulV1(tz) + tz.Add(tz, v0) - ty.SquareVNC(&a.x) - t.Mul(&a.y, &a.z) - t.Add(t, t) + t.Add(&a.z, &a.y) + ty.SquareNC(t) + ty.Sub(ty, v0) + ty.Sub(ty, v1) + t.MulV1(v2) ty.Add(ty, t) - tx.SquareNC(&a.y) - t.Mul(&a.x, &a.z) - t.Add(t, t) - tx.Add(tx, t) + t.Add(&a.z, &a.x) + tx.SquareNC(t) + tx.Sub(tx, v0) + tx.Add(tx, v1) + tx.Sub(tx, v2) + + return e +} + +// Special squaring for use on elements in T_6(fp2) (after the +// easy part of the final exponentiation. Used in the hard part +// of the final exponentiation. Function uses formulas in +// Granger/Scott (PKC2010). +func (e *gfP12) SpecialSquare(a *gfP12) *gfP12 { + tmp := &gfP12{} + tmp.SpecialSquareNC(a) + gfp12Copy(e, tmp) + return e +} + +func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 { + // Square first round + in := &gfP12{} + tx, ty, tz := &gfP4{}, &gfP4{}, &gfP4{} + + v0 := &in.x + v1 := &in.y + v2 := &in.z + + v0.SquareVNC(&a.x) // (t02, t10) + v1.SquareNC(&a.y) // (t12, t01) + v2.SquareNC(&a.z) // (t11, t00) + + tx.Add(v0, v0) + tx.Add(v0, tx) + ty.Add(v1, v1) + ty.Add(v1, ty) + tz.Add(v2, v2) + tz.Add(v2, tz) + + v0.Add(&a.x, &a.x) // (f12, f01) + v0.y.Neg(&v0.y) + v1.Add(&a.y, &a.y) // (f02, f10) + v1.x.Neg(&v1.x) + v2.Add(&a.z, &a.z) // (f11, f00) + v2.y.Neg(&v2.y) + + v0.Add(ty, v0) + v1.Add(tx, v1) + v2.Add(tz, v2) + + tmp := &gfP12{} + var tmp2 *gfP12 + + for i := 1; i < n; i++ { + v0 = &tmp.x + v1 = &tmp.y + v2 = &tmp.z + + v0.SquareVNC(&in.x) // (t02, t10) + v1.SquareNC(&in.y) // (t12, t01) + v2.SquareNC(&in.z) // (t11, t00) + + tx.Add(v0, v0) + tx.Add(v0, tx) + ty.Add(v1, v1) + ty.Add(v1, ty) + tz.Add(v2, v2) + tz.Add(v2, tz) + + v0.Add(&in.x, &in.x) // (f12, f01) + v0.y.Neg(&v0.y) + v1.Add(&in.y, &in.y) // (f02, f10) + v1.x.Neg(&v1.x) + v2.Add(&in.z, &in.z) // (f11, f00) + v2.y.Neg(&v2.y) + + v0.Add(ty, v0) + v1.Add(tx, v1) + v2.Add(tz, v2) + + // Switch references + tmp2 = in + in = tmp + tmp = tmp2 + } + gfp12Copy(e, in) + return e +} + +func (e *gfP12) SpecialSquareNC(a *gfP12) *gfP12 { + tx, ty, tz := &gfP4{}, &gfP4{}, &gfP4{} + + v0 := &e.x + v1 := &e.y + v2 := &e.z + + v0.SquareVNC(&a.x) // (t02, t10) + v1.SquareNC(&a.y) // (t12, t01) + v2.SquareNC(&a.z) // (t11, t00) + + tx.Add(v0, v0) + tx.Add(v0, tx) + ty.Add(v1, v1) + ty.Add(v1, ty) + tz.Add(v2, v2) + tz.Add(v2, tz) + + v0.Add(&a.x, &a.x) // (f12, f01) + v0.y.Neg(&v0.y) + v1.Add(&a.y, &a.y) // (f02, f10) + v1.x.Neg(&v1.x) + v2.Add(&a.z, &a.z) // (f11, f00) + v2.y.Neg(&v2.y) + + v0.Add(ty, v0) + v1.Add(tx, v1) + v2.Add(tz, v2) + return e } @@ -275,51 +352,68 @@ func (e *gfP12) Squares(a *gfP12, n int) *gfP12 { tx := &in.x ty := &in.y tz := &in.z - t := &gfP4{} + t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} - tz.SquareNC(&a.z) - t.MulV(&a.x, &a.y) - t.Add(t, t) - tz.Add(tz, t) + v0.SquareNC(&a.z) + v1.SquareNC(&a.y) + v2.SquareNC(&a.x) - ty.SquareVNC(&a.x) - t.Mul(&a.y, &a.z) - t.Add(t, t) + t.Add(&a.y, &a.x) + tz.SquareNC(t) + tz.Sub(tz, v1) + tz.Sub(tz, v2) + tz.MulV1(tz) + tz.Add(tz, v0) + + t.Add(&a.z, &a.y) + ty.SquareNC(t) + ty.Sub(ty, v0) + ty.Sub(ty, v1) + t.MulV1(v2) ty.Add(ty, t) - tx.SquareNC(&a.y) - t.Mul(&a.x, &a.z) - t.Add(t, t) - tx.Add(tx, t) + t.Add(&a.z, &a.x) + tx.SquareNC(t) + tx.Sub(tx, v0) + tx.Add(tx, v1) + tx.Sub(tx, v2) tmp := &gfP12{} var tmp2 *gfP12 - tx = &tmp.x - ty = &tmp.y - tz = &tmp.z - for i := 1; i < n; i++ { - tz.SquareNC(&in.z) - t.MulV(&in.x, &in.y) - t.Add(t, t) - tz.Add(tz, t) - ty.SquareVNC(&in.x) - t.Mul(&in.y, &in.z) - t.Add(t, t) + for i := 1; i < n; i++ { + tx = &tmp.x + ty = &tmp.y + tz = &tmp.z + + v0.SquareNC(&in.z) + v1.SquareNC(&in.y) + v2.SquareNC(&in.x) + + t.Add(&in.y, &in.x) + tz.SquareNC(t) + tz.Sub(tz, v1) + tz.Sub(tz, v2) + tz.MulV1(tz) + tz.Add(tz, v0) + + t.Add(&in.z, &in.y) + ty.SquareNC(t) + ty.Sub(ty, v0) + ty.Sub(ty, v1) + t.MulV1(v2) ty.Add(ty, t) - tx.SquareNC(&in.y) - t.Mul(&in.x, &in.z) - t.Add(t, t) - tx.Add(tx, t) + t.Add(&in.z, &in.x) + tx.SquareNC(t) + tx.Sub(tx, v0) + tx.Add(tx, v1) + tx.Sub(tx, v2) // Switch references tmp2 = in in = tmp tmp = tmp2 - tx = &tmp.x - ty = &tmp.y - tz = &tmp.z } gfp12Copy(e, in) return e diff --git a/sm9/bn256/gfp12_exp_u.go b/sm9/bn256/gfp12_exp_u.go index 04a4d54..198be7d 100644 --- a/sm9/bn256/gfp12_exp_u.go +++ b/sm9/bn256/gfp12_exp_u.go @@ -1,5 +1,6 @@ package bn256 +// Use special square func (e *gfP12) gfP12ExpU(x *gfP12) *gfP12 { // The sequence of 10 multiplications and 61 squarings is derived from the // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. @@ -20,23 +21,23 @@ func (e *gfP12) gfP12ExpU(x *gfP12) *gfP12 { var t2 = new(gfP12) var t3 = new(gfP12) - t2.SquareNC(x) - t1.SquareNC(t2) + t2.SpecialSquareNC(x) + t1.SpecialSquareNC(t2) z.MulNC(x, t1) t0.MulNC(t1, z) t2.Mul(t2, t0) t3.MulNC(x, t2) - t3.Squares(t3, 40) + t3.SpecialSquares(t3, 40) t3.Mul(t2, t3) - t3.Squares(t3, 7) + t3.SpecialSquares(t3, 7) t2.Mul(t2, t3) t1.Mul(t1, t2) - t1.Squares(t1, 4) + t1.SpecialSquares(t1, 4) t0.Mul(t0, t1) - t0.Square(t0) + t0.SpecialSquare(t0) t0.Mul(x, t0) - t0.Squares(t0, 6) + t0.SpecialSquares(t0, 6) z.Mul(z, t0) - z.Square(z) + z.SpecialSquare(z) return e } diff --git a/sm9/bn256/gfp12_test.go b/sm9/bn256/gfp12_test.go index bedd1c9..580a336 100644 --- a/sm9/bn256/gfp12_test.go +++ b/sm9/bn256/gfp12_test.go @@ -35,6 +35,31 @@ func Test_gfP12Square(t *testing.T) { } } +func TestSpecialSquare(t *testing.T) { + in := &gfP12{ + testdataP4, + testdataP4, + *(&gfP4{}).SetOne(), + } + + // This is the p^6-Frobenius + t1 := (&gfP12{}).FrobeniusP6(in) + + inv := (&gfP12{}).Invert(in) + t1.Mul(t1, inv) + + t2 := inv.FrobeniusP2(t1) // reuse inv + t1.Mul(t1, t2) // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation + + got := &gfP12{} + expected := &gfP12{} + got.SpecialSquare(t1) + expected.Square(t1) + if *got != *expected { + t.Errorf("not same got=%v, expected=%v", got, expected) + } +} + func BenchmarkGfP12Square(b *testing.B) { x := &gfP12{ testdataP4, @@ -49,6 +74,52 @@ func BenchmarkGfP12Square(b *testing.B) { } } +func BenchmarkGfP12SpecialSquare(b *testing.B) { + in := &gfP12{ + testdataP4, + testdataP4, + *(&gfP4{}).SetOne(), + } + + // This is the p^6-Frobenius + t1 := (&gfP12{}).FrobeniusP6(in) + + inv := (&gfP12{}).Invert(in) + t1.Mul(t1, inv) + + t2 := inv.FrobeniusP2(t1) // reuse inv + t1.Mul(t1, t2) // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation + x2 := &gfP12{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + x2.Square(t1) + } +} + +func BenchmarkGfP12SpecialSqures(b *testing.B) { + in := &gfP12{ + testdataP4, + testdataP4, + *(&gfP4{}).SetOne(), + } + + // This is the p^6-Frobenius + t1 := (&gfP12{}).FrobeniusP6(in) + + inv := (&gfP12{}).Invert(in) + t1.Mul(t1, inv) + + t2 := inv.FrobeniusP2(t1) // reuse inv + t1.Mul(t1, t2) // t1 = in ^ ((p^6 - 1) * (p^2 + 1)), the first two parts of the exponentiation + got := &gfP12{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + got.SpecialSquares(in, 61) + } +} + func testGfP12Invert(t *testing.T, x *gfP12) { xInv := &gfP12{} xInv.Invert(x) @@ -281,6 +352,20 @@ func Test_W3(t *testing.T) { } } +func BenchmarkGfP12Invert(b *testing.B) { + x := &gfP12{ + testdataP4, + testdataP4, + testdataP4, + } + got := &gfP12{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + got.Invert(x) + } +} + func BenchmarkGfP12Frobenius(b *testing.B) { x := &gfP12{ testdataP4, @@ -300,6 +385,48 @@ func BenchmarkGfP12Frobenius(b *testing.B) { } } +func BenchmarkGfP12Mul(b *testing.B) { + x := &gfP12{ + testdataP4, + testdataP4, + testdataP4, + } + got := &gfP12{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + got.Mul(x, x) + } +} + +func BenchmarkGfP12Squre(b *testing.B) { + x := &gfP12{ + testdataP4, + testdataP4, + testdataP4, + } + got := &gfP12{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + got.Square(x) + } +} + +func BenchmarkGfP12Squres(b *testing.B) { + x := &gfP12{ + testdataP4, + testdataP4, + testdataP4, + } + got := &gfP12{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + got.Squares(x, 61) + } +} + func BenchmarkGfP12ExpU(b *testing.B) { x := &gfP12{ testdataP4, @@ -311,6 +438,8 @@ func BenchmarkGfP12ExpU(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { got.gfP12ExpU(x) + got.gfP12ExpU(x) + got.gfP12ExpU(x) } } @@ -325,5 +454,7 @@ func BenchmarkGfP12ExpU2(b *testing.B) { b.ResetTimer() for i := 0; i < b.N; i++ { got.Exp(x, u) + got.Exp(x, u) + got.Exp(x, u) } } diff --git a/sm9/bn256/gfp2.go b/sm9/bn256/gfp2.go index 1e48251..0a17a99 100644 --- a/sm9/bn256/gfp2.go +++ b/sm9/bn256/gfp2.go @@ -64,13 +64,13 @@ func (e *gfP2) IsOne() bool { func (e *gfP2) Conjugate(a *gfP2) *gfP2 { e.y.Set(&a.y) - gfpNeg(&e.x, &a.x) + gfpSub(&e.x, genericZero, &a.x) return e } func (e *gfP2) Neg(a *gfP2) *gfP2 { - gfpNeg(&e.x, &a.x) - gfpNeg(&e.y, &a.y) + gfpSub(&e.x, genericZero, &a.x) + gfpSub(&e.y, genericZero, &a.y) return e } @@ -109,22 +109,7 @@ func (e *gfP2) Triple(a *gfP2) *gfP2 { // c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 func (e *gfP2) Mul(a, b *gfP2) *gfP2 { tmp := &gfP2{} - tx := &tmp.x - ty := &tmp.y - v0, v1 := &gfP{}, &gfP{} - - gfpMul(v0, &a.y, &b.y) - gfpMul(v1, &a.x, &b.x) - - gfpAdd(tx, &a.x, &a.y) - gfpAdd(ty, &b.x, &b.y) - gfpMul(tx, tx, ty) - gfpSub(tx, tx, v0) - gfpSub(tx, tx, v1) - - gfpSub(ty, v0, v1) - gfpSub(ty, ty, v1) - + tmp.MulNC(a, b) gfp2Copy(e, tmp) return e } @@ -170,7 +155,7 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 { gfpSub(ty, ty, v0) gfpSub(ty, ty, v1) gfpAdd(ty, ty, ty) - gfpNeg(ty, ty) + gfpSub(ty, genericZero, ty) gfpSub(tx, v0, v1) gfpSub(tx, tx, v1) @@ -186,7 +171,7 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 { func (e *gfP2) MulU1(a *gfP2) *gfP2 { t := &gfP{} gfpAdd(t, &a.x, &a.x) - gfpNeg(t, t) + gfpSub(t, genericZero, t) gfpCopy(&e.x, &a.y) gfpCopy(&e.y, t) @@ -197,15 +182,7 @@ func (e *gfP2) Square(a *gfP2) *gfP2 { // Complex squaring algorithm: // (xu+y)² = y^2-2*x^2 + 2*u*x*y tmp := &gfP2{} - tx := &tmp.x - ty := &tmp.y - gfpSqr(tx, &a.x, 1) - gfpSqr(ty, &a.y, 1) - gfpSub(ty, ty, tx) - gfpSub(ty, ty, tx) - - gfpMul(tx, &a.x, &a.y) - gfpAdd(tx, tx, tx) + tmp.SquareNC(a) gfp2Copy(e, tmp) return e } @@ -215,13 +192,15 @@ func (e *gfP2) SquareNC(a *gfP2) *gfP2 { // (xu+y)² = y^2-2*x^2 + 2*u*x*y tx := &e.x ty := &e.y - gfpSqr(tx, &a.x, 1) - gfpSqr(ty, &a.y, 1) - gfpSub(ty, ty, tx) - gfpSub(ty, ty, tx) + gfpAdd(ty, &a.x, &a.y) + gfpAdd(tx, &a.x, &a.x) + gfpSub(tx, &a.y, tx) + gfpMul(ty, tx, ty) gfpMul(tx, &a.x, &a.y) + gfpAdd(ty, tx, ty) gfpAdd(tx, tx, tx) + return e } @@ -230,20 +209,7 @@ func (e *gfP2) SquareU(a *gfP2) *gfP2 { // (xu+y)²*u = (y^2-2*x^2)u - 4*x*y tmp := &gfP2{} - tx := &tmp.x - ty := &tmp.y - // tx = a0^2 - 2 * a1^2 - gfpSqr(ty, &a.x, 1) - gfpSqr(tx, &a.y, 1) - gfpAdd(ty, ty, ty) - gfpSub(tx, tx, ty) - - // ty = -4 * a0 * a1 - gfpMul(ty, &a.x, &a.y) - gfpAdd(ty, ty, ty) - gfpAdd(ty, ty, ty) - gfpNeg(ty, ty) - + tmp.SquareUNC(a) gfp2Copy(e, tmp) return e } @@ -251,20 +217,18 @@ func (e *gfP2) SquareU(a *gfP2) *gfP2 { func (e *gfP2) SquareUNC(a *gfP2) *gfP2 { // Complex squaring algorithm: // (xu+y)²*u = (y^2-2*x^2)u - 4*x*y - tx := &e.x ty := &e.y - // tx = a0^2 - 2 * a1^2 - gfpSqr(ty, &a.x, 1) - gfpSqr(tx, &a.y, 1) - gfpAdd(ty, ty, ty) - gfpSub(tx, tx, ty) - // ty = -4 * a0 * a1 + gfpAdd(tx, &a.x, &a.y) + gfpAdd(ty, &a.x, &a.x) + gfpSub(ty, &a.y, ty) + gfpMul(tx, tx, ty) gfpMul(ty, &a.x, &a.y) + gfpAdd(tx, tx, ty) gfpAdd(ty, ty, ty) gfpAdd(ty, ty, ty) - gfpNeg(ty, ty) + gfpSub(ty, genericZero, ty) return e } @@ -287,7 +251,7 @@ func (e *gfP2) Invert(a *gfP2) *gfP2 { inv := &gfP{} inv.Invert(t3) // inv = (2 * a.x ^ 2 + a.y ^ 2) ^ (-1) - gfpNeg(t1, &a.x) + gfpSub(t1, genericZero, &a.x) gfpMul(&e.x, t1, inv) // x = - a.x * inv gfpMul(&e.y, &a.y, inv) // y = a.y * inv diff --git a/sm9/bn256/gfp2_test.go b/sm9/bn256/gfp2_test.go index b6fd6d7..92d17bb 100644 --- a/sm9/bn256/gfp2_test.go +++ b/sm9/bn256/gfp2_test.go @@ -172,6 +172,18 @@ func BenchmarkGfP2Square(b *testing.B) { } } +func BenchmarkGfP2SquareU(b *testing.B) { + x := &gfP2{ + *fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141")), + *fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B")), + } + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + x.SquareU(x) + } +} + /* func Test_gfP2QuadraticResidue(t *testing.T) { x := &gfP2{ diff --git a/sm9/bn256/gfp4.go b/sm9/bn256/gfp4.go index f1eb00a..a8931e5 100644 --- a/sm9/bn256/gfp4.go +++ b/sm9/bn256/gfp4.go @@ -99,21 +99,7 @@ func (e *gfP4) Mul(a, b *gfP4) *gfP4 { //c0 = a0*b0 +a1*b1*u //c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 tmp := &gfP4{} - tx := &tmp.x - ty := &tmp.y - v0, v1 := &gfP2{}, &gfP2{} - v0.MulNC(&a.y, &b.y) - v1.MulNC(&a.x, &b.x) - - tx.Add(&a.x, &a.y) - ty.Add(&b.x, &b.y) - tx.Mul(tx, ty) - tx.Sub(tx, v0) - tx.Sub(tx, v1) - - ty.MulU1(v1) - ty.Add(ty, v0) - + tmp.MulNC(a, b) gfp4Copy(e, tmp) return e } @@ -151,22 +137,7 @@ func (e *gfP4) MulNC(a, b *gfP4) *gfP4 { // c1 = a0*b0 + a1*b1*u func (e *gfP4) MulV(a, b *gfP4) *gfP4 { tmp := &gfP4{} - tx := &tmp.x - ty := &tmp.y - v0, v1 := &gfP2{}, &gfP2{} - v0.MulNC(&a.y, &b.y) - v1.MulNC(&a.x, &b.x) - - tx.Add(&a.x, &a.y) - ty.Add(&b.x, &b.y) - ty.Mul(tx, ty) - ty.Sub(ty, v0) - ty.Sub(ty, v1) - ty.MulU1(ty) - - tx.MulU1(v1) - tx.Add(tx, v0) - + tmp.MulVNC(a, b) gfp4Copy(e, tmp) return e } @@ -208,15 +179,7 @@ func (e *gfP4) Square(a *gfP4) *gfP4 { // Complex squaring algorithm: // (xv+y)² = (x^2*u + y^2) + 2*x*y*v tmp := &gfP4{} - tx := &tmp.x - ty := &tmp.y - tx.SquareUNC(&a.x) - ty.SquareNC(&a.y) - ty.Add(tx, ty) - - tx.Mul(&a.x, &a.y) - tx.Add(tx, tx) - + tmp.SquareNC(a) gfp4Copy(e, tmp) return e } @@ -224,13 +187,15 @@ func (e *gfP4) Square(a *gfP4) *gfP4 { func (e *gfP4) SquareNC(a *gfP4) *gfP4 { // Complex squaring algorithm: // (xv+y)² = (x^2*u + y^2) + 2*x*y*v + // = (xu + y)(x + y) -xy(1+u) + 2xy*v tx := &e.x ty := &e.y + tx.SquareUNC(&a.x) ty.SquareNC(&a.y) ty.Add(tx, ty) - tx.Mul(&a.x, &a.y) + tx.MulNC(&a.x, &a.y) tx.Add(tx, tx) return e @@ -240,15 +205,7 @@ func (e *gfP4) SquareNC(a *gfP4) *gfP4 { // v*(xv+y)² = (x^2*u + y^2)v + 2*x*y*u func (e *gfP4) SquareV(a *gfP4) *gfP4 { tmp := &gfP4{} - tx := &tmp.x - ty := &tmp.y - tx.SquareUNC(&a.x) - ty.SquareNC(&a.y) - tx.Add(tx, ty) - - ty.MulU(&a.x, &a.y) - ty.Add(ty, ty) - + tmp.SquareVNC(a) gfp4Copy(e, tmp) return e } diff --git a/sm9/bn256/gfp4_test.go b/sm9/bn256/gfp4_test.go index a875f54..e45127d 100644 --- a/sm9/bn256/gfp4_test.go +++ b/sm9/bn256/gfp4_test.go @@ -207,3 +207,22 @@ func BenchmarkGfP4Mul(b *testing.B) { t.Mul(x, y) } } + +func BenchmarkGfP4Square(b *testing.B) { + x := &gfP4{ + gfP2{ + *fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141")), + *fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B")), + }, + gfP2{ + *fromBigInt(bigFromHex("17509B092E845C1266BA0D262CBEE6ED0736A96FA347C8BD856DC76B84EBEB96")), + *fromBigInt(bigFromHex("A7CF28D519BE3DA65F3170153D278FF247EFBA98A71A08116215BBA5C999A7C7")), + }, + } + t := &gfP4{} + b.ReportAllocs() + b.ResetTimer() + for i := 0; i < b.N; i++ { + t.Square(x) + } +} diff --git a/sm9/bn256/gfp_decl.go b/sm9/bn256/gfp_decl.go index 7c0a4a1..ed426d1 100644 --- a/sm9/bn256/gfp_decl.go +++ b/sm9/bn256/gfp_decl.go @@ -16,6 +16,7 @@ import ( var supportADX = cpu.X86.HasADX && cpu.X86.HasBMI2 // Set c = p - a, if c == p, then c = 0 +// It seems this function's performance is worse than gfpSub with zero. // // go:noescape func gfpNeg(c, a *gfP)