From 4755d5aa2abac7e144d959e04e13549fe48817e9 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Thu, 6 Jul 2023 17:36:34 +0800 Subject: [PATCH] sm9/bn256: also apply to 1-2-6-12 #139 #137 --- sm9/bn256/bn_pair.go | 55 +++++----- sm9/bn256/bn_pair_b6.go | 38 ++++--- sm9/bn256/g2.go | 2 +- sm9/bn256/gfp.go | 1 - sm9/bn256/gfp12.go | 58 ++++------ sm9/bn256/gfp12_b6.go | 220 +++++++++++++++++++++++++++++++++++-- sm9/bn256/gfp12_b6_test.go | 25 +++++ sm9/bn256/gfp12_test.go | 2 +- sm9/bn256/gfp12b6_exp_u.go | 43 ++++++++ sm9/bn256/gfp2.go | 29 +++-- sm9/bn256/gfp4.go | 39 ++++++- sm9/bn256/gfp6.go | 82 +++++++++----- sm9/bn256/gfp_test.go | 16 +++ sm9/bn256/select_amd64.s | 58 +++++++++- sm9/bn256/select_arm64.s | 19 +++- sm9/bn256/select_decl.go | 3 + 16 files changed, 543 insertions(+), 147 deletions(-) create mode 100644 sm9/bn256/gfp12b6_exp_u.go diff --git a/sm9/bn256/bn_pair.go b/sm9/bn256/bn_pair.go index 0435f8c..3a40cf7 100644 --- a/sm9/bn256/bn_pair.go +++ b/sm9/bn256/bn_pair.go @@ -32,19 +32,20 @@ func lineFunctionAdd(r, p, rOut *twistPoint, q *curvePoint, r2, a, b, c *gfP2) { t2.Add(t2, t2) // t2 = 2Yr * J rOut.y.Sub(t, t2) // rOut.y = L1*(V-rOut.x) - 2Yr*J - rOut.t.Square(&rOut.z) + rOut.t.SquareNC(&rOut.z) - t.Add(&p.y, &rOut.z).Square(t).Sub(t, r2).Sub(t, &rOut.t) // t = (Yp + rOut.Z)^2 - Yp^2 - rOut.Z^2 = 2Yp*rOut.Z + // t = (Yp + rOut.Z)^2 - Yp^2 - rOut.Z^2 = 2Yp*rOut.Z + t.Add(&p.y, &rOut.z).Square(t).Sub(t, r2).Sub(t, &rOut.t) t2.Mul(L1, &p.x) t2.Add(t2, t2) // t2 = 2 L1 * Xp - a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z + a.Sub(t2, t) // a = 2 L1 * Xp - 2 Yp * rOut.z = 2 L1 * Xp - (Yp + rOut.Z)^2 + Yp^2 + rOut.Z^2 - c.MulScalar(&rOut.z, &q.y) - c.Add(c, c) + c.MulScalar(&rOut.z, &q.y) // c = rOut.z * Yq + c.Add(c, c) // c = 2 * rOut.z * Yq - b.Neg(L1) - b.MulScalar(b, &q.x).Add(b, b) + b.Neg(L1) // b= -L1 + b.MulScalar(b, &q.x).Add(b, b) // b = -2 * L1 * Xq } func lineFunctionDouble(r, rOut *twistPoint, q *curvePoint, a, b, c *gfP2) { @@ -71,38 +72,36 @@ func lineFunctionDouble(r, rOut *twistPoint, q *curvePoint, a, b, c *gfP2) { t.Add(t, t).Add(t, t) // t = 8 * Yr ^ 4 rOut.y.Sub(&rOut.y, t) - rOut.t.Square(&rOut.z) + rOut.t.SquareNC(&rOut.z) - t.Mul(E, &r.t).Add(t, t) - b.Neg(t) - b.MulScalar(b, &q.x) + t.Mul(E, &r.t).Add(t, t) // t = 2(E * Tr) + b.Neg(t) // b = -2(E * Tr) + b.MulScalar(b, &q.x) // b = -2(E * Tr * Xq) - a.Add(&r.x, E) - a.Square(a).Sub(a, A).Sub(a, G) - t.Add(B, B).Add(t, t) - a.Sub(a, t) + a.Add(&r.x, E) // a = Xr + E + a.Square(a).Sub(a, A).Sub(a, G) // a = (Xr + E) ^ 2 - A - G + t.Add(B, B).Add(t, t) // t = 4B + a.Sub(a, t) // a = (Xr + E) ^ 2 - A - G - 4B - c.Mul(&rOut.z, &r.t) - c.Add(c, c).MulScalar(c, &q.y) + c.Mul(&rOut.z, &r.t) // c = rOut.z * Tr + c.Add(c, c).MulScalar(c, &q.y) // c = 2 rOut.z * Tr * Yq } // (ret.z + ret.y*w + ret.x*w^2)* ((cv+a) + b*w^2) func mulLine(ret *gfP12, a, b, c *gfP2) { - t1, tz, t, bz := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} - gfp2Copy(&bz.x, c) - gfp2Copy(&bz.y, a) - - tz.MulNC(&ret.z, bz) + tz, t := &gfP4{}, &gfP4{} + tz.MulNC2(&ret.z, c, a) t.MulScalar(&ret.y, b).MulV1(t) tz.Add(tz, t) - t1.MulNC(&ret.y, bz) - t.MulScalar(&ret.x, b).MulV1(t) - ret.y.Add(t1, t) + t.MulNC2(&ret.y, c, a) + ret.y.MulScalar(&ret.x, b).MulV1(&ret.y) + ret.y.Add(&ret.y, t) + + t.MulNC2(&ret.x, c, a) + ret.x.MulScalar(&ret.z, b) + ret.x.Add(&ret.x, t) - t.MulNC(&ret.x, bz) - t1.MulScalar(&ret.z, b) - ret.x.Add(t1, t) gfp4Copy(&ret.z, tz) } diff --git a/sm9/bn256/bn_pair_b6.go b/sm9/bn256/bn_pair_b6.go index 46d226b..2bd6a1c 100644 --- 
a/sm9/bn256/bn_pair_b6.go +++ b/sm9/bn256/bn_pair_b6.go @@ -61,9 +61,9 @@ func millerB6(q *twistPoint, p *curvePoint) *gfP12b6 { ret.Square(ret) } mulLineB6(ret, a, b, c) - tmpR= r + tmpR = r r = newR - newR= tmpR + newR = tmpR switch sixUPlus2NAF[i-1] { case 1: lineFunctionAdd(r, aAffine, newR, bAffine, r2, a, b, c) @@ -74,9 +74,9 @@ func millerB6(q *twistPoint, p *curvePoint) *gfP12b6 { } mulLineB6(ret, a, b, c) - tmpR= r + tmpR = r r = newR - newR= tmpR + newR = tmpR } // In order to calculate Q1 we have to convert q from the sextic twist @@ -109,9 +109,9 @@ func millerB6(q *twistPoint, p *curvePoint) *gfP12b6 { r2.Square(&q1.y) lineFunctionAdd(r, q1, newR, bAffine, r2, a, b, c) mulLineB6(ret, a, b, c) - tmpR= r + tmpR = r r = newR - newR= tmpR + newR = tmpR r2.Square(&minusQ2.y) lineFunctionAdd(r, minusQ2, newR, bAffine, r2, a, b, c) @@ -144,42 +144,44 @@ func finalExponentiationB6(in *gfP12b6) *gfP12b6 { inv.Invert(in) t1.Mul(t1, inv) - t2 := (&gfP12b6{}).FrobeniusP2(t1) + t2 := inv.FrobeniusP2(t1) // reuse inv t1.Mul(t1, t2) fp := (&gfP12b6{}).Frobenius(t1) fp2 := (&gfP12b6{}).FrobeniusP2(t1) fp3 := (&gfP12b6{}).Frobenius(fp2) - fu := (&gfP12b6{}).Exp(t1, u) - fu2 := (&gfP12b6{}).Exp(fu, u) - fu3 := (&gfP12b6{}).Exp(fu2, u) + y0 := &gfP12b6{} + y0.MulNC(fp, fp2).Mul(y0, fp3) + + // reuse fp, fp2, fp3 local variables + // [gfP12ExpU] is most time consuming operation + fu := fp.gfP12ExpU(t1) + fu2 := fp2.gfP12ExpU(fu) + fu3 := fp3.gfP12ExpU(fu2) y3 := (&gfP12b6{}).Frobenius(fu) fu2p := (&gfP12b6{}).Frobenius(fu2) fu3p := (&gfP12b6{}).Frobenius(fu3) y2 := (&gfP12b6{}).FrobeniusP2(fu2) - y0 := &gfP12b6{} - y0.Mul(fp, fp2).Mul(y0, fp3) - y1 := (&gfP12b6{}).Conjugate(t1) y5 := (&gfP12b6{}).Conjugate(fu2) y3.Conjugate(y3) - y4 := (&gfP12b6{}).Mul(fu, fu2p) + y4 := (&gfP12b6{}).MulNC(fu, fu2p) y4.Conjugate(y4) - y6 := (&gfP12b6{}).Mul(fu3, fu3p) + y6 := (&gfP12b6{}).MulNC(fu3, fu3p) y6.Conjugate(y6) - t0 := (&gfP12b6{}).Square(y6) + t0 := (&gfP12b6{}).SpecialSquareNC(y6) t0.Mul(t0, y4).Mul(t0, y5) t1.Mul(y3, y5).Mul(t1, t0) t0.Mul(t0, y2) - t1.Square(t1).Mul(t1, t0).Square(t1) + t1.SpecialSquare(t1).Mul(t1, t0).SpecialSquare(t1) t0.Mul(t1, y1) t1.Mul(t1, y0) - t0.Square(t0).Mul(t0, t1) + t0.SpecialSquare(t0).Mul(t0, t1) return t0 } diff --git a/sm9/bn256/g2.go b/sm9/bn256/g2.go index ee93578..694b0e0 100644 --- a/sm9/bn256/g2.go +++ b/sm9/bn256/g2.go @@ -13,7 +13,7 @@ type G2 struct { p *twistPoint } -//Gen2 is the generator of G2. +// Gen2 is the generator of G2. 
var Gen2 = &G2{twistGen} var g2GeneratorTable *[32 * 2]twistPointTable diff --git a/sm9/bn256/gfp.go b/sm9/bn256/gfp.go index 9e0e5d7..f834a07 100644 --- a/sm9/bn256/gfp.go +++ b/sm9/bn256/gfp.go @@ -9,7 +9,6 @@ import ( type gfP [4]uint64 -var genericZero = &gfP{0} var zero = newGFp(0) var one = newGFp(1) var two = newGFp(2) diff --git a/sm9/bn256/gfp12.go b/sm9/bn256/gfp12.go index 50f2344..f749af0 100644 --- a/sm9/bn256/gfp12.go +++ b/sm9/bn256/gfp12.go @@ -134,11 +134,6 @@ func (e *gfP12) MulGFP(a *gfP12, b *gfP) *gfP12 { } func (e *gfP12) Mul(a, b *gfP12) *gfP12 { - // (z0 + y0*w + x0*w^2)* (z1 + y1*w + x1*w^2) - // z0*z1 + z0*y1*w + z0*x1*w^2 - // +y0*z1*w + y0*y1*w^2 + y0*x1*v - // +x0*z1*w^2 + x0*y1*v + x0*x1*v*w - //=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2 tmp := &gfP12{} tmp.MulNC(a, b) gfp12Copy(e, tmp) @@ -186,10 +181,6 @@ func (e *gfP12) MulNC(a, b *gfP12) *gfP12 { } func (e *gfP12) Square(a *gfP12) *gfP12 { - // (z + y*w + x*w^2)* (z + y*w + x*w^2) - // z^2 + z*y*w + z*x*w^2 + y*z*w + y^2*w^2 + y*x*v + x*z*w^2 + x*y*v + x^2 *v *w - // (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2 - // (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2 tmp := &gfP12{} tmp.SquareNC(a) gfp12Copy(e, tmp) @@ -244,6 +235,10 @@ func (e *gfP12) SpecialSquare(a *gfP12) *gfP12 { return e } +// Special squaring loop for use on elements in T_6(fp2) (after the +// easy part of the final exponentiation. Used in the hard part +// of the final exponentiation. Function uses formulas in +// Granger/Scott (PKC2010). func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 { // Square first round in := &gfP12{} @@ -254,15 +249,12 @@ func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 { v2 := &in.z v0.SquareVNC(&a.x) // (t02, t10) - v1.SquareNC(&a.y) // (t12, t01) - v2.SquareNC(&a.z) // (t11, t00) + v1.SquareNC(&a.y) // (t12, t01) + v2.SquareNC(&a.z) // (t11, t00) - tx.Add(v0, v0) - tx.Add(v0, tx) - ty.Add(v1, v1) - ty.Add(v1, ty) - tz.Add(v2, v2) - tz.Add(v2, tz) + tx.Triple(v0) + ty.Triple(v1) + tz.Triple(v2) v0.Add(&a.x, &a.x) // (f12, f01) v0.y.Neg(&v0.y) @@ -284,23 +276,20 @@ func (e *gfP12) SpecialSquares(a *gfP12, n int) *gfP12 { v2 = &tmp.z v0.SquareVNC(&in.x) // (t02, t10) - v1.SquareNC(&in.y) // (t12, t01) - v2.SquareNC(&in.z) // (t11, t00) - - tx.Add(v0, v0) - tx.Add(v0, tx) - ty.Add(v1, v1) - ty.Add(v1, ty) - tz.Add(v2, v2) - tz.Add(v2, tz) - + v1.SquareNC(&in.y) // (t12, t01) + v2.SquareNC(&in.z) // (t11, t00) + + tx.Triple(v0) + ty.Triple(v1) + tz.Triple(v2) + v0.Add(&in.x, &in.x) // (f12, f01) v0.y.Neg(&v0.y) v1.Add(&in.y, &in.y) // (f02, f10) v1.x.Neg(&v1.x) v2.Add(&in.z, &in.z) // (f11, f00) v2.y.Neg(&v2.y) - + v0.Add(ty, v0) v1.Add(tx, v1) v2.Add(tz, v2) @@ -322,15 +311,12 @@ func (e *gfP12) SpecialSquareNC(a *gfP12) *gfP12 { v2 := &e.z v0.SquareVNC(&a.x) // (t02, t10) - v1.SquareNC(&a.y) // (t12, t01) - v2.SquareNC(&a.z) // (t11, t00) + v1.SquareNC(&a.y) // (t12, t01) + v2.SquareNC(&a.z) // (t11, t00) - tx.Add(v0, v0) - tx.Add(v0, tx) - ty.Add(v1, v1) - ty.Add(v1, ty) - tz.Add(v2, v2) - tz.Add(v2, tz) + tx.Triple(v0) + ty.Triple(v1) + tz.Triple(v2) v0.Add(&a.x, &a.x) // (f12, f01) v0.y.Neg(&v0.y) diff --git a/sm9/bn256/gfp12_b6.go b/sm9/bn256/gfp12_b6.go index 09ae8cc..2e2c9e1 100644 --- a/sm9/bn256/gfp12_b6.go +++ b/sm9/bn256/gfp12_b6.go @@ -125,15 +125,25 @@ func (e *gfP12b6) Sub(a, b *gfP12b6) *gfP12b6 { } func (e *gfP12b6) Mul(a, b *gfP12b6) *gfP12b6 { + tmp := &gfP12b6{} + tmp.MulNC(a, b) + e.x.Set(&tmp.x) + 
e.y.Set(&tmp.y) + return e +} + +func (e *gfP12b6) MulNC(a, b *gfP12b6) *gfP12b6 { // "Multiplication and Squaring on Pairing-Friendly Fields" // Section 4, Karatsuba method. // http://eprint.iacr.org/2006/471.pdf //(a0+a1*t)(b0+b1*t)=c0+c1*t, where //c0 = a0*b0 +a1*b1*s //c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 - tx, ty, v0, v1 := &gfP6{}, &gfP6{}, &gfP6{}, &gfP6{} - v0.Mul(&a.y, &b.y) - v1.Mul(&a.x, &b.x) + tx := &e.x + ty := &e.y + v0, v1 := &gfP6{}, &gfP6{} + v0.MulNC(&a.y, &b.y) + v1.MulNC(&a.x, &b.x) tx.Add(&a.x, &a.y) ty.Add(&b.x, &b.y) @@ -144,8 +154,6 @@ func (e *gfP12b6) Mul(a, b *gfP12b6) *gfP12b6 { ty.MulS(v1) ty.Add(ty, v0) - e.x.Set(tx) - e.y.Set(ty) return e } @@ -168,21 +176,209 @@ func (e *gfP12b6) MulGfP2(a *gfP12b6, b *gfP2) *gfP12b6 { } func (e *gfP12b6) Square(a *gfP12b6) *gfP12b6 { + tmp := &gfP12b6{} + tmp.SquareNC(a) + e.x.Set(&tmp.x) + e.y.Set(&tmp.y) + return e +} + +func (e *gfP12b6) SquareNC(a *gfP12b6) *gfP12b6 { // Complex squaring algorithm // (xt+y)² = (x^2*s + y^2) + 2*x*y*t - tx, ty := &gfP6{}, &gfP6{} - tx.Square(&a.x).MulS(tx) - ty.Square(&a.y) + tx := &e.x + ty := &e.y + + tx.SquareNC(&a.x).MulS(tx) + ty.SquareNC(&a.y) ty.Add(tx, ty) tx.Mul(&a.x, &a.y) tx.Add(tx, tx) - e.x.Set(tx) - e.y.Set(ty) return e } +// Special squaring for use on elements in T_6(fp2) (after the +// easy part of the final exponentiation. Used in the hard part +// of the final exponentiation. Function uses formulas in +// Granger/Scott (PKC2010). +func (e *gfP12b6) SpecialSquare(a *gfP12b6) *gfP12b6 { + tmp := &gfP12b6{} + tmp.SpecialSquareNC(a) + e.x.Set(&tmp.x) + e.y.Set(&tmp.y) + return e +} + +func (e *gfP12b6) SpecialSquareNC(a *gfP12b6) *gfP12b6 { + f02 := &e.y.x + f01 := &e.y.y + f00 := &e.y.z + f12 := &e.x.x + f11 := &e.x.y + f10 := &e.x.z + + t00, t01, t02, t10, t11, t12 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} + + gfP4Square(t11, t00, &a.x.y, &a.y.z) + gfP4Square(t12, t01, &a.y.x, &a.x.z) + gfP4Square(t02, t10, &a.x.x, &a.y.y) + + f00.MulU1(t02) + t02.Set(t10) + t10.Set(f00) + + f00.Add(t00, t00) + t00.Add(f00, t00) + f00.Add(t01, t01) + t01.Add(f00, t01) + f00.Add(t02, t02) + t02.Add(f00, t02) + f00.Add(t10, t10) + t10.Add(f00, t10) + f00.Add(t11, t11) + t11.Add(f00, t11) + f00.Add(t12, t12) + t12.Add(f00, t12) + + f00.Add(&a.y.z, &a.y.z) + f00.Neg(f00) + f01.Add(&a.y.y, &a.y.y) + f01.Neg(f01) + f02.Add(&a.y.x, &a.y.x) + f02.Neg(f02) + f10.Add(&a.x.z, &a.x.z) + f11.Add(&a.x.y, &a.x.y) + f12.Add(&a.x.x, &a.x.x) + + f00.Add(f00, t00) + f01.Add(f01, t01) + f02.Add(f02, t02) + f10.Add(f10, t10) + f11.Add(f11, t11) + f12.Add(f12, t12) + + return e +} + +func (e *gfP12b6) SpecialSquares(a *gfP12b6, n int) *gfP12b6 { + // Square first round + in := &gfP12b6{} + f02 := &in.y.x + f01 := &in.y.y + f00 := &in.y.z + f12 := &in.x.x + f11 := &in.x.y + f10 := &in.x.z + + t00, t01, t02, t10, t11, t12 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} + gfP4Square(t11, t00, &a.x.y, &a.y.z) + gfP4Square(t12, t01, &a.y.x, &a.x.z) + gfP4Square(t02, t10, &a.x.x, &a.y.y) + + f00.MulU1(t02) + t02.Set(t10) + t10.Set(f00) + + f00.Add(t00, t00) + t00.Add(f00, t00) + f00.Add(t01, t01) + t01.Add(f00, t01) + f00.Add(t02, t02) + t02.Add(f00, t02) + f00.Add(t10, t10) + t10.Add(f00, t10) + f00.Add(t11, t11) + t11.Add(f00, t11) + f00.Add(t12, t12) + t12.Add(f00, t12) + + f00.Add(&a.y.z, &a.y.z) + f00.Neg(f00) + f01.Add(&a.y.y, &a.y.y) + f01.Neg(f01) + f02.Add(&a.y.x, &a.y.x) + f02.Neg(f02) + f10.Add(&a.x.z, &a.x.z) + f11.Add(&a.x.y, &a.x.y) + f12.Add(&a.x.x, &a.x.x) + 
+ f00.Add(f00, t00) + f01.Add(f01, t01) + f02.Add(f02, t02) + f10.Add(f10, t10) + f11.Add(f11, t11) + f12.Add(f12, t12) + + tmp := &gfP12b6{} + var tmp2 *gfP12b6 + + for i := 1; i < n; i++ { + f02 = &tmp.y.x + f01 = &tmp.y.y + f00 = &tmp.y.z + f12 = &tmp.x.x + f11 = &tmp.x.y + f10 = &tmp.x.z + + gfP4Square(t11, t00, &in.x.y, &in.y.z) + gfP4Square(t12, t01, &in.y.x, &in.x.z) + gfP4Square(t02, t10, &in.x.x, &in.y.y) + + f00.MulU1(t02) + t02.Set(t10) + t10.Set(f00) + + f00.Add(t00, t00) + t00.Add(f00, t00) + f00.Add(t01, t01) + t01.Add(f00, t01) + f00.Add(t02, t02) + t02.Add(f00, t02) + f00.Add(t10, t10) + t10.Add(f00, t10) + f00.Add(t11, t11) + t11.Add(f00, t11) + f00.Add(t12, t12) + t12.Add(f00, t12) + + f00.Add(&in.y.z, &in.y.z) + f00.Neg(f00) + f01.Add(&in.y.y, &in.y.y) + f01.Neg(f01) + f02.Add(&in.y.x, &in.y.x) + f02.Neg(f02) + f10.Add(&in.x.z, &in.x.z) + f11.Add(&in.x.y, &in.x.y) + f12.Add(&in.x.x, &in.x.x) + + f00.Add(f00, t00) + f01.Add(f01, t01) + f02.Add(f02, t02) + f10.Add(f10, t10) + f11.Add(f11, t11) + f12.Add(f12, t12) + + // Switch references + tmp2 = in + in = tmp + tmp = tmp2 + } + e.x.Set(&in.x) + e.y.Set(&in.y) + return e +} + +func gfP4Square(retX, retY, x, y *gfP2) { + retX.SquareUNC(x) + retY.SquareNC(y) + retY.Add(retX, retY) + + retX.MulNC(x, y) + retX.Add(retX, retX) +} + func (c *gfP12b6) Exp(a *gfP12b6, power *big.Int) *gfP12b6 { sum := (&gfP12b6{}).SetOne() t := &gfP12b6{} @@ -206,8 +402,8 @@ func (e *gfP12b6) Invert(a *gfP12b6) *gfP12b6 { t0, t1 := &gfP6{}, &gfP6{} - t0.Mul(&a.y, &a.y) - t1.Mul(&a.x, &a.x).MulS(t1) + t0.MulNC(&a.y, &a.y) + t1.MulNC(&a.x, &a.x).MulS(t1) t0.Sub(t0, t1) t0.Invert(t0) diff --git a/sm9/bn256/gfp12_b6_test.go b/sm9/bn256/gfp12_b6_test.go index 3b7a1dd..2bca692 100644 --- a/sm9/bn256/gfp12_b6_test.go +++ b/sm9/bn256/gfp12_b6_test.go @@ -311,3 +311,28 @@ func BenchmarkGfP12b6Frobenius(b *testing.B) { } } } + +func TestGfP12b6SpecialSquare(t *testing.T) { + in := &gfP12b6{ + p6, + p6, + } + t1 := &gfP12b6{} + t1.x.Neg(&in.x) + t1.y.Set(&in.y) + + inv := &gfP12b6{} + inv.Invert(in) + t1.Mul(t1, inv) + + t2 := (&gfP12b6{}).FrobeniusP2(t1) + t1.Mul(t1, t2) + + got := &gfP12b6{} + expected := &gfP12b6{} + got.SpecialSquare(t1) + expected.Square(t1) + if *got != *expected { + t.Errorf("not same got=%v, expected=%v", got, expected) + } +} diff --git a/sm9/bn256/gfp12_test.go b/sm9/bn256/gfp12_test.go index 580a336..4d08177 100644 --- a/sm9/bn256/gfp12_test.go +++ b/sm9/bn256/gfp12_test.go @@ -93,7 +93,7 @@ func BenchmarkGfP12SpecialSquare(b *testing.B) { b.ReportAllocs() b.ResetTimer() for i := 0; i < b.N; i++ { - x2.Square(t1) + x2.SpecialSquare(t1) } } diff --git a/sm9/bn256/gfp12b6_exp_u.go b/sm9/bn256/gfp12b6_exp_u.go new file mode 100644 index 0000000..9fd09f3 --- /dev/null +++ b/sm9/bn256/gfp12b6_exp_u.go @@ -0,0 +1,43 @@ +package bn256 + +// Use special square +func (e *gfP12b6) gfP12ExpU(x *gfP12b6) *gfP12b6 { + // The sequence of 10 multiplications and 61 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. 
+ // + // _10 = 2*1 + // _100 = 2*_10 + // _101 = 1 + _100 + // _1001 = _100 + _101 + // _1011 = _10 + _1001 + // _1100 = 1 + _1011 + // i56 = (_1100 << 40 + _1011) << 7 + _1011 + _100 + // i69 = (2*(i56 << 4 + _1001) + 1) << 6 + // return 2*(_101 + i69) + // + var z = e + var t0 = new(gfP12b6) + var t1 = new(gfP12b6) + var t2 = new(gfP12b6) + var t3 = new(gfP12b6) + + t2.SpecialSquareNC(x) + t1.SpecialSquareNC(t2) + z.MulNC(x, t1) + t0.MulNC(t1, z) + t2.Mul(t2, t0) + t3.MulNC(x, t2) + t3.SpecialSquares(t3, 40) + t3.Mul(t2, t3) + t3.SpecialSquares(t3, 7) + t2.Mul(t2, t3) + t1.Mul(t1, t2) + t1.SpecialSquares(t1, 4) + t0.Mul(t0, t1) + t0.SpecialSquare(t0) + t0.Mul(x, t0) + t0.SpecialSquares(t0, 6) + z.Mul(z, t0) + z.SpecialSquare(z) + return e +} diff --git a/sm9/bn256/gfp2.go b/sm9/bn256/gfp2.go index 0a17a99..613a9d7 100644 --- a/sm9/bn256/gfp2.go +++ b/sm9/bn256/gfp2.go @@ -64,13 +64,13 @@ func (e *gfP2) IsOne() bool { func (e *gfP2) Conjugate(a *gfP2) *gfP2 { e.y.Set(&a.y) - gfpSub(&e.x, genericZero, &a.x) + gfpSub(&e.x, zero, &a.x) return e } func (e *gfP2) Neg(a *gfP2) *gfP2 { - gfpSub(&e.x, genericZero, &a.x) - gfpSub(&e.y, genericZero, &a.y) + gfpSub(&e.x, zero, &a.x) + gfpSub(&e.y, zero, &a.y) return e } @@ -135,14 +135,20 @@ func (e *gfP2) MulNC(a, b *gfP2) *gfP2 { return e } +func (e *gfP2) MulU(a, b *gfP2) *gfP2 { + tmp := &gfP2{} + tmp.MulUNC(a, b) + gfp2Copy(e, tmp) + return e +} + // MulU: a * b * u // (a0+a1*u)(b0+b1*u)*u=c0+c1*u, where // c1 = (a0*b0 - 2a1*b1)u // c0 = -2 * ((a0 + a1)(b0 + b1) - a0*b0 - a1*b1) = -2 * (a0*b1 + a1*b0) -func (e *gfP2) MulU(a, b *gfP2) *gfP2 { - tmp := &gfP2{} - tx := &tmp.x - ty := &tmp.y +func (e *gfP2) MulUNC(a, b *gfP2) *gfP2 { + tx := &e.x + ty := &e.y v0, v1 := &gfP{}, &gfP{} gfpMul(v0, &a.y, &b.y) @@ -155,12 +161,11 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 { gfpSub(ty, ty, v0) gfpSub(ty, ty, v1) gfpAdd(ty, ty, ty) - gfpSub(ty, genericZero, ty) + gfpSub(ty, zero, ty) gfpSub(tx, v0, v1) gfpSub(tx, tx, v1) - gfp2Copy(e, tmp) return e } @@ -171,7 +176,7 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 { func (e *gfP2) MulU1(a *gfP2) *gfP2 { t := &gfP{} gfpAdd(t, &a.x, &a.x) - gfpSub(t, genericZero, t) + gfpSub(t, zero, t) gfpCopy(&e.x, &a.y) gfpCopy(&e.y, t) @@ -228,7 +233,7 @@ func (e *gfP2) SquareUNC(a *gfP2) *gfP2 { gfpAdd(tx, tx, ty) gfpAdd(ty, ty, ty) gfpAdd(ty, ty, ty) - gfpSub(ty, genericZero, ty) + gfpSub(ty, zero, ty) return e } @@ -251,7 +256,7 @@ func (e *gfP2) Invert(a *gfP2) *gfP2 { inv := &gfP{} inv.Invert(t3) // inv = (2 * a.x ^ 2 + a.y ^ 2) ^ (-1) - gfpSub(t1, genericZero, &a.x) + gfpSub(t1, zero, &a.x) gfpMul(&e.x, t1, inv) // x = - a.x * inv gfpMul(&e.y, &a.y, inv) // y = a.y * inv diff --git a/sm9/bn256/gfp4.go b/sm9/bn256/gfp4.go index a8931e5..7f655b4 100644 --- a/sm9/bn256/gfp4.go +++ b/sm9/bn256/gfp4.go @@ -73,6 +73,12 @@ func (e *gfP4) Add(a, b *gfP4) *gfP4 { return e } +func (e *gfP4) Triple(a *gfP4) *gfP4 { + e.x.Triple(&a.x) + e.y.Triple(&a.y) + return e +} + func (e *gfP4) Sub(a, b *gfP4) *gfP4 { e.x.Sub(&a.x, &b.x) e.y.Sub(&a.y, &b.y) @@ -92,12 +98,6 @@ func (e *gfP4) MulGFP(a *gfP4, b *gfP) *gfP4 { } func (e *gfP4) Mul(a, b *gfP4) *gfP4 { - // "Multiplication and Squaring on Pairing-Friendly Fields" - // Section 4, Karatsuba method. 
- // http://eprint.iacr.org/2006/471.pdf - //(a0+a1*v)(b0+b1*v)=c0+c1*v, where - //c0 = a0*b0 +a1*b1*u - //c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 tmp := &gfP4{} tmp.MulNC(a, b) gfp4Copy(e, tmp) @@ -129,6 +129,33 @@ func (e *gfP4) MulNC(a, b *gfP4) *gfP4 { return e } +// MulNC2 muls a with (xv+y), this method is used in mulLine function +// to avoid gfP4 instance construction. +func (e *gfP4) MulNC2(a *gfP4, x, y *gfP2) *gfP4 { + // "Multiplication and Squaring on Pairing-Friendly Fields" + // Section 4, Karatsuba method. + // http://eprint.iacr.org/2006/471.pdf + //(a0+a1*v)(b0+b1*v)=c0+c1*v, where + //c0 = a0*b0 +a1*b1*u + //c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 + tx := &e.x + ty := &e.y + v0, v1 := &gfP2{}, &gfP2{} + v0.MulNC(&a.y, y) + v1.MulNC(&a.x, x) + + tx.Add(&a.x, &a.y) + ty.Add(x, y) + tx.Mul(tx, ty) + tx.Sub(tx, v0) + tx.Sub(tx, v1) + + ty.MulU1(v1) + ty.Add(ty, v0) + + return e +} + // MulV: a * b * v // (a0+a1*v)(b0+b1*v)*v=c0+c1*v, where // (a0*b0 + a0*b1v + a1*b0*v + a1*b1*u)*v diff --git a/sm9/bn256/gfp6.go b/sm9/bn256/gfp6.go index 9ff3cdf..45fc7c9 100644 --- a/sm9/bn256/gfp6.go +++ b/sm9/bn256/gfp6.go @@ -26,9 +26,7 @@ func (e *gfP6) String() string { } func (e *gfP6) Set(a *gfP6) *gfP6 { - e.x.Set(&a.x) - e.y.Set(&a.y) - e.z.Set(&a.z) + gfp6Copy(e, a) return e } @@ -104,15 +102,25 @@ func (e *gfP6) MulGfP(a *gfP6, b *gfP) *gfP6 { } func (e *gfP6) Mul(a, b *gfP6) *gfP6 { + tmp := &gfP6{} + tmp.MulNC(a, b) + gfp6Copy(e, tmp) + return e +} + +func (e *gfP6) MulNC(a, b *gfP6) *gfP6 { // (z0 + y0*s + x0*s²)* (z1 + y1*s + x1*s²) // z0*z1 + z0*y1*s + z0*x1*s² // +y0*z1*s + y0*y1*s² + y0*x1*u // +x0*z1*s² + x0*y1*u + x0*x1*s*u //=(z0*z1+y0*x1*u+x0*y1*u) + (z0*y1+y0*z1+x0*x1*u)s + (z0*x1 + y0*y1 + x0*z1)*s² - tx, ty, tz, t, v0, v1, v2 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} - v0.Mul(&a.z, &b.z) - v1.Mul(&a.y, &b.y) - v2.Mul(&a.x, &b.x) + tx := &e.x + ty := &e.y + tz := &e.z + t, v0, v1, v2 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} + v0.MulNC(&a.z, &b.z) + v1.MulNC(&a.y, &b.y) + v2.MulNC(&a.x, &b.x) t.Add(&a.y, &a.x) tz.Add(&b.y, &b.x) @@ -137,9 +145,6 @@ func (e *gfP6) Mul(a, b *gfP6) *gfP6 { tx.Add(tx, v1) tx.Sub(tx, v2) - e.x.Set(tx) - e.y.Set(ty) - e.z.Set(tz) return e } @@ -161,30 +166,47 @@ func (e *gfP6) MulS(a *gfP6) *gfP6 { } func (e *gfP6) Square(a *gfP6) *gfP6 { + tmp := &gfP6{} + tmp.SquareNC(a) + gfp6Copy(e, tmp) + return e +} + +func (e *gfP6) SquareNC(a *gfP6) *gfP6 { // (z + y*s + x*s²)* (z + y*s + x*s²) // z^2 + z*y*s + z*x*s² + y*z*s + y^2*s² + y*x*u + x*z*s² + x*y*u + x^2 *u *s // (z^2 + y*x*s + x*y*u) + (z*y + y*z + u * x^2)s + (z*x + y^2 + x*z)*s² // (z^2 + 2*x*y*u) + (u*x^2 + 2*y*z) * s + (y^2 + 2*x*z) * s² - tx, ty, tz, t := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} + // Karatsuba method + tx := &e.x + ty := &e.y + tz := &e.z + t, v0, v1, v2 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} - tz.Square(&a.z) - t.MulU(&a.x, &a.y) - t.Add(t, t) - tz.Add(tz, t) + v0.SquareNC(&a.z) + v1.SquareNC(&a.y) + v2.SquareNC(&a.x) - ty.SquareU(&a.x) - t.Mul(&a.y, &a.z) - t.Add(t, t) + t.Add(&a.y, &a.x) + tz.SquareNC(t) + tz.Sub(tz, v1) + tz.Sub(tz, v2) + tz.MulU1(tz) + tz.Add(tz, v0) + + t.Add(&a.z, &a.y) + ty.SquareNC(t) + ty.Sub(ty, v0) + ty.Sub(ty, v1) + t.MulU1(v2) ty.Add(ty, t) - tx.Square(&a.y) - t.Mul(&a.x, &a.z) - t.Add(t, t) - tx.Add(tx, t) + t.Add(&a.z, &a.x) + tx.SquareNC(t) + tx.Sub(tx, v0) + tx.Add(tx, v1) + tx.Sub(tx, v2) - e.x.Set(tx) - e.y.Set(ty) - e.z.Set(tz) return e } @@ -209,19 +231,19 @@ func (e *gfP6) Invert(a 
*gfP6) *gfP6 { // See "Implementing cryptographic pairings", M. Scott, section 3.2. // ftp://136.206.11.249/pub/crypto/pairings.pdf - t1 := (&gfP2{}).MulU(&a.x, &a.y) - A := (&gfP2{}).Square(&a.z) + t1 := (&gfP2{}).MulUNC(&a.x, &a.y) + A := (&gfP2{}).SquareNC(&a.z) A.Sub(A, t1) - B := (&gfP2{}).SquareU(&a.x) + B := (&gfP2{}).SquareUNC(&a.x) t1.Mul(&a.y, &a.z) B.Sub(B, t1) - C := (&gfP2{}).Square(&a.y) + C := (&gfP2{}).SquareNC(&a.y) t1.Mul(&a.x, &a.z) C.Sub(C, t1) - F := (&gfP2{}).MulU(C, &a.y) + F := (&gfP2{}).MulUNC(C, &a.y) t1.Mul(A, &a.z) F.Add(F, t1) t1.MulU(B, &a.x) diff --git a/sm9/bn256/gfp_test.go b/sm9/bn256/gfp_test.go index 85d1123..fc8f2f9 100644 --- a/sm9/bn256/gfp_test.go +++ b/sm9/bn256/gfp_test.go @@ -179,6 +179,22 @@ func TestInvert(t *testing.T) { } } +func TestGfpNeg(t *testing.T) { + x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) + got := &gfP{} + gfpSub(got, zero, x) + expected := &gfP{} + gfpNeg(expected, x) + if *expected != *got { + t.Errorf("got %v, expected %v", got, expected) + } + gfpSub(got, zero, zero) + gfpNeg(expected, zero) + if *expected != *got { + t.Errorf("got %v, expected %v", got, expected) + } +} + func BenchmarkGfPMul(b *testing.B) { x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) b.ReportAllocs() diff --git a/sm9/bn256/select_amd64.s b/sm9/bn256/select_amd64.s index e7584f4..6d27c9c 100644 --- a/sm9/bn256/select_amd64.s +++ b/sm9/bn256/select_amd64.s @@ -95,7 +95,63 @@ copygfp4_avx2: VMOVDQU Y3, (32*3)(res_ptr) VZEROUPPER - RET + RET + +// func gfp6Copy(res, a *gfP6) +TEXT ·gfp6Copy(SB),NOSPLIT,$0 + MOVQ res+0(FP), res_ptr + MOVQ a+8(FP), x_ptr + + CMPB ·supportAVX2+0(SB), $0x01 + JEQ copygfp6_avx2 + + MOVOU (16*0)(x_ptr), X0 + MOVOU (16*1)(x_ptr), X1 + MOVOU (16*2)(x_ptr), X2 + MOVOU (16*3)(x_ptr), X3 + + MOVOU (16*4)(x_ptr), X4 + MOVOU (16*5)(x_ptr), X5 + MOVOU (16*6)(x_ptr), X6 + MOVOU (16*7)(x_ptr), X7 + + MOVOU (16*8)(x_ptr), X8 + MOVOU (16*9)(x_ptr), X9 + MOVOU (16*10)(x_ptr), X10 + MOVOU (16*11)(x_ptr), X11 + + MOVOU X0, (16*0)(res_ptr) + MOVOU X1, (16*1)(res_ptr) + MOVOU X2, (16*2)(res_ptr) + MOVOU X3, (16*3)(res_ptr) + + MOVOU X4, (16*4)(res_ptr) + MOVOU X5, (16*5)(res_ptr) + MOVOU X6, (16*6)(res_ptr) + MOVOU X7, (16*7)(res_ptr) + + MOVOU X8, (16*8)(res_ptr) + MOVOU X9, (16*9)(res_ptr) + MOVOU X10, (16*10)(res_ptr) + MOVOU X11, (16*11)(res_ptr) + +copygfp6_avx2: + VMOVDQU (32*0)(x_ptr), Y0 + VMOVDQU (32*1)(x_ptr), Y1 + VMOVDQU (32*2)(x_ptr), Y2 + VMOVDQU (32*3)(x_ptr), Y3 + VMOVDQU (32*4)(x_ptr), Y4 + VMOVDQU (32*5)(x_ptr), Y5 + + VMOVDQU Y0, (32*0)(res_ptr) + VMOVDQU Y1, (32*1)(res_ptr) + VMOVDQU Y2, (32*2)(res_ptr) + VMOVDQU Y3, (32*3)(res_ptr) + VMOVDQU Y4, (32*4)(res_ptr) + VMOVDQU Y5, (32*5)(res_ptr) + + VZEROUPPER + RET // func gfp12Copy(res, a *gfP12) TEXT ·gfp12Copy(SB),NOSPLIT,$0 diff --git a/sm9/bn256/select_arm64.s b/sm9/bn256/select_arm64.s index dd1f7bb..44eec5e 100644 --- a/sm9/bn256/select_arm64.s +++ b/sm9/bn256/select_arm64.s @@ -30,7 +30,7 @@ TEXT ·gfp2Copy(SB),NOSPLIT,$0 RET /* ---------------------------------------*/ -// func gfp4Copy(res, a *gfP2) +// func gfp4Copy(res, a *gfP4) TEXT ·gfp4Copy(SB),NOSPLIT,$0 MOVD res+0(FP), res_ptr MOVD a+8(FP), a_ptr @@ -43,6 +43,23 @@ TEXT ·gfp4Copy(SB),NOSPLIT,$0 RET +/* ---------------------------------------*/ +// func gfp6Copy(res, a *gfP6) +TEXT ·gfp6Copy(SB),NOSPLIT,$0 + MOVD res+0(FP), res_ptr + MOVD a+8(FP), a_ptr + + VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16] + VST1.P 
[V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr) + + VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16] + VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr) + + VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16] + VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr) + + RET + /* ---------------------------------------*/ // func gfp12Copy(res, a *gfP12) TEXT ·gfp12Copy(SB),NOSPLIT,$0 diff --git a/sm9/bn256/select_decl.go b/sm9/bn256/select_decl.go index acd9855..cb84269 100644 --- a/sm9/bn256/select_decl.go +++ b/sm9/bn256/select_decl.go @@ -31,5 +31,8 @@ func gfp2Copy(res, in *gfP2) //go:noescape func gfp4Copy(res, in *gfP4) +//go:noescape +func gfp6Copy(res, in *gfP6) + //go:noescape func gfp12Copy(res, in *gfP12)
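
For reference, the addition chain recorded in the new gfp12b6_exp_u.go (10 multiplications and 61 squarings) can be replayed on plain integers: in the exponent, a squaring doubles, a multiplication adds, and n consecutive squarings shift left by n bits. The standalone sketch below does exactly that, assuming the package's exponent u is the SM9 BN parameter 0x600000000058F98A; the helper names (dbl, add, shl) are illustrative only and are not part of the patch.

package main

import (
	"fmt"
	"math/big"
)

func main() {
	one := big.NewInt(1)
	// Exponent bookkeeping: squaring doubles, multiplication adds,
	// n consecutive squarings shift left by n bits.
	dbl := func(x *big.Int) *big.Int { return new(big.Int).Lsh(x, 1) }
	add := func(x, y *big.Int) *big.Int { return new(big.Int).Add(x, y) }
	shl := func(x *big.Int, n uint) *big.Int { return new(big.Int).Lsh(x, n) }

	// Same chain as the comment in gfp12b6_exp_u.go.
	_10 := dbl(one)
	_100 := dbl(_10)
	_101 := add(one, _100)
	_1001 := add(_100, _101)
	_1011 := add(_10, _1001)
	_1100 := add(one, _1011)
	i56 := add(add(shl(add(shl(_1100, 40), _1011), 7), _1011), _100)
	i69 := shl(add(dbl(add(shl(i56, 4), _1001)), one), 6)
	u := dbl(add(_101, i69))

	fmt.Printf("%x\n", u) // prints 600000000058f98a if the assumption on u holds
}

The same bookkeeping matches the operation count quoted in the comment: 1+1+40+7+4+1+6+1 = 61 special squarings and 10 multiplications, which is why gfP12ExpU replaces the generic Exp(..., u) calls in finalExponentiationB6.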