sm9/bn256: value copy acceleration #136

This commit is contained in:
Sun Yimin 2023-06-30 17:51:35 +08:00 committed by GitHub
parent 78542032f2
commit c7b3d97304
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 657 additions and 108 deletions

View File

@ -95,8 +95,8 @@ func lineFunctionDouble(r *twistPoint, q *curvePoint) (a, b, c *gfP2, rOut *twis
// (ret.z + ret.y*w + ret.x*w^2)* ((cv+a) + b*w^2) // (ret.z + ret.y*w + ret.x*w^2)* ((cv+a) + b*w^2)
func mulLine(ret *gfP12, a, b, c *gfP2) { func mulLine(ret *gfP12, a, b, c *gfP2) {
t1, tz, t, bz := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} t1, tz, t, bz := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}
bz.x.Set(c) gfp2Copy(&bz.x, c)
bz.y.Set(a) gfp2Copy(&bz.y, a)
tz.Mul(&ret.z, bz) tz.Mul(&ret.z, bz)
t.MulScalar(&ret.y, b).MulV1(t) t.MulScalar(&ret.y, b).MulV1(t)
@ -109,17 +109,14 @@ func mulLine(ret *gfP12, a, b, c *gfP2) {
t.Mul(&ret.x, bz) t.Mul(&ret.x, bz)
t1.MulScalar(&ret.z, b) t1.MulScalar(&ret.z, b)
ret.x.Add(t1, t) ret.x.Add(t1, t)
gfp4Copy(&ret.z, tz)
ret.z.Set(tz)
} }
//
// R-ate Pairing G2 x G1 -> GT // R-ate Pairing G2 x G1 -> GT
// //
// P is a point of order q in G1. Q(x,y) is a point of order q in G2. // P is a point of order q in G1. Q(x,y) is a point of order q in G2.
// Note that Q is a point on the sextic twist of the curve over Fp^2, P(x,y) is a point on the // Note that Q is a point on the sextic twist of the curve over Fp^2, P(x,y) is a point on the
// curve over the base field Fp // curve over the base field Fp
//
func miller(q *twistPoint, p *curvePoint) *gfP12 { func miller(q *twistPoint, p *curvePoint) *gfP12 {
ret := (&gfP12{}).SetOne() ret := (&gfP12{}).SetOne()
@ -218,9 +215,9 @@ func finalExponentiation(in *gfP12) *gfP12 {
fp2 := (&gfP12{}).FrobeniusP2(t1) fp2 := (&gfP12{}).FrobeniusP2(t1)
fp3 := (&gfP12{}).Frobenius(fp2) fp3 := (&gfP12{}).Frobenius(fp2)
fu := (&gfP12{}).Exp(t1, u) fu := (&gfP12{}).gfP12ExpU(t1)
fu2 := (&gfP12{}).Exp(fu, u) fu2 := (&gfP12{}).gfP12ExpU(fu)
fu3 := (&gfP12{}).Exp(fu2, u) fu3 := (&gfP12{}).gfP12ExpU(fu2)
y3 := (&gfP12{}).Frobenius(fu) y3 := (&gfP12{}).Frobenius(fu)
fu2p := (&gfP12{}).Frobenius(fu2) fu2p := (&gfP12{}).Frobenius(fu2)

View File

@ -97,6 +97,14 @@ func main() {
if err = writeFile("gfp2_sqrt.go", out, out1, out2); err != nil { if err = writeFile("gfp2_sqrt.go", out, out1, out2); err != nil {
log.Fatal(err) log.Fatal(err)
} }
out, err = generate(tmplAddchainExp12, "0x600000000058f98a", "gfP12")
if err != nil {
log.Fatal(err)
}
if err = writeFile("gfp12_exp_u.go", out); err != nil {
log.Fatal(err)
}
} }
const tmplAddchainExp1 = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. const tmplAddchainExp1 = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
@ -300,3 +308,40 @@ func sqrtCandidate(z, x *Element) {
{{- end }} {{- end }}
} }
` `
// tmplAddchainExp12 is the text template that main hands to generate, together
// with the exponent 0x600000000058f98a and the type name "gfP12", to produce
// gfp12_exp_u.go. It renders a gfP12ExpU method whose body replays an addition
// chain: each add instruction becomes a Mul call, each double instruction a
// Square call, and each shift instruction a run of Square calls (one Square
// from the operand into the output when the two differ, then a counted loop
// squaring the output in place). The rendered method starts z as a copy of the
// receiver e and finishes with e.Set(z).
const tmplAddchainExp12 = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
package bn256
func (e *Element) gfP12ExpU(x *Element) *Element {
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
var z = new(Element).Set(e)
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
return e.Set(z)
}
`

View File

@ -60,11 +60,7 @@ func (e *gfP) String() string {
} }
func (e *gfP) Set(f *gfP) *gfP { func (e *gfP) Set(f *gfP) *gfP {
e[0] = f[0] gfpCopy(e, f)
e[1] = f[1]
e[2] = f[2]
e[3] = f[3]
return e return e
} }

View File

@ -58,9 +58,7 @@ func (e *gfP12) String() string {
} }
func (e *gfP12) Set(a *gfP12) *gfP12 { func (e *gfP12) Set(a *gfP12) *gfP12 {
e.x.Set(&a.x) gfp12Copy(e, a)
e.y.Set(&a.y)
e.z.Set(&a.z)
return e return e
} }
@ -141,7 +139,11 @@ func (e *gfP12) Mul(a, b *gfP12) *gfP12 {
// +y0*z1*w + y0*y1*w^2 + y0*x1*v // +y0*z1*w + y0*y1*w^2 + y0*x1*v
// +x0*z1*w^2 + x0*y1*v + x0*x1*v*w // +x0*z1*w^2 + x0*y1*v + x0*x1*v*w
//=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2 //=(z0*z1+y0*x1*v+x0*y1*v) + (z0*y1+y0*z1+x0*x1*v)w + (z0*x1 + y0*y1 + x0*z1)*w^2
tx, ty, tz, t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} tmp := &gfP12{}
tx := &tmp.x
ty := &tmp.y
tz := &tmp.z
t, v0, v1, v2 := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{}
v0.Mul(&a.z, &b.z) v0.Mul(&a.z, &b.z)
v1.Mul(&a.y, &b.y) v1.Mul(&a.y, &b.y)
v2.Mul(&a.x, &b.x) v2.Mul(&a.x, &b.x)
@ -168,10 +170,7 @@ func (e *gfP12) Mul(a, b *gfP12) *gfP12 {
tx.Sub(tx, v0) tx.Sub(tx, v0)
tx.Add(tx, v1) tx.Add(tx, v1)
tx.Sub(tx, v2) tx.Sub(tx, v2)
gfp12Copy(e, tmp)
e.x.Set(tx)
e.y.Set(ty)
e.z.Set(tz)
return e return e
} }
@ -180,7 +179,37 @@ func (e *gfP12) Square(a *gfP12) *gfP12 {
// z^2 + z*y*w + z*x*w^2 + y*z*w + y^2*w^2 + y*x*v + x*z*w^2 + x*y*v + x^2 *v *w // z^2 + z*y*w + z*x*w^2 + y*z*w + y^2*w^2 + y*x*v + x*z*w^2 + x*y*v + x^2 *v *w
// (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2 // (z^2 + y*x*v + x*y*v) + (z*y + y*z + v * x^2)w + (z*x + y^2 + x*z)*w^2
// (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2 // (z^2 + 2*x*y*v) + (v*x^2 + 2*y*z) *w + (y^2 + 2*x*z) * w^2
tx, ty, tz, t := &gfP4{}, &gfP4{}, &gfP4{}, &gfP4{} tmp := &gfP12{}
tx := &tmp.x
ty := &tmp.y
tz := &tmp.z
t := &gfP4{}
tz.Square(&a.z)
t.MulV(&a.x, &a.y)
t.Add(t, t)
tz.Add(tz, t)
ty.SquareV(&a.x)
t.Mul(&a.y, &a.z)
t.Add(t, t)
ty.Add(ty, t)
tx.Square(&a.y)
t.Mul(&a.x, &a.z)
t.Add(t, t)
tx.Add(tx, t)
gfp12Copy(e, tmp)
return e
}
func (e *gfP12) Squares(a *gfP12, n int) *gfP12 {
// Square first round
in := &gfP12{}
tx := &in.x
ty := &in.y
tz := &in.z
t := &gfP4{}
tz.Square(&a.z) tz.Square(&a.z)
t.MulV(&a.x, &a.y) t.MulV(&a.x, &a.y)
@ -197,9 +226,36 @@ func (e *gfP12) Square(a *gfP12) *gfP12 {
t.Add(t, t) t.Add(t, t)
tx.Add(tx, t) tx.Add(tx, t)
e.x.Set(tx) tmp := &gfP12{}
e.y.Set(ty) var tmp2 *gfP12
e.z.Set(tz) tx = &tmp.x
ty = &tmp.y
tz = &tmp.z
for i := 1; i < n; i++ {
tz.Square(&in.z)
t.MulV(&in.x, &in.y)
t.Add(t, t)
tz.Add(tz, t)
ty.SquareV(&in.x)
t.Mul(&in.y, &in.z)
t.Add(t, t)
ty.Add(ty, t)
tx.Square(&in.y)
t.Mul(&in.x, &in.z)
t.Add(t, t)
tx.Add(tx, t)
// Switch references
tmp2 = in
in = tmp
tmp = tmp2
tx = &tmp.x
ty = &tmp.y
tz = &tmp.z
}
gfp12Copy(e, in)
return e return e
} }
@ -215,8 +271,7 @@ func (e *gfP12) Exp(f *gfP12, power *big.Int) *gfP12 {
sum.Set(t) sum.Set(t)
} }
} }
gfp12Copy(e, sum)
e.Set(sum)
return e return e
} }
@ -271,27 +326,26 @@ func (e *gfP12) Neg(a *gfP12) *gfP12 {
// = z^p + y^p*w*w^(p-1)+x^p*w^2*(w^2)^(p-1) // = z^p + y^p*w*w^(p-1)+x^p*w^2*(w^2)^(p-1)
// w2ToP2Minus1 = vToPMinus1 * wToPMinus1 // w2ToP2Minus1 = vToPMinus1 * wToPMinus1
func (e *gfP12) Frobenius(a *gfP12) *gfP12 { func (e *gfP12) Frobenius(a *gfP12) *gfP12 {
x, y := &gfP2{}, &gfP2{} tmp := &gfP4{}
x := &tmp.x
y := &tmp.y
x.Conjugate(&a.z.x) x.Conjugate(&a.z.x)
y.Conjugate(&a.z.y) y.Conjugate(&a.z.y)
x.MulScalar(x, vToPMinus1) x.MulScalar(x, vToPMinus1)
e.z.x.Set(x) gfp4Copy(&e.z, tmp)
e.z.y.Set(y)
x.Conjugate(&a.y.x) x.Conjugate(&a.y.x)
y.Conjugate(&a.y.y) y.Conjugate(&a.y.y)
x.MulScalar(x, w2ToP2Minus1) x.MulScalar(x, w2ToP2Minus1)
y.MulScalar(y, wToPMinus1) y.MulScalar(y, wToPMinus1)
e.y.x.Set(x) gfp4Copy(&e.y, tmp)
e.y.y.Set(y)
x.Conjugate(&a.x.x) x.Conjugate(&a.x.x)
y.Conjugate(&a.x.y) y.Conjugate(&a.x.y)
x.MulScalar(x, vToPMinus1Mw2ToPMinus1) x.MulScalar(x, vToPMinus1Mw2ToPMinus1)
y.MulScalar(y, w2ToPMinus1) y.MulScalar(y, w2ToPMinus1)
e.x.x.Set(x) gfp4Copy(&e.x, tmp)
e.x.y.Set(y)
return e return e
} }
@ -299,7 +353,9 @@ func (e *gfP12) Frobenius(a *gfP12) *gfP12 {
// (z + y*w + x*w^2)^(p^2) // (z + y*w + x*w^2)^(p^2)
// = z^(p^2) + y^(p^2)*w*w^((p^2)-1)+x^(p^2)*w^2*(w^2)^((p^2)-1) // = z^(p^2) + y^(p^2)*w*w^((p^2)-1)+x^(p^2)*w^2*(w^2)^((p^2)-1)
func (e *gfP12) FrobeniusP2(a *gfP12) *gfP12 { func (e *gfP12) FrobeniusP2(a *gfP12) *gfP12 {
tx, ty, tz := &gfP4{}, &gfP4{}, &gfP4{} tx := &e.x
ty := &e.y
tz := &e.z
tz.Conjugate(&a.z) tz.Conjugate(&a.z)
@ -308,11 +364,6 @@ func (e *gfP12) FrobeniusP2(a *gfP12) *gfP12 {
tx.Conjugate(&a.x) tx.Conjugate(&a.x)
tx.MulGFP(tx, w2ToP2Minus1) tx.MulGFP(tx, w2ToP2Minus1)
e.x.Set(tx)
e.y.Set(ty)
e.z.Set(tz)
return e return e
} }
@ -352,7 +403,9 @@ func (e *gfP12) FrobeniusP3(a *gfP12) *gfP12 {
// (z + y*w + x*w^2)^(p^6) // (z + y*w + x*w^2)^(p^6)
// = ((z + y*w + x*w^2)^(p^3))^(p^3) // = ((z + y*w + x*w^2)^(p^3))^(p^3)
func (e *gfP12) FrobeniusP6(a *gfP12) *gfP12 { func (e *gfP12) FrobeniusP6(a *gfP12) *gfP12 {
tx, ty, tz := &gfP4{}, &gfP4{}, &gfP4{} tx := &e.x
ty := &e.y
tz := &e.z
tz.Conjugate(&a.z) tz.Conjugate(&a.z)
@ -361,10 +414,6 @@ func (e *gfP12) FrobeniusP6(a *gfP12) *gfP12 {
tx.Conjugate(&a.x) tx.Conjugate(&a.x)
e.x.Set(tx)
e.y.Set(ty)
e.z.Set(tz)
return e return e
} }

43
sm9/bn256/gfp12_exp_u.go Normal file
View File

@ -0,0 +1,43 @@
package bn256
// gfP12ExpU sets e to x raised to the fixed exponent 0x600000000058f98a and
// returns e. The power is evaluated with the hard-coded addition chain listed
// below instead of a generic square-and-multiply loop. x is only read, and e
// is written only by the final copy from the local accumulator z.
func (e *gfP12) gfP12ExpU(x *gfP12) *gfP12 {
// The sequence of 10 multiplications and 61 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
//
// _10 = 2*1
// _100 = 2*_10
// _101 = 1 + _100
// _1001 = _100 + _101
// _1011 = _10 + _1001
// _1100 = 1 + _1011
// i56 = (_1100 << 40 + _1011) << 7 + _1011 + _100
// i69 = (2*(i56 << 4 + _1001) + 1) << 6
// return 2*(_101 + i69)
//
// z holds the value that becomes the result; t0..t3 are scratch values that
// are reused as the chain progresses.
var z = new(gfP12)
var t0 = new(gfP12)
var t1 = new(gfP12)
var t2 = new(gfP12)
var t3 = new(gfP12)
// t2 = x^2 (_10)
t2.Square(x)
// t1 = x^4 (_100)
t1.Square(t2)
// z = x^5 (_101)
z.Mul(x, t1)
// t0 = x^9 (_1001)
t0.Mul(t1, z)
// t2 = x^11 (_1011)
t2.Mul(t2, t0)
// t3 = x^12 (_1100)
t3.Mul(x, t2)
// i56: shift _1100 left by 40 bits, add _1011, shift left by 7 bits, then
// add _1011 and _100. The running value ends up in t1.
t3.Squares(t3, 40)
t3.Mul(t2, t3)
t3.Squares(t3, 7)
t2.Mul(t2, t3)
t1.Mul(t1, t2)
// i69: shift i56 left by 4 bits, add _1001, double, add 1, shift left by
// 6 bits. The running value ends up in t0.
t1.Squares(t1, 4)
t0.Mul(t0, t1)
t0.Square(t0)
t0.Mul(x, t0)
t0.Squares(t0, 6)
// Result: 2*(_101 + i69), i.e. multiply by x^5 and square once more.
z.Mul(z, t0)
z.Square(z)
// Publish the result into the receiver.
gfp12Copy(e, z)
return e
}

View File

@ -35,6 +35,20 @@ func Test_gfP12Square(t *testing.T) {
} }
} }
func BenchmarkGfP12Square(b *testing.B) {
x := &gfP12{
testdataP4,
testdataP4,
*(&gfP4{}).SetOne(),
}
x2 := &gfP12{}
b.ReportAllocs()
b.ResetTimer()
for i := 0; i < b.N; i++ {
x2.Square(x)
}
}
func testGfP12Invert(t *testing.T, x *gfP12) { func testGfP12Invert(t *testing.T, x *gfP12) {
xInv := &gfP12{} xInv := &gfP12{}
xInv.Invert(x) xInv.Invert(x)
@ -285,3 +299,31 @@ func BenchmarkGfP12Frobenius(b *testing.B) {
} }
} }
} }
// BenchmarkGfP12ExpU times the addition-chain exponentiation gfP12ExpU on a
// fixed operand whose three components are all testdataP4, reporting
// allocations per operation.
func BenchmarkGfP12ExpU(b *testing.B) {
	base := &gfP12{testdataP4, testdataP4, testdataP4}
	result := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result.gfP12ExpU(base)
	}
}
// BenchmarkGfP12ExpU2 times the generic gfP12.Exp with exponent u on the same
// operand used by BenchmarkGfP12ExpU, so the two results can be compared.
func BenchmarkGfP12ExpU2(b *testing.B) {
	base := &gfP12{testdataP4, testdataP4, testdataP4}
	result := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		result.Exp(base, u)
	}
}

View File

@ -26,8 +26,7 @@ func (e *gfP2) String() string {
} }
func (e *gfP2) Set(a *gfP2) *gfP2 { func (e *gfP2) Set(a *gfP2) *gfP2 {
e.x.Set(&a.x) gfp2Copy(e, a)
e.y.Set(&a.y)
return e return e
} }
@ -109,7 +108,10 @@ func (e *gfP2) Triple(a *gfP2) *gfP2 {
// c0 = a0*b0 - 2a1*b1 // c0 = a0*b0 - 2a1*b1
// c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 // c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0
func (e *gfP2) Mul(a, b *gfP2) *gfP2 { func (e *gfP2) Mul(a, b *gfP2) *gfP2 {
tx, ty, v0, v1 := &gfP{}, &gfP{}, &gfP{}, &gfP{} tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP{}, &gfP{}
gfpMul(v0, &a.y, &b.y) gfpMul(v0, &a.y, &b.y)
gfpMul(v1, &a.x, &b.x) gfpMul(v1, &a.x, &b.x)
@ -123,8 +125,7 @@ func (e *gfP2) Mul(a, b *gfP2) *gfP2 {
gfpSub(ty, v0, v1) gfpSub(ty, v0, v1)
gfpSub(ty, ty, v1) gfpSub(ty, ty, v1)
e.x.Set(tx) gfp2Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
@ -133,7 +134,10 @@ func (e *gfP2) Mul(a, b *gfP2) *gfP2 {
// c1 = (a0*b0 - 2a1*b1)u // c1 = (a0*b0 - 2a1*b1)u
// c0 = -2 * ((a0 + a1)(b0 + b1) - a0*b0 - a1*b1) = -2 * (a0*b1 + a1*b0) // c0 = -2 * ((a0 + a1)(b0 + b1) - a0*b0 - a1*b1) = -2 * (a0*b1 + a1*b0)
func (e *gfP2) MulU(a, b *gfP2) *gfP2 { func (e *gfP2) MulU(a, b *gfP2) *gfP2 {
tx, ty, v0, v1 := &gfP{}, &gfP{}, &gfP{}, &gfP{} tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP{}, &gfP{}
gfpMul(v0, &a.y, &b.y) gfpMul(v0, &a.y, &b.y)
gfpMul(v1, &a.x, &b.x) gfpMul(v1, &a.x, &b.x)
@ -150,8 +154,7 @@ func (e *gfP2) MulU(a, b *gfP2) *gfP2 {
gfpSub(tx, v0, v1) gfpSub(tx, v0, v1)
gfpSub(tx, tx, v1) gfpSub(tx, tx, v1)
e.x.Set(tx) gfp2Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
@ -164,15 +167,17 @@ func (e *gfP2) MulU1(a *gfP2) *gfP2 {
gfpAdd(t, &a.x, &a.x) gfpAdd(t, &a.x, &a.x)
gfpNeg(t, t) gfpNeg(t, t)
e.x.Set(&a.y) gfpCopy(&e.x, &a.y)
e.y.Set(t) gfpCopy(&e.y, t)
return e return e
} }
func (e *gfP2) Square(a *gfP2) *gfP2 { func (e *gfP2) Square(a *gfP2) *gfP2 {
// Complex squaring algorithm: // Complex squaring algorithm:
// (xu+y)² = y^2-2*x^2 + 2*u*x*y // (xu+y)² = y^2-2*x^2 + 2*u*x*y
tx, ty := &gfP{}, &gfP{} tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
gfpSqr(tx, &a.x, 1) gfpSqr(tx, &a.x, 1)
gfpSqr(ty, &a.y, 1) gfpSqr(ty, &a.y, 1)
gfpSub(ty, ty, tx) gfpSub(ty, ty, tx)
@ -180,9 +185,7 @@ func (e *gfP2) Square(a *gfP2) *gfP2 {
gfpMul(tx, &a.x, &a.y) gfpMul(tx, &a.x, &a.y)
gfpAdd(tx, tx, tx) gfpAdd(tx, tx, tx)
gfp2Copy(e, tmp)
e.x.Set(tx)
e.y.Set(ty)
return e return e
} }
@ -190,7 +193,9 @@ func (e *gfP2) SquareU(a *gfP2) *gfP2 {
// Complex squaring algorithm: // Complex squaring algorithm:
// (xu+y)²*u = (y^2-2*x^2)u - 4*x*y // (xu+y)²*u = (y^2-2*x^2)u - 4*x*y
tx, ty := &gfP{}, &gfP{} tmp := &gfP2{}
tx := &tmp.x
ty := &tmp.y
// tx = a0^2 - 2 * a1^2 // tx = a0^2 - 2 * a1^2
gfpSqr(ty, &a.x, 1) gfpSqr(ty, &a.x, 1)
gfpSqr(tx, &a.y, 1) gfpSqr(tx, &a.y, 1)
@ -203,8 +208,7 @@ func (e *gfP2) SquareU(a *gfP2) *gfP2 {
gfpAdd(ty, ty, ty) gfpAdd(ty, ty, ty)
gfpNeg(ty, ty) gfpNeg(ty, ty)
e.x.Set(tx) gfp2Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
@ -251,8 +255,10 @@ func (e *gfP2) Exp(f *gfP2, power *big.Int) *gfP2 {
} }
// xu+y)^p = x * u^p + y // xu+y)^p = x * u^p + y
//
// = x * u * u^(p-1) + y // = x * u * u^(p-1) + y
// = (-x)*u + y // = (-x)*u + y
//
// here u^(p-1) = -1 // here u^(p-1) = -1
func (e *gfP2) Frobenius(a *gfP2) *gfP2 { func (e *gfP2) Frobenius(a *gfP2) *gfP2 {
e.Conjugate(a) e.Conjugate(a)

View File

@ -25,8 +25,7 @@ func (e *gfP4) String() string {
} }
func (e *gfP4) Set(a *gfP4) *gfP4 { func (e *gfP4) Set(a *gfP4) *gfP4 {
e.x.Set(&a.x) gfp4Copy(e, a)
e.y.Set(&a.y)
return e return e
} }
@ -99,7 +98,10 @@ func (e *gfP4) Mul(a, b *gfP4) *gfP4 {
//(a0+a1*v)(b0+b1*v)=c0+c1*v, where //(a0+a1*v)(b0+b1*v)=c0+c1*v, where
//c0 = a0*b0 +a1*b1*u //c0 = a0*b0 +a1*b1*u
//c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0 //c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 = a0*b1 + a1*b0
tx, ty, v0, v1 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP2{}, &gfP2{}
v0.Mul(&a.y, &b.y) v0.Mul(&a.y, &b.y)
v1.Mul(&a.x, &b.x) v1.Mul(&a.x, &b.x)
@ -112,8 +114,7 @@ func (e *gfP4) Mul(a, b *gfP4) *gfP4 {
ty.MulU1(v1) ty.MulU1(v1)
ty.Add(ty, v0) ty.Add(ty, v0)
e.x.Set(tx) gfp4Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
@ -124,7 +125,10 @@ func (e *gfP4) Mul(a, b *gfP4) *gfP4 {
// c0 = a0*b1*u + a1*b0*u // c0 = a0*b1*u + a1*b0*u
// c1 = a0*b0 + a1*b1*u // c1 = a0*b0 + a1*b1*u
func (e *gfP4) MulV(a, b *gfP4) *gfP4 { func (e *gfP4) MulV(a, b *gfP4) *gfP4 {
tx, ty, v0, v1 := &gfP2{}, &gfP2{}, &gfP2{}, &gfP2{} tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
v0, v1 := &gfP2{}, &gfP2{}
v0.Mul(&a.y, &b.y) v0.Mul(&a.y, &b.y)
v1.Mul(&a.x, &b.x) v1.Mul(&a.x, &b.x)
@ -138,8 +142,7 @@ func (e *gfP4) MulV(a, b *gfP4) *gfP4 {
tx.MulU1(v1) tx.MulU1(v1)
tx.Add(tx, v0) tx.Add(tx, v0)
e.x.Set(tx) gfp4Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
@ -148,17 +151,20 @@ func (e *gfP4) MulV(a, b *gfP4) *gfP4 {
// c0 = a1*u // c0 = a1*u
// c1 = a0 // c1 = a0
func (e *gfP4) MulV1(a *gfP4) *gfP4 { func (e *gfP4) MulV1(a *gfP4) *gfP4 {
tx := (&gfP2{}).Set(&a.y) tx := &gfP2{}
gfp2Copy(tx, &a.y)
e.y.MulU1(&a.x) e.y.MulU1(&a.x)
e.x.Set(tx) gfp2Copy(&e.x, tx)
return e return e
} }
func (e *gfP4) Square(a *gfP4) *gfP4 { func (e *gfP4) Square(a *gfP4) *gfP4 {
// Complex squaring algorithm: // Complex squaring algorithm:
// (xv+y)² = (x^2*u + y^2) + 2*x*y*v // (xv+y)² = (x^2*u + y^2) + 2*x*y*v
tx, ty := &gfP2{}, &gfP2{} tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
tx.SquareU(&a.x) tx.SquareU(&a.x)
ty.Square(&a.y) ty.Square(&a.y)
ty.Add(tx, ty) ty.Add(tx, ty)
@ -166,15 +172,16 @@ func (e *gfP4) Square(a *gfP4) *gfP4 {
tx.Mul(&a.x, &a.y) tx.Mul(&a.x, &a.y)
tx.Add(tx, tx) tx.Add(tx, tx)
e.x.Set(tx) gfp4Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
// SquareV: (a^2) * v // SquareV: (a^2) * v
// v*(xv+y)² = (x^2*u + y^2)v + 2*x*y*u // v*(xv+y)² = (x^2*u + y^2)v + 2*x*y*u
func (e *gfP4) SquareV(a *gfP4) *gfP4 { func (e *gfP4) SquareV(a *gfP4) *gfP4 {
tx, ty := &gfP2{}, &gfP2{} tmp := &gfP4{}
tx := &tmp.x
ty := &tmp.y
tx.SquareU(&a.x) tx.SquareU(&a.x)
ty.Square(&a.y) ty.Square(&a.y)
tx.Add(tx, ty) tx.Add(tx, ty)
@ -182,15 +189,18 @@ func (e *gfP4) SquareV(a *gfP4) *gfP4 {
ty.MulU(&a.x, &a.y) ty.MulU(&a.x, &a.y)
ty.Add(ty, ty) ty.Add(ty, ty)
e.x.Set(tx) gfp4Copy(e, tmp)
e.y.Set(ty)
return e return e
} }
func (e *gfP4) Invert(a *gfP4) *gfP4 { func (e *gfP4) Invert(a *gfP4) *gfP4 {
// See "Implementing cryptographic pairings", M. Scott, section 3.2. // See "Implementing cryptographic pairings", M. Scott, section 3.2.
// ftp://136.206.11.249/pub/crypto/pairings.pdf // ftp://136.206.11.249/pub/crypto/pairings.pdf
t1, t2, t3 := &gfP2{}, &gfP2{}, &gfP2{} tmp := &gfP4{}
t2 := &tmp.x
t1 := &tmp.y
t3 := &gfP2{}
t3.SquareU(&a.x) t3.SquareU(&a.x)
t1.Square(&a.y) t1.Square(&a.y)
@ -202,8 +212,7 @@ func (e *gfP4) Invert(a *gfP4) *gfP4 {
t2.Mul(&a.x, t3) t2.Mul(&a.x, t3)
e.x.Set(t2) gfp4Copy(e, tmp)
e.y.Set(t1)
return e return e
} }
@ -225,22 +234,26 @@ func (e *gfP4) Exp(f *gfP4, power *big.Int) *gfP4 {
} }
// (y+x*v)^p // (y+x*v)^p
//
// = y^p + x^p*v^p // = y^p + x^p*v^p
// = f(y) + f(x) * v^p // = f(y) + f(x) * v^p
// = f(y) + f(x) * v * v^(p-1) // = f(y) + f(x) * v * v^(p-1)
func (e *gfP4) Frobenius(a *gfP4) *gfP4 { func (e *gfP4) Frobenius(a *gfP4) *gfP4 {
x, y := &gfP2{}, &gfP2{} tmp := &gfP4{}
x := &tmp.x
y := &tmp.y
x.Conjugate(&a.x) x.Conjugate(&a.x)
y.Conjugate(&a.y) y.Conjugate(&a.y)
x.MulScalar(x, vToPMinus1) x.MulScalar(x, vToPMinus1)
e.x.Set(x) gfp4Copy(e, tmp)
e.y.Set(y)
return e return e
} }
// (y+x*v)^(p^2) // (y+x*v)^(p^2)
//
// y + x*v * v^(p^2-1) // y + x*v * v^(p^2-1)
func (e *gfP4) FrobeniusP2(a *gfP4) *gfP4 { func (e *gfP4) FrobeniusP2(a *gfP4) *gfP4 {
e.Conjugate(a) e.Conjugate(a)
@ -248,16 +261,18 @@ func (e *gfP4) FrobeniusP2(a *gfP4) *gfP4 {
} }
// (y+x*v)^(p^3) // (y+x*v)^(p^3)
//
// = ((y+x*v)^p)^(p^2) // = ((y+x*v)^p)^(p^2)
func (e *gfP4) FrobeniusP3(a *gfP4) *gfP4 { func (e *gfP4) FrobeniusP3(a *gfP4) *gfP4 {
x, y := &gfP2{}, &gfP2{} tmp := &gfP4{}
x := &tmp.x
y := &tmp.y
x.Conjugate(&a.x) x.Conjugate(&a.x)
y.Conjugate(&a.y) y.Conjugate(&a.y)
x.MulScalar(x, vToPMinus1) x.MulScalar(x, vToPMinus1)
x.Neg(x) x.Neg(x)
e.x.Set(x) gfp4Copy(e, tmp)
e.y.Set(y)
return e return e
} }

View File

@ -7,6 +7,198 @@
#define x_ptr SI #define x_ptr SI
#define y_ptr CX #define y_ptr CX
// func gfpCopy(res, in *gfP)
// Copies the 32 bytes at in to res. Uses one 32-byte AVX2 move when
// supportAVX2 is set, otherwise two 16-byte SSE moves.
TEXT ·gfpCopy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ in+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp_avx2

	// SSE path.
	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	// Return here: falling through would execute the AVX2 instructions below
	// on a CPU that just reported it does not support them.
	RET

copygfp_avx2:
	VMOVDQU (x_ptr), Y0
	VMOVDQU Y0, (res_ptr)
	// Clear the upper YMM halves before returning to SSE/Go code.
	VZEROUPPER
	RET
// func gfp2Copy(res, in *gfP2)
// Copies the 64 bytes at in to res. Uses two 32-byte AVX2 moves when
// supportAVX2 is set, otherwise four 16-byte SSE moves.
TEXT ·gfp2Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ in+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp2_avx2

	// SSE path.
	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3
	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	// Return here: falling through would execute the AVX2 instructions below
	// on a CPU that just reported it does not support them.
	RET

copygfp2_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1
	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	// Clear the upper YMM halves before returning to SSE/Go code.
	VZEROUPPER
	RET
// func gfp4Copy(res, in *gfP4)
// Copies the 128 bytes at in to res. Uses four 32-byte AVX2 moves when
// supportAVX2 is set, otherwise eight 16-byte SSE moves.
TEXT ·gfp4Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ in+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp4_avx2

	// SSE path.
	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3
	MOVOU (16*4)(x_ptr), X4
	MOVOU (16*5)(x_ptr), X5
	MOVOU (16*6)(x_ptr), X6
	MOVOU (16*7)(x_ptr), X7
	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)
	MOVOU X6, (16*6)(res_ptr)
	MOVOU X7, (16*7)(res_ptr)
	// Return here: falling through would execute the AVX2 instructions below
	// on a CPU that just reported it does not support them.
	RET

copygfp4_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1
	VMOVDQU (32*2)(x_ptr), Y2
	VMOVDQU (32*3)(x_ptr), Y3
	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)
	// Clear the upper YMM halves before returning to SSE/Go code.
	VZEROUPPER
	RET
// func gfp12Copy(res, in *gfP12)
// Copies the 384 bytes at in to res. Uses twelve 32-byte AVX2 moves when
// supportAVX2 is set, otherwise three rounds of eight 16-byte SSE moves.
TEXT ·gfp12Copy(SB),NOSPLIT,$0
	MOVQ res+0(FP), res_ptr
	MOVQ in+8(FP), x_ptr

	CMPB ·supportAVX2+0(SB), $0x01
	JEQ  copygfp12_avx2

	// SSE path, bytes 0..127.
	MOVOU (16*0)(x_ptr), X0
	MOVOU (16*1)(x_ptr), X1
	MOVOU (16*2)(x_ptr), X2
	MOVOU (16*3)(x_ptr), X3
	MOVOU (16*4)(x_ptr), X4
	MOVOU (16*5)(x_ptr), X5
	MOVOU (16*6)(x_ptr), X6
	MOVOU (16*7)(x_ptr), X7
	MOVOU X0, (16*0)(res_ptr)
	MOVOU X1, (16*1)(res_ptr)
	MOVOU X2, (16*2)(res_ptr)
	MOVOU X3, (16*3)(res_ptr)
	MOVOU X4, (16*4)(res_ptr)
	MOVOU X5, (16*5)(res_ptr)
	MOVOU X6, (16*6)(res_ptr)
	MOVOU X7, (16*7)(res_ptr)

	// SSE path, bytes 128..255.
	MOVOU (16*8)(x_ptr), X0
	MOVOU (16*9)(x_ptr), X1
	MOVOU (16*10)(x_ptr), X2
	MOVOU (16*11)(x_ptr), X3
	MOVOU (16*12)(x_ptr), X4
	MOVOU (16*13)(x_ptr), X5
	MOVOU (16*14)(x_ptr), X6
	MOVOU (16*15)(x_ptr), X7
	MOVOU X0, (16*8)(res_ptr)
	MOVOU X1, (16*9)(res_ptr)
	MOVOU X2, (16*10)(res_ptr)
	MOVOU X3, (16*11)(res_ptr)
	MOVOU X4, (16*12)(res_ptr)
	MOVOU X5, (16*13)(res_ptr)
	MOVOU X6, (16*14)(res_ptr)
	MOVOU X7, (16*15)(res_ptr)

	// SSE path, bytes 256..383.
	MOVOU (16*16)(x_ptr), X0
	MOVOU (16*17)(x_ptr), X1
	MOVOU (16*18)(x_ptr), X2
	MOVOU (16*19)(x_ptr), X3
	MOVOU (16*20)(x_ptr), X4
	MOVOU (16*21)(x_ptr), X5
	MOVOU (16*22)(x_ptr), X6
	MOVOU (16*23)(x_ptr), X7
	MOVOU X0, (16*16)(res_ptr)
	MOVOU X1, (16*17)(res_ptr)
	MOVOU X2, (16*18)(res_ptr)
	MOVOU X3, (16*19)(res_ptr)
	MOVOU X4, (16*20)(res_ptr)
	MOVOU X5, (16*21)(res_ptr)
	MOVOU X6, (16*22)(res_ptr)
	MOVOU X7, (16*23)(res_ptr)
	// Return here: falling through would execute the AVX2 instructions below
	// on a CPU that just reported it does not support them.
	RET

copygfp12_avx2:
	VMOVDQU (32*0)(x_ptr), Y0
	VMOVDQU (32*1)(x_ptr), Y1
	VMOVDQU (32*2)(x_ptr), Y2
	VMOVDQU (32*3)(x_ptr), Y3
	VMOVDQU (32*4)(x_ptr), Y4
	VMOVDQU (32*5)(x_ptr), Y5
	VMOVDQU (32*6)(x_ptr), Y6
	VMOVDQU (32*7)(x_ptr), Y7
	VMOVDQU (32*8)(x_ptr), Y8
	VMOVDQU (32*9)(x_ptr), Y9
	VMOVDQU (32*10)(x_ptr), Y10
	VMOVDQU (32*11)(x_ptr), Y11
	VMOVDQU Y0, (32*0)(res_ptr)
	VMOVDQU Y1, (32*1)(res_ptr)
	VMOVDQU Y2, (32*2)(res_ptr)
	VMOVDQU Y3, (32*3)(res_ptr)
	VMOVDQU Y4, (32*4)(res_ptr)
	VMOVDQU Y5, (32*5)(res_ptr)
	VMOVDQU Y6, (32*6)(res_ptr)
	VMOVDQU Y7, (32*7)(res_ptr)
	VMOVDQU Y8, (32*8)(res_ptr)
	VMOVDQU Y9, (32*9)(res_ptr)
	VMOVDQU Y10, (32*10)(res_ptr)
	VMOVDQU Y11, (32*11)(res_ptr)
	// Clear the upper YMM halves before returning to SSE/Go code.
	VZEROUPPER
	RET
// func gfP12MovCond(res, a, b *gfP12, cond int) // func gfP12MovCond(res, a, b *gfP12, cond int)
TEXT ·gfP12MovCond(SB),NOSPLIT,$0 TEXT ·gfP12MovCond(SB),NOSPLIT,$0
MOVQ res+0(FP), res_ptr MOVQ res+0(FP), res_ptr

View File

@ -7,6 +7,68 @@
#define a_ptr R1 #define a_ptr R1
#define b_ptr R2 #define b_ptr R2
/* ---------------------------------------*/
// func gfpCopy(res, in *gfP)
// Copies the 32 bytes at in to res through V0 and V1.
TEXT ·gfpCopy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	// The frame name must match the Go declaration's parameter (in) so that
	// go vet's asmdecl check accepts the reference.
	MOVD in+8(FP), a_ptr

	VLD1 (a_ptr), [V0.B16, V1.B16]
	VST1 [V0.B16, V1.B16], (res_ptr)
	RET
/* ---------------------------------------*/
// func gfp2Copy(res, in *gfP2)
// Copies the 64 bytes at in to res through V0..V3.
TEXT ·gfp2Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	// The frame name must match the Go declaration's parameter (in) so that
	// go vet's asmdecl check accepts the reference.
	MOVD in+8(FP), a_ptr

	VLD1 (a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1 [V0.B16, V1.B16, V2.B16, V3.B16], (res_ptr)
	RET
/* ---------------------------------------*/
// func gfp4Copy(res, in *gfP4)
// Copies the 128 bytes at in to res as two 64-byte chunks; the post-increment
// forms advance both pointers by 64 after each load/store.
TEXT ·gfp4Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	// The frame name must match the Go declaration's parameter (in) so that
	// go vet's asmdecl check accepts the reference.
	MOVD in+8(FP), a_ptr

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)
	RET
/* ---------------------------------------*/
// func gfp12Copy(res, in *gfP12)
// Copies the 384 bytes at in to res as six 64-byte chunks; the post-increment
// forms advance both pointers by 64 after each load/store.
TEXT ·gfp12Copy(SB),NOSPLIT,$0
	MOVD res+0(FP), res_ptr
	// The frame name must match the Go declaration's parameter (in) so that
	// go vet's asmdecl check accepts the reference.
	MOVD in+8(FP), a_ptr

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)

	VLD1.P 64(a_ptr), [V0.B16, V1.B16, V2.B16, V3.B16]
	VST1.P [V0.B16, V1.B16, V2.B16, V3.B16], 64(res_ptr)
	RET
/* ---------------------------------------*/ /* ---------------------------------------*/
// func gfP12MovCond(res, a, b *gfP12, cond int) // func gfP12MovCond(res, a, b *gfP12, cond int)
// If cond == 0 res=b, else res=a // If cond == 0 res=b, else res=a

View File

@ -21,3 +21,15 @@ func curvePointMovCond(res, a, b *curvePoint, cond int)
// //
//go:noescape //go:noescape
func twistPointMovCond(res, a, b *twistPoint, cond int) func twistPointMovCond(res, a, b *twistPoint, cond int)
//go:noescape
func gfpCopy(res, in *gfP)
//go:noescape
func gfp2Copy(res, in *gfP2)
//go:noescape
func gfp4Copy(res, in *gfP4)
//go:noescape
func gfp12Copy(res, in *gfP12)

View File

@ -14,3 +14,39 @@ func curvePointMovCond(res, a, b *curvePoint, cond int) {
func twistPointMovCond(res, a, b *twistPoint, cond int) { func twistPointMovCond(res, a, b *twistPoint, cond int) {
res.Select(a, b, cond) res.Select(a, b, cond)
} }
// gfpCopy is the pure-Go fallback that copies the first four limbs of in
// into res.
func gfpCopy(res, in *gfP) {
	for limb := 0; limb < 4; limb++ {
		res[limb] = in[limb]
	}
}
// gfp2Copy is the pure-Go fallback that copies both gfP components (x, then y)
// of in into res.
func gfp2Copy(res, in *gfP2) {
	dst := [...]*gfP{&res.x, &res.y}
	src := [...]*gfP{&in.x, &in.y}
	for i := range dst {
		gfpCopy(dst[i], src[i])
	}
}
// gfp4Copy is the pure-Go fallback that copies in into res one gfP2 half at a
// time, which in turn copies x.x, x.y, y.x and y.y.
func gfp4Copy(res, in *gfP4) {
	gfp2Copy(&res.x, &in.x)
	gfp2Copy(&res.y, &in.y)
}
// gfp12Copy is the pure-Go fallback that copies in into res one gfP4 component
// at a time (x, y, then z), covering all twelve gfP leaves.
func gfp12Copy(res, in *gfP12) {
	gfp4Copy(&res.x, &in.x)
	gfp4Copy(&res.y, &in.y)
	gfp4Copy(&res.z, &in.z)
}

54
sm9/bn256/select_test.go Normal file
View File

@ -0,0 +1,54 @@
package bn256
import "testing"
// BenchmarkGfP12Copy times gfp12Copy on a fixed source whose three components
// are all testdataP4, reporting allocations per operation.
func BenchmarkGfP12Copy(b *testing.B) {
	src := &gfP12{testdataP4, testdataP4, testdataP4}
	dst := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		gfp12Copy(dst, src)
	}
}
// gfpCopyForTest copies the four limbs of in into res with plain element
// assignments; it is the leaf of the copy helpers timed by BenchmarkGfP12Set.
func gfpCopyForTest(res, in *gfP) {
	res[0], res[1], res[2], res[3] = in[0], in[1], in[2], in[3]
}
// gfp2CopyForTest copies the four limbs of each gfP component (x and y) of in
// into res, element by element.
func gfp2CopyForTest(res, in *gfP2) {
	for limb := 0; limb < 4; limb++ {
		res.x[limb] = in.x[limb]
		res.y[limb] = in.y[limb]
	}
}
// gfp4CopyForTest copies the four gfP leaves of in (x.x, x.y, y.x, y.y) into
// res using gfpCopyForTest.
func gfp4CopyForTest(res, in *gfP4) {
	gfpCopyForTest(&res.x.x, &in.x.x)
	gfpCopyForTest(&res.x.y, &in.x.y)
	gfpCopyForTest(&res.y.x, &in.y.x)
	gfpCopyForTest(&res.y.y, &in.y.y)
}
// gfp12CopyForTest copies in into res by copying the two gfP2 halves of each
// of its gfP4 components (x, y, z) with gfp2CopyForTest.
func gfp12CopyForTest(res, in *gfP12) {
	gfp2CopyForTest(&res.x.x, &in.x.x)
	gfp2CopyForTest(&res.x.y, &in.x.y)
	gfp2CopyForTest(&res.y.x, &in.y.x)
	gfp2CopyForTest(&res.y.y, &in.y.y)
	gfp2CopyForTest(&res.z.x, &in.z.x)
	gfp2CopyForTest(&res.z.y, &in.z.y)
}
// BenchmarkGfP12Set times gfp12CopyForTest, the element-by-element copy, on
// the same source value used by BenchmarkGfP12Copy.
func BenchmarkGfP12Set(b *testing.B) {
	src := &gfP12{testdataP4, testdataP4, testdataP4}
	dst := new(gfP12)

	b.ReportAllocs()
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		gfp12CopyForTest(dst, src)
	}
}