sm9: optimize generate and gfP2 sqrt

This commit is contained in:
emmansun 2023-04-29 13:33:44 +08:00
parent b197c96040
commit 13ddfad8e4
7 changed files with 1086 additions and 362 deletions

View File

@ -256,7 +256,9 @@ func (e *G1) UnmarshalCompressed(data []byte) ([]byte, error) {
e.p.x.Unmarshal(data[1:])
montEncode(&e.p.x, &e.p.x)
x3 := e.p.polynomial(&e.p.x)
e.p.y.Sqrt(x3)
if !Sqrt(&e.p.y, x3) {
return nil, errors.New("sm9.G1: invalid compressed point encoding")
}
montDecode(x3, &e.p.y)
if byte(x3[0]&1) != data[0]&1 {
gfpNeg(&e.p.y, &e.p.y)

View File

@ -17,50 +17,196 @@ import (
// go install github.com/mmcloughlin/addchain/cmd/addchain@v0.4.0
//
// generate produces one formatted Go source fragment using the addchain tool.
//
// It writes template to a temporary file, runs "addchain search" on exp (the
// exponent, passed to the tool verbatim, e.g. "0x..."), and then runs
// "addchain gen" with that template on the search result. Every occurrence of
// the placeholder "Element" in the tool's output is replaced by element (for
// example "gfP" or "gfP2") before the result is run through format.Source.
//
// Both temporary files are closed on every path and removed when generate
// returns. The first error encountered is returned and the byte slice is nil.
func generate(template, exp, element string) ([]byte, error) {
	tmplFile, err := os.CreateTemp("", "addchain-template")
	if err != nil {
		return nil, err
	}
	defer os.Remove(tmplFile.Name())
	if _, err := io.WriteString(tmplFile, template); err != nil {
		// Close before returning so the handle is not leaked and the deferred
		// Remove can succeed; the write error is the one worth reporting.
		tmplFile.Close()
		return nil, err
	}
	if err := tmplFile.Close(); err != nil {
		return nil, err
	}

	chainFile, err := os.CreateTemp("", "addchain-gfp")
	if err != nil {
		return nil, err
	}
	defer os.Remove(chainFile.Name())
	search := exec.Command("addchain", "search", exp)
	search.Stderr = os.Stderr
	search.Stdout = chainFile // the search result is captured in the temp file
	if err := search.Run(); err != nil {
		chainFile.Close()
		return nil, err
	}
	if err := chainFile.Close(); err != nil {
		return nil, err
	}

	gen := exec.Command("addchain", "gen", "-tmpl", tmplFile.Name(), chainFile.Name())
	gen.Stderr = os.Stderr
	out, err := gen.Output()
	if err != nil {
		return nil, err
	}
	// The templates are written against a generic "Element" type name.
	out = bytes.ReplaceAll(out, []byte("Element"), []byte(element))
	return format.Source(out)
}
// writeFile creates (or truncates) fileName and writes each buffer to it in
// order, so that several generated fragments end up concatenated in a single
// source file. A progress line naming the file is logged first.
//
// It returns the first error from creating, writing or closing the file; the
// caller decides whether that is fatal.
func writeFile(fileName string, buffers ...[]byte) error {
	log.Printf("Generating %v...", fileName)
	f, err := os.Create(fileName)
	if err != nil {
		return err
	}
	for _, buffer := range buffers {
		if _, err := f.Write(buffer); err != nil {
			f.Close() // best effort; report the write error
			return err
		}
	}
	// A failed Close can mean the data never reached the disk, so report it.
	return f.Close()
}
func main() {
out, err := generate(tmplAddchainInvert, "0xb640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457b", "gfP")
if err != nil {
log.Fatal(err)
}
out1, err := generate(tmplAddchainSqrt, "0x16c80000005474de3ac07569feb1d8e8a43e5269634f5ddb7cadf364fc6a28af", "gfP")
if err != nil {
log.Fatal(err)
}
if err = writeFile("gfp_invert_sqrt.go", out, out1); err != nil {
log.Fatal(err)
}
out, err = generate(tmplAddchainExp1, "0x2d90000000a8e9bc7580ead3fd63b1d1487ca4d2c69ebbb6f95be6c9f8d4515f", "gfP2")
if err != nil {
log.Fatal(err)
}
out1, err = generate(tmplAddchainExp2, "0xb640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457d", "gfP2")
if err != nil {
log.Fatal(err)
}
out2, err := generate(tmplAddchainExp3, "0x5b2000000151d378eb01d5a7fac763a290f949a58d3d776df2b7cd93f1a8a2be", "gfP2")
if err != nil {
log.Fatal(err)
}
if err = writeFile("gfp2_sqrt.go", out, out1, out2); err != nil {
log.Fatal(err)
}
}
const tmplAddchain = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
// tmplAddchainExp1 is the addchain template for the expPMinus1Over4 method.
// It also emits the "Code generated" header and the package clause, so its
// output has to be the first fragment of the file it is written to.
//
// Each addition-chain "add" becomes a Mul, each "double" a Square, and each
// "shift" a run of repeated Squares. "Element" is a placeholder that generate
// replaces with the concrete type name.
const tmplAddchainExp1 = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
package bn256
func (e *Element) expPMinus1Over4(x *Element) *Element {
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
var z = new(Element).Set(e)
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
return e.Set(z)
}
`
// tmplAddchainExp2 is the addchain template for the expP method. It has no
// file header or package clause, so its output is meant to be appended after
// another fragment that provides them.
//
// Each addition-chain "add" becomes a Mul, each "double" a Square, and each
// "shift" a run of repeated Squares. "Element" is a placeholder that generate
// replaces with the concrete type name.
const tmplAddchainExp2 = `
func (e *Element) expP(x *Element) *Element {
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
var z = new(Element).Set(e)
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
return e.Set(z)
}
`
// tmplAddchainExp3 is the addchain template for the expPMinus1Over2 method.
// It has no file header or package clause, so its output is meant to be
// appended after another fragment that provides them.
//
// Each addition-chain "add" becomes a Mul, each "double" a Square, and each
// "shift" a run of repeated Squares. "Element" is a placeholder that generate
// replaces with the concrete type name.
const tmplAddchainExp3 = `
func (e *Element) expPMinus1Over2(x *Element) *Element {
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
var z = new(Element).Set(e)
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
return e.Set(z)
}
`
const tmplAddchainInvert = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
package bn256
// Invert sets e = 1/x, and returns e.
//
@ -99,3 +245,58 @@ func (e *Element) Invert(x *Element) *Element {
return e.Set(z)
}
`
// tmplAddchainSqrt is the addchain template for Sqrt and its helper
// sqrtCandidate. Sqrt is emitted verbatim; only the body of sqrtCandidate is
// filled in from the addition chain. The template has no file header or
// package clause, so its output is meant to be appended after another
// fragment that provides them.
//
// "Element" is a placeholder that generate replaces with the concrete type
// name.
const tmplAddchainSqrt = `
// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns
// false and e is unchanged. e and x can overlap.
func Sqrt(e, x *Element) (isSquare bool) {
candidate, b, i := &gfP{}, &gfP{}, &gfP{}
sqrtCandidate(candidate, x)
gfpMul(b, twoExpPMinus5Over8, candidate) // b=ta1
gfpMul(candidate, x, b) // a1=fb
gfpMul(i, two, candidate) // i=2(fb)
gfpMul(i, i, b) // i=2(fb)b
gfpSub(i, i, one) // i=2(fb)b-1
gfpMul(i, candidate, i) // i=(fb)(2(fb)b-1)
square := new(Element).Square(i)
if square.Equal(x) != 1 {
return false
}
e.Set(i)
return true
}
// sqrtCandidate sets z to a square root candidate for x. z and x must not overlap.
func sqrtCandidate(z, x *Element) {
// Since p = 8k+5, exponentiation by (p - 5) / 8 yields a square root candidate.
//
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
}
`

View File

@ -260,15 +260,16 @@ func (e *gfP2) Frobenius(a *gfP2) *gfP2 {
}
// Sqrt method is only required when we implement compressed format
// TODO: use addchain to improve performance for 3 exp operations.
func (ret *gfP2) Sqrt(a *gfP2) *gfP2 {
// Algorithm 10 https://eprint.iacr.org/2012/685.pdf
// TODO
ret.SetZero()
c := &twistGen.x
b, b2, bq := &gfP2{}, &gfP2{}, &gfP2{}
b.Exp(a, pMinus1Over4)
b = b.expPMinus1Over4(a)
b2.Mul(b, b)
bq.Exp(b, p)
bq = bq.expP(b)
t := &gfP2{}
x0 := &gfP{}
@ -286,7 +287,7 @@ func (ret *gfP2) Sqrt(a *gfP2) *gfP2 {
ret.Set(t)
} else {
d, e, f := &gfP2{}, &gfP2{}, &gfP2{}
d.Exp(c, pMinus1Over2Big)
d = d.expPMinus1Over2(c)
e.Mul(d, c)
f.Square(e)
e.Invert(e)

640
sm9/bn256/gfp2_sqrt.go Normal file
View File

@ -0,0 +1,640 @@
// Code generated by addchain. DO NOT EDIT.
package bn256
// expPMinus1Over4 sets e to x raised to a fixed exponent and returns e. As the
// name indicates, the exponent is (p-1)/4; it is encoded in the addition chain
// documented in the body. x is only read, and e is written once, by the final
// Set.
func (e *gfP2) expPMinus1Over4(x *gfP2) *gfP2 {
// The sequence of 53 multiplications and 249 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
//
// _10 = 2*1
// _100 = 2*_10
// _110 = _10 + _100
// _1010 = _100 + _110
// _1011 = 1 + _1010
// _1101 = _10 + _1011
// _10000 = _110 + _1010
// _10101 = _1010 + _1011
// _11011 = _110 + _10101
// _11101 = _10 + _11011
// _11111 = _10 + _11101
// _101001 = _1010 + _11111
// _101011 = _10 + _101001
// _111011 = _10000 + _101011
// _1000101 = _1010 + _111011
// _1001111 = _1010 + _1000101
// _1010001 = _10 + _1001111
// _1011011 = _1010 + _1010001
// _1011101 = _10 + _1011011
// _1011111 = _10 + _1011101
// _1100011 = _100 + _1011111
// _1101001 = _110 + _1100011
// _1101101 = _100 + _1101001
// _1101111 = _10 + _1101101
// _1110101 = _110 + _1101111
// i71 = ((_1011011 << 3 + 1) << 33 + _10101) << 8
// i93 = ((_11101 + i71) << 9 + _1101111) << 10 + _1110101
// i115 = ((2*i93 + 1) << 14 + _1110101) << 5
// i128 = 2*((_1101 + i115) << 9 + _1110101) + _10101
// i152 = ((i128 << 5 + _1011) << 9 + _111011) << 8
// i173 = ((_11101 + i152) << 9 + _101001) << 9 + _11111
// i200 = ((i173 << 8 + _101001) << 9 + _1101001) << 8
// i219 = ((_1100011 + i200) << 8 + _1001111) << 8 + _1011101
// i243 = ((i219 << 7 + _1101101) << 7 + _1011111) << 8
// i259 = ((_101011 + i243) << 6 + _11111) << 7 + _11011
// i285 = ((i259 << 9 + _1001111) << 7 + _1100011) << 8
// return ((_1010001 + i285) << 8 + _1000101) << 6 + _11111
//
// z starts as a copy of e, but it is overwritten (z.Mul below) before it is
// ever read; the result is accumulated in z and copied to e at the end.
var z = new(gfP2).Set(e)
var t0 = new(gfP2)
var t1 = new(gfP2)
var t2 = new(gfP2)
var t3 = new(gfP2)
var t4 = new(gfP2)
var t5 = new(gfP2)
var t6 = new(gfP2)
var t7 = new(gfP2)
var t8 = new(gfP2)
var t9 = new(gfP2)
var t10 = new(gfP2)
var t11 = new(gfP2)
var t12 = new(gfP2)
var t13 = new(gfP2)
var t14 = new(gfP2)
var t15 = new(gfP2)
var t16 = new(gfP2)
var t17 = new(gfP2)
var t18 = new(gfP2)
// Precompute the small powers of x listed in the first part of the chain.
t17.Square(x)
t7.Square(t17)
t15.Mul(t17, t7)
t2.Mul(t7, t15)
t13.Mul(x, t2)
t16.Mul(t17, t13)
t0.Mul(t15, t2)
t14.Mul(t2, t13)
t4.Mul(t15, t14)
t11.Mul(t17, t4)
z.Mul(t17, t11)
t10.Mul(t2, z)
t5.Mul(t17, t10)
t12.Mul(t0, t5)
t0.Mul(t2, t12)
t3.Mul(t2, t0)
t1.Mul(t17, t3)
t18.Mul(t2, t1)
t8.Mul(t17, t18)
t6.Mul(t17, t8)
t2.Mul(t7, t6)
t9.Mul(t15, t2)
t7.Mul(t7, t9)
t17.Mul(t17, t7)
t15.Mul(t15, t17)
// Shift-and-add part of the chain (i71 onwards): a loop of n squarings is a
// left shift of the exponent by n bits, and the Mul that follows adds in x or
// one of the precomputed powers.
for s := 0; s < 3; s++ {
t18.Square(t18)
}
t18.Mul(x, t18)
for s := 0; s < 33; s++ {
t18.Square(t18)
}
t18.Mul(t14, t18)
for s := 0; s < 8; s++ {
t18.Square(t18)
}
t18.Mul(t11, t18)
for s := 0; s < 9; s++ {
t18.Square(t18)
}
t17.Mul(t17, t18)
for s := 0; s < 10; s++ {
t17.Square(t17)
}
t17.Mul(t15, t17)
t17.Square(t17)
t17.Mul(x, t17)
for s := 0; s < 14; s++ {
t17.Square(t17)
}
t17.Mul(t15, t17)
for s := 0; s < 5; s++ {
t17.Square(t17)
}
t16.Mul(t16, t17)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t15.Mul(t15, t16)
t15.Square(t15)
t14.Mul(t14, t15)
for s := 0; s < 5; s++ {
t14.Square(t14)
}
t13.Mul(t13, t14)
for s := 0; s < 9; s++ {
t13.Square(t13)
}
t12.Mul(t12, t13)
for s := 0; s < 8; s++ {
t12.Square(t12)
}
t11.Mul(t11, t12)
for s := 0; s < 9; s++ {
t11.Square(t11)
}
t11.Mul(t10, t11)
for s := 0; s < 9; s++ {
t11.Square(t11)
}
t11.Mul(z, t11)
for s := 0; s < 8; s++ {
t11.Square(t11)
}
t10.Mul(t10, t11)
for s := 0; s < 9; s++ {
t10.Square(t10)
}
t9.Mul(t9, t10)
for s := 0; s < 8; s++ {
t9.Square(t9)
}
t9.Mul(t2, t9)
for s := 0; s < 8; s++ {
t9.Square(t9)
}
t9.Mul(t3, t9)
for s := 0; s < 8; s++ {
t9.Square(t9)
}
t8.Mul(t8, t9)
for s := 0; s < 7; s++ {
t8.Square(t8)
}
t7.Mul(t7, t8)
for s := 0; s < 7; s++ {
t7.Square(t7)
}
t6.Mul(t6, t7)
for s := 0; s < 8; s++ {
t6.Square(t6)
}
t5.Mul(t5, t6)
for s := 0; s < 6; s++ {
t5.Square(t5)
}
t5.Mul(z, t5)
for s := 0; s < 7; s++ {
t5.Square(t5)
}
t4.Mul(t4, t5)
for s := 0; s < 9; s++ {
t4.Square(t4)
}
t3.Mul(t3, t4)
for s := 0; s < 7; s++ {
t3.Square(t3)
}
t2.Mul(t2, t3)
for s := 0; s < 8; s++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
for s := 0; s < 8; s++ {
t1.Square(t1)
}
t0.Mul(t0, t1)
for s := 0; s < 6; s++ {
t0.Square(t0)
}
z.Mul(z, t0)
return e.Set(z)
}
// expP sets e to x raised to a fixed exponent and returns e. As the name
// indicates, the exponent is p; it is encoded in the addition chain documented
// in the body. x is only read, and e is written once, by the final Set.
func (e *gfP2) expP(x *gfP2) *gfP2 {
// The sequence of 56 multiplications and 250 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
//
// _10 = 2*1
// _11 = 1 + _10
// _100 = 1 + _11
// _101 = 1 + _100
// _1000 = _11 + _101
// _1001 = 1 + _1000
// _1011 = _10 + _1001
// _1101 = _10 + _1011
// _10101 = _1000 + _1101
// _11001 = _100 + _10101
// _11101 = _100 + _11001
// _11111 = _10 + _11101
// _100011 = _100 + _11111
// _100101 = _10 + _100011
// _101001 = _100 + _100101
// _101011 = _10 + _101001
// _101101 = _10 + _101011
// _101111 = _10 + _101101
// _110011 = _100 + _101111
// _110101 = _10 + _110011
// _110111 = _10 + _110101
// _111011 = _100 + _110111
// _111101 = _10 + _111011
// _111111 = _10 + _111101
// _1011010 = _11101 + _111101
// i71 = ((_1011010 << 3 + _1001) << 33 + _10101) << 8
// i88 = ((_11101 + i71) << 8 + _110111) << 6 + _100011
// i115 = ((i88 << 6 + _101011) << 12 + _11101) << 7
// i128 = ((_101101 + i115) << 8 + _111111) << 2 + _11
// i152 = ((i128 << 5 + _1011) << 9 + _111011) << 8
// i173 = ((_11101 + i152) << 9 + _101001) << 9 + _11111
// i195 = ((i173 << 8 + _101001) << 6 + _1101) << 6
// i213 = ((_1011 + i195) << 7 + _1101) << 8 + _111101
// i234 = ((i213 << 7 + _111011) << 6 + _101101) << 6
// i250 = ((_101111 + i234) << 6 + _100101) << 7 + _110111
// i272 = ((i250 << 6 + _110011) << 6 + _11001) << 8
// i290 = ((_111111 + i272) << 9 + _110101) << 6 + _101
// return (i290 << 9 + _101011) << 5 + _11101
//
// z starts as a copy of e, but it is overwritten (z.Mul below) before it is
// ever read; the result is accumulated in z and copied to e at the end.
var z = new(gfP2).Set(e)
var t0 = new(gfP2)
var t1 = new(gfP2)
var t2 = new(gfP2)
var t3 = new(gfP2)
var t4 = new(gfP2)
var t5 = new(gfP2)
var t6 = new(gfP2)
var t7 = new(gfP2)
var t8 = new(gfP2)
var t9 = new(gfP2)
var t10 = new(gfP2)
var t11 = new(gfP2)
var t12 = new(gfP2)
var t13 = new(gfP2)
var t14 = new(gfP2)
var t15 = new(gfP2)
var t16 = new(gfP2)
var t17 = new(gfP2)
var t18 = new(gfP2)
var t19 = new(gfP2)
var t20 = new(gfP2)
// Precompute the small powers of x listed in the first part of the chain.
t3.Square(x)
t16.Mul(x, t3)
t10.Mul(x, t16)
t1.Mul(x, t10)
z.Mul(t16, t1)
t19.Mul(x, z)
t13.Mul(t3, t19)
t12.Mul(t3, t13)
t18.Mul(z, t12)
t4.Mul(t10, t18)
z.Mul(t10, t4)
t15.Mul(t3, z)
t17.Mul(t10, t15)
t7.Mul(t3, t17)
t14.Mul(t10, t7)
t0.Mul(t3, t14)
t9.Mul(t3, t0)
t8.Mul(t3, t9)
t5.Mul(t10, t8)
t2.Mul(t3, t5)
t6.Mul(t3, t2)
t10.Mul(t10, t6)
t11.Mul(t3, t10)
t3.Mul(t3, t11)
t20.Mul(z, t11)
// Shift-and-add part of the chain (i71 onwards): a loop of n squarings is a
// left shift of the exponent by n bits, and the Mul that follows adds in one
// of the precomputed powers.
for s := 0; s < 3; s++ {
t20.Square(t20)
}
t19.Mul(t19, t20)
for s := 0; s < 33; s++ {
t19.Square(t19)
}
t18.Mul(t18, t19)
for s := 0; s < 8; s++ {
t18.Square(t18)
}
t18.Mul(z, t18)
for s := 0; s < 8; s++ {
t18.Square(t18)
}
t18.Mul(t6, t18)
for s := 0; s < 6; s++ {
t18.Square(t18)
}
t17.Mul(t17, t18)
for s := 0; s < 6; s++ {
t17.Square(t17)
}
t17.Mul(t0, t17)
for s := 0; s < 12; s++ {
t17.Square(t17)
}
t17.Mul(z, t17)
for s := 0; s < 7; s++ {
t17.Square(t17)
}
t17.Mul(t9, t17)
for s := 0; s < 8; s++ {
t17.Square(t17)
}
t17.Mul(t3, t17)
for s := 0; s < 2; s++ {
t17.Square(t17)
}
t16.Mul(t16, t17)
for s := 0; s < 5; s++ {
t16.Square(t16)
}
t16.Mul(t13, t16)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t16.Mul(t10, t16)
for s := 0; s < 8; s++ {
t16.Square(t16)
}
t16.Mul(z, t16)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t16.Mul(t14, t16)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t15.Mul(t15, t16)
for s := 0; s < 8; s++ {
t15.Square(t15)
}
t14.Mul(t14, t15)
for s := 0; s < 6; s++ {
t14.Square(t14)
}
t14.Mul(t12, t14)
for s := 0; s < 6; s++ {
t14.Square(t14)
}
t13.Mul(t13, t14)
for s := 0; s < 7; s++ {
t13.Square(t13)
}
t12.Mul(t12, t13)
for s := 0; s < 8; s++ {
t12.Square(t12)
}
t11.Mul(t11, t12)
for s := 0; s < 7; s++ {
t11.Square(t11)
}
t10.Mul(t10, t11)
for s := 0; s < 6; s++ {
t10.Square(t10)
}
t9.Mul(t9, t10)
for s := 0; s < 6; s++ {
t9.Square(t9)
}
t8.Mul(t8, t9)
for s := 0; s < 6; s++ {
t8.Square(t8)
}
t7.Mul(t7, t8)
for s := 0; s < 7; s++ {
t7.Square(t7)
}
t6.Mul(t6, t7)
for s := 0; s < 6; s++ {
t6.Square(t6)
}
t5.Mul(t5, t6)
for s := 0; s < 6; s++ {
t5.Square(t5)
}
t4.Mul(t4, t5)
for s := 0; s < 8; s++ {
t4.Square(t4)
}
t3.Mul(t3, t4)
for s := 0; s < 9; s++ {
t3.Square(t3)
}
t2.Mul(t2, t3)
for s := 0; s < 6; s++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
for s := 0; s < 9; s++ {
t1.Square(t1)
}
t0.Mul(t0, t1)
for s := 0; s < 5; s++ {
t0.Square(t0)
}
z.Mul(z, t0)
return e.Set(z)
}
// expPMinus1Over2 sets e to x raised to a fixed exponent and returns e. As the
// name indicates, the exponent is (p-1)/2; it is encoded in the addition chain
// documented in the body. x is only read, and e is written once, by the final
// Set.
func (e *gfP2) expPMinus1Over2(x *gfP2) *gfP2 {
// The sequence of 53 multiplications and 250 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
//
// _10 = 2*1
// _100 = 2*_10
// _110 = _10 + _100
// _1010 = _100 + _110
// _1011 = 1 + _1010
// _1101 = _10 + _1011
// _10000 = _110 + _1010
// _10101 = _1010 + _1011
// _11011 = _110 + _10101
// _11101 = _10 + _11011
// _11111 = _10 + _11101
// _101001 = _1010 + _11111
// _101011 = _10 + _101001
// _111011 = _10000 + _101011
// _1000101 = _1010 + _111011
// _1001111 = _1010 + _1000101
// _1010001 = _10 + _1001111
// _1011011 = _1010 + _1010001
// _1011101 = _10 + _1011011
// _1011111 = _10 + _1011101
// _1100011 = _100 + _1011111
// _1101001 = _110 + _1100011
// _1101101 = _100 + _1101001
// _1101111 = _10 + _1101101
// _1110101 = _110 + _1101111
// i71 = ((_1011011 << 3 + 1) << 33 + _10101) << 8
// i93 = ((_11101 + i71) << 9 + _1101111) << 10 + _1110101
// i115 = ((2*i93 + 1) << 14 + _1110101) << 5
// i128 = 2*((_1101 + i115) << 9 + _1110101) + _10101
// i152 = ((i128 << 5 + _1011) << 9 + _111011) << 8
// i173 = ((_11101 + i152) << 9 + _101001) << 9 + _11111
// i200 = ((i173 << 8 + _101001) << 9 + _1101001) << 8
// i219 = ((_1100011 + i200) << 8 + _1001111) << 8 + _1011101
// i243 = ((i219 << 7 + _1101101) << 7 + _1011111) << 8
// i259 = ((_101011 + i243) << 6 + _11111) << 7 + _11011
// i285 = ((i259 << 9 + _1001111) << 7 + _1100011) << 8
// i302 = ((_1010001 + i285) << 8 + _1000101) << 6 + _11111
// return 2*i302
//
// z starts as a copy of e, but it is overwritten (z.Mul below) before it is
// ever read; the result is accumulated in z and copied to e at the end.
var z = new(gfP2).Set(e)
var t0 = new(gfP2)
var t1 = new(gfP2)
var t2 = new(gfP2)
var t3 = new(gfP2)
var t4 = new(gfP2)
var t5 = new(gfP2)
var t6 = new(gfP2)
var t7 = new(gfP2)
var t8 = new(gfP2)
var t9 = new(gfP2)
var t10 = new(gfP2)
var t11 = new(gfP2)
var t12 = new(gfP2)
var t13 = new(gfP2)
var t14 = new(gfP2)
var t15 = new(gfP2)
var t16 = new(gfP2)
var t17 = new(gfP2)
var t18 = new(gfP2)
// Precompute the small powers of x listed in the first part of the chain.
t17.Square(x)
t7.Square(t17)
t15.Mul(t17, t7)
t2.Mul(t7, t15)
t13.Mul(x, t2)
t16.Mul(t17, t13)
t0.Mul(t15, t2)
t14.Mul(t2, t13)
t4.Mul(t15, t14)
t11.Mul(t17, t4)
z.Mul(t17, t11)
t10.Mul(t2, z)
t5.Mul(t17, t10)
t12.Mul(t0, t5)
t0.Mul(t2, t12)
t3.Mul(t2, t0)
t1.Mul(t17, t3)
t18.Mul(t2, t1)
t8.Mul(t17, t18)
t6.Mul(t17, t8)
t2.Mul(t7, t6)
t9.Mul(t15, t2)
t7.Mul(t7, t9)
t17.Mul(t17, t7)
t15.Mul(t15, t17)
// Shift-and-add part of the chain (i71 onwards): a loop of n squarings is a
// left shift of the exponent by n bits, and the Mul that follows adds in x or
// one of the precomputed powers.
for s := 0; s < 3; s++ {
t18.Square(t18)
}
t18.Mul(x, t18)
for s := 0; s < 33; s++ {
t18.Square(t18)
}
t18.Mul(t14, t18)
for s := 0; s < 8; s++ {
t18.Square(t18)
}
t18.Mul(t11, t18)
for s := 0; s < 9; s++ {
t18.Square(t18)
}
t17.Mul(t17, t18)
for s := 0; s < 10; s++ {
t17.Square(t17)
}
t17.Mul(t15, t17)
t17.Square(t17)
t17.Mul(x, t17)
for s := 0; s < 14; s++ {
t17.Square(t17)
}
t17.Mul(t15, t17)
for s := 0; s < 5; s++ {
t17.Square(t17)
}
t16.Mul(t16, t17)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t15.Mul(t15, t16)
t15.Square(t15)
t14.Mul(t14, t15)
for s := 0; s < 5; s++ {
t14.Square(t14)
}
t13.Mul(t13, t14)
for s := 0; s < 9; s++ {
t13.Square(t13)
}
t12.Mul(t12, t13)
for s := 0; s < 8; s++ {
t12.Square(t12)
}
t11.Mul(t11, t12)
for s := 0; s < 9; s++ {
t11.Square(t11)
}
t11.Mul(t10, t11)
for s := 0; s < 9; s++ {
t11.Square(t11)
}
t11.Mul(z, t11)
for s := 0; s < 8; s++ {
t11.Square(t11)
}
t10.Mul(t10, t11)
for s := 0; s < 9; s++ {
t10.Square(t10)
}
t9.Mul(t9, t10)
for s := 0; s < 8; s++ {
t9.Square(t9)
}
t9.Mul(t2, t9)
for s := 0; s < 8; s++ {
t9.Square(t9)
}
t9.Mul(t3, t9)
for s := 0; s < 8; s++ {
t9.Square(t9)
}
t8.Mul(t8, t9)
for s := 0; s < 7; s++ {
t8.Square(t8)
}
t7.Mul(t7, t8)
for s := 0; s < 7; s++ {
t7.Square(t7)
}
t6.Mul(t6, t7)
for s := 0; s < 8; s++ {
t6.Square(t6)
}
t5.Mul(t5, t6)
for s := 0; s < 6; s++ {
t5.Square(t5)
}
t5.Mul(z, t5)
for s := 0; s < 7; s++ {
t5.Square(t5)
}
t4.Mul(t4, t5)
for s := 0; s < 9; s++ {
t4.Square(t4)
}
t3.Mul(t3, t4)
for s := 0; s < 7; s++ {
t3.Square(t3)
}
t2.Mul(t2, t3)
for s := 0; s < 8; s++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
for s := 0; s < 8; s++ {
t1.Square(t1)
}
t0.Mul(t0, t1)
for s := 0; s < 6; s++ {
t0.Square(t0)
}
z.Mul(z, t0)
// Final doubling of the exponent ("return 2*i302" in the chain above).
z.Square(z)
return e.Set(z)
}

View File

@ -1,221 +0,0 @@
// Code generated by addchain. DO NOT EDIT.
package bn256
// Invert sets e = 1/x, and returns e.
//
// If x == 0, Invert returns e = 0.
func (e *gfP) Invert(x *gfP) *gfP {
// Inversion is implemented as exponentiation with exponent p - 2.
// The sequence of 56 multiplications and 250 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
//
// _10 = 2*1
// _100 = 2*_10
// _110 = _10 + _100
// _1010 = _100 + _110
// _1011 = 1 + _1010
// _1101 = _10 + _1011
// _10000 = _110 + _1010
// _10101 = _1010 + _1011
// _11011 = _110 + _10101
// _11101 = _10 + _11011
// _11111 = _10 + _11101
// _101001 = _1010 + _11111
// _101011 = _10 + _101001
// _111011 = _10000 + _101011
// _1000101 = _1010 + _111011
// _1001111 = _1010 + _1000101
// _1010001 = _10 + _1001111
// _1011011 = _1010 + _1010001
// _1011101 = _10 + _1011011
// _1011111 = _10 + _1011101
// _1100011 = _100 + _1011111
// _1101001 = _110 + _1100011
// _1101101 = _100 + _1101001
// _1101111 = _10 + _1101101
// _1110101 = _110 + _1101111
// _1111011 = _110 + _1110101
// _10110110 = _111011 + _1111011
// i72 = ((_10110110 << 2 + 1) << 33 + _10101) << 8
// i94 = ((_11101 + i72) << 9 + _1101111) << 10 + _1110101
// i116 = ((2*i94 + 1) << 14 + _1110101) << 5
// i129 = 2*((_1101 + i116) << 9 + _1111011 + _100)
// i146 = ((1 + i129) << 5 + _1011) << 9 + _111011
// i174 = ((i146 << 8 + _11101) << 9 + _101001) << 9
// i194 = ((_11111 + i174) << 8 + _101001) << 9 + _1101001
// i220 = ((i194 << 8 + _1100011) << 8 + _1001111) << 8
// i237 = ((_1011101 + i220) << 7 + _1101101) << 7 + _1011111
// i260 = ((i237 << 8 + _101011) << 6 + _11111) << 7
// i279 = ((_11011 + i260) << 9 + _1001111) << 7 + _1100011
// i305 = ((i279 << 8 + _1010001) << 8 + _1000101) << 8
// return _1111011 + i305
//
// z starts as a copy of e, but it is overwritten (z.Mul below) before it is
// ever read; the result is accumulated in z and copied to e at the end.
var z = new(gfP).Set(e)
var t0 = new(gfP)
var t1 = new(gfP)
var t2 = new(gfP)
var t3 = new(gfP)
var t4 = new(gfP)
var t5 = new(gfP)
var t6 = new(gfP)
var t7 = new(gfP)
var t8 = new(gfP)
var t9 = new(gfP)
var t10 = new(gfP)
var t11 = new(gfP)
var t12 = new(gfP)
var t13 = new(gfP)
var t14 = new(gfP)
var t15 = new(gfP)
var t16 = new(gfP)
var t17 = new(gfP)
var t18 = new(gfP)
var t19 = new(gfP)
var t20 = new(gfP)
// Precompute the small powers of x listed in the first part of the chain.
t17.Square(x)
t15.Square(t17)
z.Mul(t17, t15)
t2.Mul(t15, z)
t14.Mul(x, t2)
t16.Mul(t17, t14)
t0.Mul(z, t2)
t19.Mul(t2, t14)
t4.Mul(z, t19)
t12.Mul(t17, t4)
t5.Mul(t17, t12)
t11.Mul(t2, t5)
t6.Mul(t17, t11)
t13.Mul(t0, t6)
t0.Mul(t2, t13)
t3.Mul(t2, t0)
t1.Mul(t17, t3)
t2.Mul(t2, t1)
t9.Mul(t17, t2)
t7.Mul(t17, t9)
t2.Mul(t15, t7)
t10.Mul(z, t2)
t8.Mul(t15, t10)
t18.Mul(t17, t8)
t17.Mul(z, t18)
z.Mul(z, t17)
t20.Mul(t13, z)
// Shift-and-add part of the chain (i72 onwards): a loop of n squarings is a
// left shift of the exponent by n bits, and the Mul that follows adds in x or
// one of the precomputed powers.
for s := 0; s < 2; s++ {
t20.Square(t20)
}
t20.Mul(x, t20)
for s := 0; s < 33; s++ {
t20.Square(t20)
}
t19.Mul(t19, t20)
for s := 0; s < 8; s++ {
t19.Square(t19)
}
t19.Mul(t12, t19)
for s := 0; s < 9; s++ {
t19.Square(t19)
}
t18.Mul(t18, t19)
for s := 0; s < 10; s++ {
t18.Square(t18)
}
t18.Mul(t17, t18)
t18.Square(t18)
t18.Mul(x, t18)
for s := 0; s < 14; s++ {
t18.Square(t18)
}
t17.Mul(t17, t18)
for s := 0; s < 5; s++ {
t17.Square(t17)
}
t16.Mul(t16, t17)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t16.Mul(z, t16)
t15.Mul(t15, t16)
t15.Square(t15)
t15.Mul(x, t15)
for s := 0; s < 5; s++ {
t15.Square(t15)
}
t14.Mul(t14, t15)
for s := 0; s < 9; s++ {
t14.Square(t14)
}
t13.Mul(t13, t14)
for s := 0; s < 8; s++ {
t13.Square(t13)
}
t12.Mul(t12, t13)
for s := 0; s < 9; s++ {
t12.Square(t12)
}
t12.Mul(t11, t12)
for s := 0; s < 9; s++ {
t12.Square(t12)
}
t12.Mul(t5, t12)
for s := 0; s < 8; s++ {
t12.Square(t12)
}
t11.Mul(t11, t12)
for s := 0; s < 9; s++ {
t11.Square(t11)
}
t10.Mul(t10, t11)
for s := 0; s < 8; s++ {
t10.Square(t10)
}
t10.Mul(t2, t10)
for s := 0; s < 8; s++ {
t10.Square(t10)
}
t10.Mul(t3, t10)
for s := 0; s < 8; s++ {
t10.Square(t10)
}
t9.Mul(t9, t10)
for s := 0; s < 7; s++ {
t9.Square(t9)
}
t8.Mul(t8, t9)
for s := 0; s < 7; s++ {
t8.Square(t8)
}
t7.Mul(t7, t8)
for s := 0; s < 8; s++ {
t7.Square(t7)
}
t6.Mul(t6, t7)
for s := 0; s < 6; s++ {
t6.Square(t6)
}
t5.Mul(t5, t6)
for s := 0; s < 7; s++ {
t5.Square(t5)
}
t4.Mul(t4, t5)
for s := 0; s < 9; s++ {
t4.Square(t4)
}
t3.Mul(t3, t4)
for s := 0; s < 7; s++ {
t3.Square(t3)
}
t2.Mul(t2, t3)
for s := 0; s < 8; s++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
for s := 0; s < 8; s++ {
t1.Square(t1)
}
t0.Mul(t0, t1)
for s := 0; s < 8; s++ {
t0.Square(t0)
}
z.Mul(z, t0)
return e.Set(z)
}

View File

@ -1,6 +1,225 @@
// Code generated by addchain. DO NOT EDIT.
package bn256
// Invert sets e = 1/x, and returns e.
//
// If x == 0, Invert returns e = 0.
func (e *gfP) Invert(x *gfP) *gfP {
// Inversion is implemented as exponentiation with exponent p - 2.
// The sequence of 56 multiplications and 250 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
//
// _10 = 2*1
// _100 = 2*_10
// _110 = _10 + _100
// _1010 = _100 + _110
// _1011 = 1 + _1010
// _1101 = _10 + _1011
// _10000 = _110 + _1010
// _10101 = _1010 + _1011
// _11011 = _110 + _10101
// _11101 = _10 + _11011
// _11111 = _10 + _11101
// _101001 = _1010 + _11111
// _101011 = _10 + _101001
// _111011 = _10000 + _101011
// _1000101 = _1010 + _111011
// _1001111 = _1010 + _1000101
// _1010001 = _10 + _1001111
// _1011011 = _1010 + _1010001
// _1011101 = _10 + _1011011
// _1011111 = _10 + _1011101
// _1100011 = _100 + _1011111
// _1101001 = _110 + _1100011
// _1101101 = _100 + _1101001
// _1101111 = _10 + _1101101
// _1110101 = _110 + _1101111
// _1111011 = _110 + _1110101
// _10110110 = _111011 + _1111011
// i72 = ((_10110110 << 2 + 1) << 33 + _10101) << 8
// i94 = ((_11101 + i72) << 9 + _1101111) << 10 + _1110101
// i116 = ((2*i94 + 1) << 14 + _1110101) << 5
// i129 = 2*((_1101 + i116) << 9 + _1111011 + _100)
// i146 = ((1 + i129) << 5 + _1011) << 9 + _111011
// i174 = ((i146 << 8 + _11101) << 9 + _101001) << 9
// i194 = ((_11111 + i174) << 8 + _101001) << 9 + _1101001
// i220 = ((i194 << 8 + _1100011) << 8 + _1001111) << 8
// i237 = ((_1011101 + i220) << 7 + _1101101) << 7 + _1011111
// i260 = ((i237 << 8 + _101011) << 6 + _11111) << 7
// i279 = ((_11011 + i260) << 9 + _1001111) << 7 + _1100011
// i305 = ((i279 << 8 + _1010001) << 8 + _1000101) << 8
// return _1111011 + i305
//
// z starts as a copy of e, but it is overwritten (z.Mul below) before it is
// ever read; the result is accumulated in z and copied to e at the end.
var z = new(gfP).Set(e)
var t0 = new(gfP)
var t1 = new(gfP)
var t2 = new(gfP)
var t3 = new(gfP)
var t4 = new(gfP)
var t5 = new(gfP)
var t6 = new(gfP)
var t7 = new(gfP)
var t8 = new(gfP)
var t9 = new(gfP)
var t10 = new(gfP)
var t11 = new(gfP)
var t12 = new(gfP)
var t13 = new(gfP)
var t14 = new(gfP)
var t15 = new(gfP)
var t16 = new(gfP)
var t17 = new(gfP)
var t18 = new(gfP)
var t19 = new(gfP)
var t20 = new(gfP)
// Precompute the small powers of x listed in the first part of the chain.
t17.Square(x)
t15.Square(t17)
z.Mul(t17, t15)
t2.Mul(t15, z)
t14.Mul(x, t2)
t16.Mul(t17, t14)
t0.Mul(z, t2)
t19.Mul(t2, t14)
t4.Mul(z, t19)
t12.Mul(t17, t4)
t5.Mul(t17, t12)
t11.Mul(t2, t5)
t6.Mul(t17, t11)
t13.Mul(t0, t6)
t0.Mul(t2, t13)
t3.Mul(t2, t0)
t1.Mul(t17, t3)
t2.Mul(t2, t1)
t9.Mul(t17, t2)
t7.Mul(t17, t9)
t2.Mul(t15, t7)
t10.Mul(z, t2)
t8.Mul(t15, t10)
t18.Mul(t17, t8)
t17.Mul(z, t18)
z.Mul(z, t17)
t20.Mul(t13, z)
// Shift-and-add part of the chain (i72 onwards): a loop of n squarings is a
// left shift of the exponent by n bits, and the Mul that follows adds in x or
// one of the precomputed powers.
for s := 0; s < 2; s++ {
t20.Square(t20)
}
t20.Mul(x, t20)
for s := 0; s < 33; s++ {
t20.Square(t20)
}
t19.Mul(t19, t20)
for s := 0; s < 8; s++ {
t19.Square(t19)
}
t19.Mul(t12, t19)
for s := 0; s < 9; s++ {
t19.Square(t19)
}
t18.Mul(t18, t19)
for s := 0; s < 10; s++ {
t18.Square(t18)
}
t18.Mul(t17, t18)
t18.Square(t18)
t18.Mul(x, t18)
for s := 0; s < 14; s++ {
t18.Square(t18)
}
t17.Mul(t17, t18)
for s := 0; s < 5; s++ {
t17.Square(t17)
}
t16.Mul(t16, t17)
for s := 0; s < 9; s++ {
t16.Square(t16)
}
t16.Mul(z, t16)
t15.Mul(t15, t16)
t15.Square(t15)
t15.Mul(x, t15)
for s := 0; s < 5; s++ {
t15.Square(t15)
}
t14.Mul(t14, t15)
for s := 0; s < 9; s++ {
t14.Square(t14)
}
t13.Mul(t13, t14)
for s := 0; s < 8; s++ {
t13.Square(t13)
}
t12.Mul(t12, t13)
for s := 0; s < 9; s++ {
t12.Square(t12)
}
t12.Mul(t11, t12)
for s := 0; s < 9; s++ {
t12.Square(t12)
}
t12.Mul(t5, t12)
for s := 0; s < 8; s++ {
t12.Square(t12)
}
t11.Mul(t11, t12)
for s := 0; s < 9; s++ {
t11.Square(t11)
}
t10.Mul(t10, t11)
for s := 0; s < 8; s++ {
t10.Square(t10)
}
t10.Mul(t2, t10)
for s := 0; s < 8; s++ {
t10.Square(t10)
}
t10.Mul(t3, t10)
for s := 0; s < 8; s++ {
t10.Square(t10)
}
t9.Mul(t9, t10)
for s := 0; s < 7; s++ {
t9.Square(t9)
}
t8.Mul(t8, t9)
for s := 0; s < 7; s++ {
t8.Square(t8)
}
t7.Mul(t7, t8)
for s := 0; s < 8; s++ {
t7.Square(t7)
}
t6.Mul(t6, t7)
for s := 0; s < 6; s++ {
t6.Square(t6)
}
t5.Mul(t5, t6)
for s := 0; s < 7; s++ {
t5.Square(t5)
}
t4.Mul(t4, t5)
for s := 0; s < 9; s++ {
t4.Square(t4)
}
t3.Mul(t3, t4)
for s := 0; s < 7; s++ {
t3.Square(t3)
}
t2.Mul(t2, t3)
for s := 0; s < 8; s++ {
t2.Square(t2)
}
t1.Mul(t1, t2)
for s := 0; s < 8; s++ {
t1.Square(t1)
}
t0.Mul(t0, t1)
for s := 0; s < 8; s++ {
t0.Square(t0)
}
z.Mul(z, t0)
return e.Set(z)
}
// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns
// false and e is unchanged. e and x can overlap.
func Sqrt(e, x *gfP) (isSquare bool) {

View File

@ -1,118 +0,0 @@
//go:build ignore
// +build ignore
package main
import (
"bytes"
"go/format"
"io"
"log"
"os"
"os/exec"
)
// Running this generator requires addchain v0.4.0, which can be installed with
//
// go install github.com/mmcloughlin/addchain/cmd/addchain@v0.4.0
//
func main() {
	// All work happens in run so that its deferred temp-file cleanup executes
	// before log.Fatal terminates the process.
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run regenerates gfp_sqrt.go: it writes the addchain template to a temporary
// file, runs "addchain search" for the square-root exponent, renders the result
// with "addchain gen", substitutes the concrete type name gfP for the "Element"
// placeholder, formats the output and writes it to gfp_sqrt.go.
//
// It returns the first error encountered. Both temporary files are closed and
// removed on every path.
func run() error {
	tmplFile, err := os.CreateTemp("", "addchain-template")
	if err != nil {
		return err
	}
	defer os.Remove(tmplFile.Name())
	if _, err := io.WriteString(tmplFile, tmplAddchain); err != nil {
		tmplFile.Close() // best effort; report the write error
		return err
	}
	if err := tmplFile.Close(); err != nil {
		return err
	}

	log.Printf("Generating gfp_sqrt.go...")
	chainFile, err := os.CreateTemp("", "addchain-gfp")
	if err != nil {
		return err
	}
	defer os.Remove(chainFile.Name())
	search := exec.Command("addchain", "search", "0x16c80000005474de3ac07569feb1d8e8a43e5269634f5ddb7cadf364fc6a28af")
	search.Stderr = os.Stderr
	search.Stdout = chainFile // the search result is captured in the temp file
	if err := search.Run(); err != nil {
		chainFile.Close()
		return err
	}
	if err := chainFile.Close(); err != nil {
		return err
	}

	gen := exec.Command("addchain", "gen", "-tmpl", tmplFile.Name(), chainFile.Name())
	gen.Stderr = os.Stderr
	out, err := gen.Output()
	if err != nil {
		return err
	}
	// The template is written against a generic "Element" type name.
	out = bytes.Replace(out, []byte("Element"), []byte("gfP"), -1)
	out, err = format.Source(out)
	if err != nil {
		return err
	}
	return os.WriteFile("gfp_sqrt.go", out, 0644)
}
// tmplAddchain is the addchain template for the generated square-root file:
// the "Code generated" header, the package clause, Sqrt (emitted verbatim) and
// sqrtCandidate, whose body is filled in from the addition chain.
//
// Each addition-chain "add" becomes a Mul, each "double" a Square, and each
// "shift" a run of repeated Squares. "Element" is a placeholder that main
// replaces with the concrete type name.
const tmplAddchain = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT.
package bn256
// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns
// false and e is unchanged. e and x can overlap.
func Sqrt(e, x *Element) (isSquare bool) {
candidate, b, i := &gfP{}, &gfP{}, &gfP{}
sqrtCandidate(candidate, x)
gfpMul(b, twoExpPMinus5Over8, candidate) // b=ta1
gfpMul(candidate, x, b) // a1=fb
gfpMul(i, two, candidate) // i=2(fb)
gfpMul(i, i, b) // i=2(fb)b
gfpSub(i, i, one) // i=2(fb)b-1
gfpMul(i, candidate, i) // i=(fb)(2(fb)b-1)
square := new(Element).Square(i)
if square.Equal(x) != 1 {
return false
}
e.Set(i)
return true
}
// sqrtCandidate sets z to a square root candidate for x. z and x must not overlap.
func sqrtCandidate(z, x *Element) {
// Since p = 8k+5, exponentiation by (p - 5) / 8 yields a square root candidate.
//
// The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the
// following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}.
//
{{- range lines (format .Script) }}
// {{ . }}
{{- end }}
//
{{- range .Program.Temporaries }}
var {{ . }} = new(Element)
{{- end }}
{{ range $i := .Program.Instructions -}}
{{- with add $i.Op }}
{{ $i.Output }}.Mul({{ .X }}, {{ .Y }})
{{- end -}}
{{- with double $i.Op }}
{{ $i.Output }}.Square({{ .X }})
{{- end -}}
{{- with shift $i.Op -}}
{{- $first := 0 -}}
{{- if ne $i.Output.Identifier .X.Identifier }}
{{ $i.Output }}.Square({{ .X }})
{{- $first = 1 -}}
{{- end }}
for s := {{ $first }}; s < {{ .S }}; s++ {
{{ $i.Output }}.Square({{ $i.Output }})
}
{{- end -}}
{{- end }}
}
`