From 13ddfad8e4d6468c0d9a7267dd0248c952b709a7 Mon Sep 17 00:00:00 2001 From: emmansun Date: Sat, 29 Apr 2023 13:33:44 +0800 Subject: [PATCH] sm9: optimize generate and gfP2 sqrt --- sm9/bn256/g1.go | 4 +- sm9/bn256/generate.go | 239 ++++++- sm9/bn256/gfp2.go | 7 +- sm9/bn256/gfp2_sqrt.go | 640 ++++++++++++++++++ sm9/bn256/gfp_invert.go | 221 ------ sm9/bn256/{gfp_sqrt.go => gfp_invert_sqrt.go} | 219 ++++++ sm9/generate.go | 118 ---- 7 files changed, 1086 insertions(+), 362 deletions(-) create mode 100644 sm9/bn256/gfp2_sqrt.go delete mode 100644 sm9/bn256/gfp_invert.go rename sm9/bn256/{gfp_sqrt.go => gfp_invert_sqrt.go} (52%) delete mode 100644 sm9/generate.go diff --git a/sm9/bn256/g1.go b/sm9/bn256/g1.go index a1777aa..78cb762 100644 --- a/sm9/bn256/g1.go +++ b/sm9/bn256/g1.go @@ -256,7 +256,9 @@ func (e *G1) UnmarshalCompressed(data []byte) ([]byte, error) { e.p.x.Unmarshal(data[1:]) montEncode(&e.p.x, &e.p.x) x3 := e.p.polynomial(&e.p.x) - e.p.y.Sqrt(x3) + if !Sqrt(&e.p.y, x3) { + return nil, errors.New("sm9.G1: invalid compressed point encoding") + } montDecode(x3, &e.p.y) if byte(x3[0]&1) != data[0]&1 { gfpNeg(&e.p.y, &e.p.y) diff --git a/sm9/bn256/generate.go b/sm9/bn256/generate.go index ab46952..ac5b1c7 100644 --- a/sm9/bn256/generate.go +++ b/sm9/bn256/generate.go @@ -17,50 +17,196 @@ import ( // go install github.com/mmcloughlin/addchain/cmd/addchain@v0.4.0 // -func main() { - tmplAddchainFile, err := os.CreateTemp("", "addchain-template") +func generate(template, exp, element string) ([]byte, error) { + tmplAddchainFileInvert, err := os.CreateTemp("", "addchain-template") if err != nil { - log.Fatal(err) + return nil, err } - defer os.Remove(tmplAddchainFile.Name()) - if _, err := io.WriteString(tmplAddchainFile, tmplAddchain); err != nil { - log.Fatal(err) + defer os.Remove(tmplAddchainFileInvert.Name()) + if _, err := io.WriteString(tmplAddchainFileInvert, template); err != nil { + return nil, err } - if err := tmplAddchainFile.Close(); err != nil { - log.Fatal(err) + if err := tmplAddchainFileInvert.Close(); err != nil { + return nil, err } - log.Printf("Generating gfp_invert.go...") + f, err := os.CreateTemp("", "addchain-gfp") if err != nil { - log.Fatal(err) + return nil, err } defer os.Remove(f.Name()) - cmd := exec.Command("addchain", "search", "0xb640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457b") + cmd := exec.Command("addchain", "search", exp) cmd.Stderr = os.Stderr cmd.Stdout = f if err := cmd.Run(); err != nil { - log.Fatal(err) + return nil, err } if err := f.Close(); err != nil { - log.Fatal(err) + return nil, err } - cmd = exec.Command("addchain", "gen", "-tmpl", tmplAddchainFile.Name(), f.Name()) + cmd = exec.Command("addchain", "gen", "-tmpl", tmplAddchainFileInvert.Name(), f.Name()) cmd.Stderr = os.Stderr out, err := cmd.Output() if err != nil { - log.Fatal(err) + return nil, err } - out = bytes.Replace(out, []byte("Element"), []byte("gfP"), -1) - out, err = format.Source(out) + out = bytes.Replace(out, []byte("Element"), []byte(element), -1) + return format.Source(out) +} + +func writeFile(fileName string, buffers ...[]byte) error { + log.Printf("Generating %v...", fileName) + f, err := os.Create(fileName) if err != nil { log.Fatal(err) } - if err := os.WriteFile("gfp_invert.go", out, 0644); err != nil { + defer f.Close() + for _, buffer := range buffers { + if _, err := f.Write(buffer); err != nil { + return err + } + } + return nil +} + +func main() { + out, err := generate(tmplAddchainInvert, "0xb640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457b", "gfP") + if err != nil { + log.Fatal(err) + } + out1, err := generate(tmplAddchainSqrt, "0x16c80000005474de3ac07569feb1d8e8a43e5269634f5ddb7cadf364fc6a28af", "gfP") + if err != nil { + log.Fatal(err) + } + if err = writeFile("gfp_invert_sqrt.go", out, out1); err != nil { + log.Fatal(err) + } + + out, err = generate(tmplAddchainExp1, "0x2d90000000a8e9bc7580ead3fd63b1d1487ca4d2c69ebbb6f95be6c9f8d4515f", "gfP2") + if err != nil { + log.Fatal(err) + } + out1, err = generate(tmplAddchainExp2, "0xb640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457d", "gfP2") + if err != nil { + log.Fatal(err) + } + out2, err := generate(tmplAddchainExp3, "0x5b2000000151d378eb01d5a7fac763a290f949a58d3d776df2b7cd93f1a8a2be", "gfP2") + if err != nil { + log.Fatal(err) + } + if err = writeFile("gfp2_sqrt.go", out, out1, out2); err != nil { log.Fatal(err) } } -const tmplAddchain = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. +const tmplAddchainExp1 = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. +package bn256 + +func (e *Element) expPMinus1Over4(x *Element) *Element { + // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the + // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. + // + {{- range lines (format .Script) }} + // {{ . }} + {{- end }} + // + var z = new(Element).Set(e) + {{- range .Program.Temporaries }} + var {{ . }} = new(Element) + {{- end }} + {{ range $i := .Program.Instructions -}} + {{- with add $i.Op }} + {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) + {{- end -}} + {{- with double $i.Op }} + {{ $i.Output }}.Square({{ .X }}) + {{- end -}} + {{- with shift $i.Op -}} + {{- $first := 0 -}} + {{- if ne $i.Output.Identifier .X.Identifier }} + {{ $i.Output }}.Square({{ .X }}) + {{- $first = 1 -}} + {{- end }} + for s := {{ $first }}; s < {{ .S }}; s++ { + {{ $i.Output }}.Square({{ $i.Output }}) + } + {{- end -}} + {{- end }} + return e.Set(z) +} +` + +const tmplAddchainExp2 = ` +func (e *Element) expP(x *Element) *Element { + // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the + // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. + // + {{- range lines (format .Script) }} + // {{ . }} + {{- end }} + // + var z = new(Element).Set(e) + {{- range .Program.Temporaries }} + var {{ . }} = new(Element) + {{- end }} + {{ range $i := .Program.Instructions -}} + {{- with add $i.Op }} + {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) + {{- end -}} + {{- with double $i.Op }} + {{ $i.Output }}.Square({{ .X }}) + {{- end -}} + {{- with shift $i.Op -}} + {{- $first := 0 -}} + {{- if ne $i.Output.Identifier .X.Identifier }} + {{ $i.Output }}.Square({{ .X }}) + {{- $first = 1 -}} + {{- end }} + for s := {{ $first }}; s < {{ .S }}; s++ { + {{ $i.Output }}.Square({{ $i.Output }}) + } + {{- end -}} + {{- end }} + return e.Set(z) +} +` + +const tmplAddchainExp3 = ` +func (e *Element) expPMinus1Over2(x *Element) *Element { + // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the + // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. + // + {{- range lines (format .Script) }} + // {{ . }} + {{- end }} + // + var z = new(Element).Set(e) + {{- range .Program.Temporaries }} + var {{ . }} = new(Element) + {{- end }} + {{ range $i := .Program.Instructions -}} + {{- with add $i.Op }} + {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) + {{- end -}} + {{- with double $i.Op }} + {{ $i.Output }}.Square({{ .X }}) + {{- end -}} + {{- with shift $i.Op -}} + {{- $first := 0 -}} + {{- if ne $i.Output.Identifier .X.Identifier }} + {{ $i.Output }}.Square({{ .X }}) + {{- $first = 1 -}} + {{- end }} + for s := {{ $first }}; s < {{ .S }}; s++ { + {{ $i.Output }}.Square({{ $i.Output }}) + } + {{- end -}} + {{- end }} + return e.Set(z) +} +` + +const tmplAddchainInvert = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. package bn256 // Invert sets e = 1/x, and returns e. // @@ -99,3 +245,58 @@ func (e *Element) Invert(x *Element) *Element { return e.Set(z) } ` + +const tmplAddchainSqrt = ` +// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns +// false and e is unchanged. e and x can overlap. +func Sqrt(e, x *Element) (isSquare bool) { + candidate, b, i := &gfP{}, &gfP{}, &gfP{} + sqrtCandidate(candidate, x) + gfpMul(b, twoExpPMinus5Over8, candidate) // b=ta1 + gfpMul(candidate, x, b) // a1=fb + gfpMul(i, two, candidate) // i=2(fb) + gfpMul(i, i, b) // i=2(fb)b + gfpSub(i, i, one) // i=2(fb)b-1 + gfpMul(i, candidate, i) // i=(fb)(2(fb)b-1) + square := new(Element).Square(i) + if square.Equal(x) != 1 { + return false + } + e.Set(i) + return true +} + +// sqrtCandidate sets z to a square root candidate for x. z and x must not overlap. +func sqrtCandidate(z, x *Element) { + // Since p = 8k+5, exponentiation by (p - 5) / 8 yields a square root candidate. + // + // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the + // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. + // + {{- range lines (format .Script) }} + // {{ . }} + {{- end }} + // + {{- range .Program.Temporaries }} + var {{ . }} = new(Element) + {{- end }} + {{ range $i := .Program.Instructions -}} + {{- with add $i.Op }} + {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) + {{- end -}} + {{- with double $i.Op }} + {{ $i.Output }}.Square({{ .X }}) + {{- end -}} + {{- with shift $i.Op -}} + {{- $first := 0 -}} + {{- if ne $i.Output.Identifier .X.Identifier }} + {{ $i.Output }}.Square({{ .X }}) + {{- $first = 1 -}} + {{- end }} + for s := {{ $first }}; s < {{ .S }}; s++ { + {{ $i.Output }}.Square({{ $i.Output }}) + } + {{- end -}} + {{- end }} +} +` diff --git a/sm9/bn256/gfp2.go b/sm9/bn256/gfp2.go index 17198cb..69e39a5 100644 --- a/sm9/bn256/gfp2.go +++ b/sm9/bn256/gfp2.go @@ -260,15 +260,16 @@ func (e *gfP2) Frobenius(a *gfP2) *gfP2 { } // Sqrt method is only required when we implement compressed format +// TODO: use addchain to improve performance for 3 exp operations. func (ret *gfP2) Sqrt(a *gfP2) *gfP2 { // Algorithm 10 https://eprint.iacr.org/2012/685.pdf // TODO ret.SetZero() c := &twistGen.x b, b2, bq := &gfP2{}, &gfP2{}, &gfP2{} - b.Exp(a, pMinus1Over4) + b = b.expPMinus1Over4(a) b2.Mul(b, b) - bq.Exp(b, p) + bq = bq.expP(b) t := &gfP2{} x0 := &gfP{} @@ -286,7 +287,7 @@ func (ret *gfP2) Sqrt(a *gfP2) *gfP2 { ret.Set(t) } else { d, e, f := &gfP2{}, &gfP2{}, &gfP2{} - d.Exp(c, pMinus1Over2Big) + d = d.expPMinus1Over2(c) e.Mul(d, c) f.Square(e) e.Invert(e) diff --git a/sm9/bn256/gfp2_sqrt.go b/sm9/bn256/gfp2_sqrt.go new file mode 100644 index 0000000..e0d7677 --- /dev/null +++ b/sm9/bn256/gfp2_sqrt.go @@ -0,0 +1,640 @@ +// Code generated by addchain. DO NOT EDIT. +package bn256 + +func (e *gfP2) expPMinus1Over4(x *gfP2) *gfP2 { + // The sequence of 53 multiplications and 249 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. + // + // _10 = 2*1 + // _100 = 2*_10 + // _110 = _10 + _100 + // _1010 = _100 + _110 + // _1011 = 1 + _1010 + // _1101 = _10 + _1011 + // _10000 = _110 + _1010 + // _10101 = _1010 + _1011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _101001 = _1010 + _11111 + // _101011 = _10 + _101001 + // _111011 = _10000 + _101011 + // _1000101 = _1010 + _111011 + // _1001111 = _1010 + _1000101 + // _1010001 = _10 + _1001111 + // _1011011 = _1010 + _1010001 + // _1011101 = _10 + _1011011 + // _1011111 = _10 + _1011101 + // _1100011 = _100 + _1011111 + // _1101001 = _110 + _1100011 + // _1101101 = _100 + _1101001 + // _1101111 = _10 + _1101101 + // _1110101 = _110 + _1101111 + // i71 = ((_1011011 << 3 + 1) << 33 + _10101) << 8 + // i93 = ((_11101 + i71) << 9 + _1101111) << 10 + _1110101 + // i115 = ((2*i93 + 1) << 14 + _1110101) << 5 + // i128 = 2*((_1101 + i115) << 9 + _1110101) + _10101 + // i152 = ((i128 << 5 + _1011) << 9 + _111011) << 8 + // i173 = ((_11101 + i152) << 9 + _101001) << 9 + _11111 + // i200 = ((i173 << 8 + _101001) << 9 + _1101001) << 8 + // i219 = ((_1100011 + i200) << 8 + _1001111) << 8 + _1011101 + // i243 = ((i219 << 7 + _1101101) << 7 + _1011111) << 8 + // i259 = ((_101011 + i243) << 6 + _11111) << 7 + _11011 + // i285 = ((i259 << 9 + _1001111) << 7 + _1100011) << 8 + // return ((_1010001 + i285) << 8 + _1000101) << 6 + _11111 + // + var z = new(gfP2).Set(e) + var t0 = new(gfP2) + var t1 = new(gfP2) + var t2 = new(gfP2) + var t3 = new(gfP2) + var t4 = new(gfP2) + var t5 = new(gfP2) + var t6 = new(gfP2) + var t7 = new(gfP2) + var t8 = new(gfP2) + var t9 = new(gfP2) + var t10 = new(gfP2) + var t11 = new(gfP2) + var t12 = new(gfP2) + var t13 = new(gfP2) + var t14 = new(gfP2) + var t15 = new(gfP2) + var t16 = new(gfP2) + var t17 = new(gfP2) + var t18 = new(gfP2) + + t17.Square(x) + t7.Square(t17) + t15.Mul(t17, t7) + t2.Mul(t7, t15) + t13.Mul(x, t2) + t16.Mul(t17, t13) + t0.Mul(t15, t2) + t14.Mul(t2, t13) + t4.Mul(t15, t14) + t11.Mul(t17, t4) + z.Mul(t17, t11) + t10.Mul(t2, z) + t5.Mul(t17, t10) + t12.Mul(t0, t5) + t0.Mul(t2, t12) + t3.Mul(t2, t0) + t1.Mul(t17, t3) + t18.Mul(t2, t1) + t8.Mul(t17, t18) + t6.Mul(t17, t8) + t2.Mul(t7, t6) + t9.Mul(t15, t2) + t7.Mul(t7, t9) + t17.Mul(t17, t7) + t15.Mul(t15, t17) + for s := 0; s < 3; s++ { + t18.Square(t18) + } + t18.Mul(x, t18) + for s := 0; s < 33; s++ { + t18.Square(t18) + } + t18.Mul(t14, t18) + for s := 0; s < 8; s++ { + t18.Square(t18) + } + t18.Mul(t11, t18) + for s := 0; s < 9; s++ { + t18.Square(t18) + } + t17.Mul(t17, t18) + for s := 0; s < 10; s++ { + t17.Square(t17) + } + t17.Mul(t15, t17) + t17.Square(t17) + t17.Mul(x, t17) + for s := 0; s < 14; s++ { + t17.Square(t17) + } + t17.Mul(t15, t17) + for s := 0; s < 5; s++ { + t17.Square(t17) + } + t16.Mul(t16, t17) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t15.Mul(t15, t16) + t15.Square(t15) + t14.Mul(t14, t15) + for s := 0; s < 5; s++ { + t14.Square(t14) + } + t13.Mul(t13, t14) + for s := 0; s < 9; s++ { + t13.Square(t13) + } + t12.Mul(t12, t13) + for s := 0; s < 8; s++ { + t12.Square(t12) + } + t11.Mul(t11, t12) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t11.Mul(t10, t11) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t11.Mul(z, t11) + for s := 0; s < 8; s++ { + t11.Square(t11) + } + t10.Mul(t10, t11) + for s := 0; s < 9; s++ { + t10.Square(t10) + } + t9.Mul(t9, t10) + for s := 0; s < 8; s++ { + t9.Square(t9) + } + t9.Mul(t2, t9) + for s := 0; s < 8; s++ { + t9.Square(t9) + } + t9.Mul(t3, t9) + for s := 0; s < 8; s++ { + t9.Square(t9) + } + t8.Mul(t8, t9) + for s := 0; s < 7; s++ { + t8.Square(t8) + } + t7.Mul(t7, t8) + for s := 0; s < 7; s++ { + t7.Square(t7) + } + t6.Mul(t6, t7) + for s := 0; s < 8; s++ { + t6.Square(t6) + } + t5.Mul(t5, t6) + for s := 0; s < 6; s++ { + t5.Square(t5) + } + t5.Mul(z, t5) + for s := 0; s < 7; s++ { + t5.Square(t5) + } + t4.Mul(t4, t5) + for s := 0; s < 9; s++ { + t4.Square(t4) + } + t3.Mul(t3, t4) + for s := 0; s < 7; s++ { + t3.Square(t3) + } + t2.Mul(t2, t3) + for s := 0; s < 8; s++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + for s := 0; s < 8; s++ { + t1.Square(t1) + } + t0.Mul(t0, t1) + for s := 0; s < 6; s++ { + t0.Square(t0) + } + z.Mul(z, t0) + return e.Set(z) +} + +func (e *gfP2) expP(x *gfP2) *gfP2 { + // The sequence of 56 multiplications and 250 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. + // + // _10 = 2*1 + // _11 = 1 + _10 + // _100 = 1 + _11 + // _101 = 1 + _100 + // _1000 = _11 + _101 + // _1001 = 1 + _1000 + // _1011 = _10 + _1001 + // _1101 = _10 + _1011 + // _10101 = _1000 + _1101 + // _11001 = _100 + _10101 + // _11101 = _100 + _11001 + // _11111 = _10 + _11101 + // _100011 = _100 + _11111 + // _100101 = _10 + _100011 + // _101001 = _100 + _100101 + // _101011 = _10 + _101001 + // _101101 = _10 + _101011 + // _101111 = _10 + _101101 + // _110011 = _100 + _101111 + // _110101 = _10 + _110011 + // _110111 = _10 + _110101 + // _111011 = _100 + _110111 + // _111101 = _10 + _111011 + // _111111 = _10 + _111101 + // _1011010 = _11101 + _111101 + // i71 = ((_1011010 << 3 + _1001) << 33 + _10101) << 8 + // i88 = ((_11101 + i71) << 8 + _110111) << 6 + _100011 + // i115 = ((i88 << 6 + _101011) << 12 + _11101) << 7 + // i128 = ((_101101 + i115) << 8 + _111111) << 2 + _11 + // i152 = ((i128 << 5 + _1011) << 9 + _111011) << 8 + // i173 = ((_11101 + i152) << 9 + _101001) << 9 + _11111 + // i195 = ((i173 << 8 + _101001) << 6 + _1101) << 6 + // i213 = ((_1011 + i195) << 7 + _1101) << 8 + _111101 + // i234 = ((i213 << 7 + _111011) << 6 + _101101) << 6 + // i250 = ((_101111 + i234) << 6 + _100101) << 7 + _110111 + // i272 = ((i250 << 6 + _110011) << 6 + _11001) << 8 + // i290 = ((_111111 + i272) << 9 + _110101) << 6 + _101 + // return (i290 << 9 + _101011) << 5 + _11101 + // + var z = new(gfP2).Set(e) + var t0 = new(gfP2) + var t1 = new(gfP2) + var t2 = new(gfP2) + var t3 = new(gfP2) + var t4 = new(gfP2) + var t5 = new(gfP2) + var t6 = new(gfP2) + var t7 = new(gfP2) + var t8 = new(gfP2) + var t9 = new(gfP2) + var t10 = new(gfP2) + var t11 = new(gfP2) + var t12 = new(gfP2) + var t13 = new(gfP2) + var t14 = new(gfP2) + var t15 = new(gfP2) + var t16 = new(gfP2) + var t17 = new(gfP2) + var t18 = new(gfP2) + var t19 = new(gfP2) + var t20 = new(gfP2) + + t3.Square(x) + t16.Mul(x, t3) + t10.Mul(x, t16) + t1.Mul(x, t10) + z.Mul(t16, t1) + t19.Mul(x, z) + t13.Mul(t3, t19) + t12.Mul(t3, t13) + t18.Mul(z, t12) + t4.Mul(t10, t18) + z.Mul(t10, t4) + t15.Mul(t3, z) + t17.Mul(t10, t15) + t7.Mul(t3, t17) + t14.Mul(t10, t7) + t0.Mul(t3, t14) + t9.Mul(t3, t0) + t8.Mul(t3, t9) + t5.Mul(t10, t8) + t2.Mul(t3, t5) + t6.Mul(t3, t2) + t10.Mul(t10, t6) + t11.Mul(t3, t10) + t3.Mul(t3, t11) + t20.Mul(z, t11) + for s := 0; s < 3; s++ { + t20.Square(t20) + } + t19.Mul(t19, t20) + for s := 0; s < 33; s++ { + t19.Square(t19) + } + t18.Mul(t18, t19) + for s := 0; s < 8; s++ { + t18.Square(t18) + } + t18.Mul(z, t18) + for s := 0; s < 8; s++ { + t18.Square(t18) + } + t18.Mul(t6, t18) + for s := 0; s < 6; s++ { + t18.Square(t18) + } + t17.Mul(t17, t18) + for s := 0; s < 6; s++ { + t17.Square(t17) + } + t17.Mul(t0, t17) + for s := 0; s < 12; s++ { + t17.Square(t17) + } + t17.Mul(z, t17) + for s := 0; s < 7; s++ { + t17.Square(t17) + } + t17.Mul(t9, t17) + for s := 0; s < 8; s++ { + t17.Square(t17) + } + t17.Mul(t3, t17) + for s := 0; s < 2; s++ { + t17.Square(t17) + } + t16.Mul(t16, t17) + for s := 0; s < 5; s++ { + t16.Square(t16) + } + t16.Mul(t13, t16) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t16.Mul(t10, t16) + for s := 0; s < 8; s++ { + t16.Square(t16) + } + t16.Mul(z, t16) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t16.Mul(t14, t16) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t15.Mul(t15, t16) + for s := 0; s < 8; s++ { + t15.Square(t15) + } + t14.Mul(t14, t15) + for s := 0; s < 6; s++ { + t14.Square(t14) + } + t14.Mul(t12, t14) + for s := 0; s < 6; s++ { + t14.Square(t14) + } + t13.Mul(t13, t14) + for s := 0; s < 7; s++ { + t13.Square(t13) + } + t12.Mul(t12, t13) + for s := 0; s < 8; s++ { + t12.Square(t12) + } + t11.Mul(t11, t12) + for s := 0; s < 7; s++ { + t11.Square(t11) + } + t10.Mul(t10, t11) + for s := 0; s < 6; s++ { + t10.Square(t10) + } + t9.Mul(t9, t10) + for s := 0; s < 6; s++ { + t9.Square(t9) + } + t8.Mul(t8, t9) + for s := 0; s < 6; s++ { + t8.Square(t8) + } + t7.Mul(t7, t8) + for s := 0; s < 7; s++ { + t7.Square(t7) + } + t6.Mul(t6, t7) + for s := 0; s < 6; s++ { + t6.Square(t6) + } + t5.Mul(t5, t6) + for s := 0; s < 6; s++ { + t5.Square(t5) + } + t4.Mul(t4, t5) + for s := 0; s < 8; s++ { + t4.Square(t4) + } + t3.Mul(t3, t4) + for s := 0; s < 9; s++ { + t3.Square(t3) + } + t2.Mul(t2, t3) + for s := 0; s < 6; s++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + for s := 0; s < 9; s++ { + t1.Square(t1) + } + t0.Mul(t0, t1) + for s := 0; s < 5; s++ { + t0.Square(t0) + } + z.Mul(z, t0) + return e.Set(z) +} + +func (e *gfP2) expPMinus1Over2(x *gfP2) *gfP2 { + // The sequence of 53 multiplications and 250 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. + // + // _10 = 2*1 + // _100 = 2*_10 + // _110 = _10 + _100 + // _1010 = _100 + _110 + // _1011 = 1 + _1010 + // _1101 = _10 + _1011 + // _10000 = _110 + _1010 + // _10101 = _1010 + _1011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _101001 = _1010 + _11111 + // _101011 = _10 + _101001 + // _111011 = _10000 + _101011 + // _1000101 = _1010 + _111011 + // _1001111 = _1010 + _1000101 + // _1010001 = _10 + _1001111 + // _1011011 = _1010 + _1010001 + // _1011101 = _10 + _1011011 + // _1011111 = _10 + _1011101 + // _1100011 = _100 + _1011111 + // _1101001 = _110 + _1100011 + // _1101101 = _100 + _1101001 + // _1101111 = _10 + _1101101 + // _1110101 = _110 + _1101111 + // i71 = ((_1011011 << 3 + 1) << 33 + _10101) << 8 + // i93 = ((_11101 + i71) << 9 + _1101111) << 10 + _1110101 + // i115 = ((2*i93 + 1) << 14 + _1110101) << 5 + // i128 = 2*((_1101 + i115) << 9 + _1110101) + _10101 + // i152 = ((i128 << 5 + _1011) << 9 + _111011) << 8 + // i173 = ((_11101 + i152) << 9 + _101001) << 9 + _11111 + // i200 = ((i173 << 8 + _101001) << 9 + _1101001) << 8 + // i219 = ((_1100011 + i200) << 8 + _1001111) << 8 + _1011101 + // i243 = ((i219 << 7 + _1101101) << 7 + _1011111) << 8 + // i259 = ((_101011 + i243) << 6 + _11111) << 7 + _11011 + // i285 = ((i259 << 9 + _1001111) << 7 + _1100011) << 8 + // i302 = ((_1010001 + i285) << 8 + _1000101) << 6 + _11111 + // return 2*i302 + // + var z = new(gfP2).Set(e) + var t0 = new(gfP2) + var t1 = new(gfP2) + var t2 = new(gfP2) + var t3 = new(gfP2) + var t4 = new(gfP2) + var t5 = new(gfP2) + var t6 = new(gfP2) + var t7 = new(gfP2) + var t8 = new(gfP2) + var t9 = new(gfP2) + var t10 = new(gfP2) + var t11 = new(gfP2) + var t12 = new(gfP2) + var t13 = new(gfP2) + var t14 = new(gfP2) + var t15 = new(gfP2) + var t16 = new(gfP2) + var t17 = new(gfP2) + var t18 = new(gfP2) + + t17.Square(x) + t7.Square(t17) + t15.Mul(t17, t7) + t2.Mul(t7, t15) + t13.Mul(x, t2) + t16.Mul(t17, t13) + t0.Mul(t15, t2) + t14.Mul(t2, t13) + t4.Mul(t15, t14) + t11.Mul(t17, t4) + z.Mul(t17, t11) + t10.Mul(t2, z) + t5.Mul(t17, t10) + t12.Mul(t0, t5) + t0.Mul(t2, t12) + t3.Mul(t2, t0) + t1.Mul(t17, t3) + t18.Mul(t2, t1) + t8.Mul(t17, t18) + t6.Mul(t17, t8) + t2.Mul(t7, t6) + t9.Mul(t15, t2) + t7.Mul(t7, t9) + t17.Mul(t17, t7) + t15.Mul(t15, t17) + for s := 0; s < 3; s++ { + t18.Square(t18) + } + t18.Mul(x, t18) + for s := 0; s < 33; s++ { + t18.Square(t18) + } + t18.Mul(t14, t18) + for s := 0; s < 8; s++ { + t18.Square(t18) + } + t18.Mul(t11, t18) + for s := 0; s < 9; s++ { + t18.Square(t18) + } + t17.Mul(t17, t18) + for s := 0; s < 10; s++ { + t17.Square(t17) + } + t17.Mul(t15, t17) + t17.Square(t17) + t17.Mul(x, t17) + for s := 0; s < 14; s++ { + t17.Square(t17) + } + t17.Mul(t15, t17) + for s := 0; s < 5; s++ { + t17.Square(t17) + } + t16.Mul(t16, t17) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t15.Mul(t15, t16) + t15.Square(t15) + t14.Mul(t14, t15) + for s := 0; s < 5; s++ { + t14.Square(t14) + } + t13.Mul(t13, t14) + for s := 0; s < 9; s++ { + t13.Square(t13) + } + t12.Mul(t12, t13) + for s := 0; s < 8; s++ { + t12.Square(t12) + } + t11.Mul(t11, t12) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t11.Mul(t10, t11) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t11.Mul(z, t11) + for s := 0; s < 8; s++ { + t11.Square(t11) + } + t10.Mul(t10, t11) + for s := 0; s < 9; s++ { + t10.Square(t10) + } + t9.Mul(t9, t10) + for s := 0; s < 8; s++ { + t9.Square(t9) + } + t9.Mul(t2, t9) + for s := 0; s < 8; s++ { + t9.Square(t9) + } + t9.Mul(t3, t9) + for s := 0; s < 8; s++ { + t9.Square(t9) + } + t8.Mul(t8, t9) + for s := 0; s < 7; s++ { + t8.Square(t8) + } + t7.Mul(t7, t8) + for s := 0; s < 7; s++ { + t7.Square(t7) + } + t6.Mul(t6, t7) + for s := 0; s < 8; s++ { + t6.Square(t6) + } + t5.Mul(t5, t6) + for s := 0; s < 6; s++ { + t5.Square(t5) + } + t5.Mul(z, t5) + for s := 0; s < 7; s++ { + t5.Square(t5) + } + t4.Mul(t4, t5) + for s := 0; s < 9; s++ { + t4.Square(t4) + } + t3.Mul(t3, t4) + for s := 0; s < 7; s++ { + t3.Square(t3) + } + t2.Mul(t2, t3) + for s := 0; s < 8; s++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + for s := 0; s < 8; s++ { + t1.Square(t1) + } + t0.Mul(t0, t1) + for s := 0; s < 6; s++ { + t0.Square(t0) + } + z.Mul(z, t0) + z.Square(z) + return e.Set(z) +} diff --git a/sm9/bn256/gfp_invert.go b/sm9/bn256/gfp_invert.go deleted file mode 100644 index 78ff3a8..0000000 --- a/sm9/bn256/gfp_invert.go +++ /dev/null @@ -1,221 +0,0 @@ -// Code generated by addchain. DO NOT EDIT. -package bn256 - -// Invert sets e = 1/x, and returns e. -// -// If x == 0, Invert returns e = 0. -func (e *gfP) Invert(x *gfP) *gfP { - // Inversion is implemented as exponentiation with exponent p − 2. - // The sequence of 56 multiplications and 250 squarings is derived from the - // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. - // - // _10 = 2*1 - // _100 = 2*_10 - // _110 = _10 + _100 - // _1010 = _100 + _110 - // _1011 = 1 + _1010 - // _1101 = _10 + _1011 - // _10000 = _110 + _1010 - // _10101 = _1010 + _1011 - // _11011 = _110 + _10101 - // _11101 = _10 + _11011 - // _11111 = _10 + _11101 - // _101001 = _1010 + _11111 - // _101011 = _10 + _101001 - // _111011 = _10000 + _101011 - // _1000101 = _1010 + _111011 - // _1001111 = _1010 + _1000101 - // _1010001 = _10 + _1001111 - // _1011011 = _1010 + _1010001 - // _1011101 = _10 + _1011011 - // _1011111 = _10 + _1011101 - // _1100011 = _100 + _1011111 - // _1101001 = _110 + _1100011 - // _1101101 = _100 + _1101001 - // _1101111 = _10 + _1101101 - // _1110101 = _110 + _1101111 - // _1111011 = _110 + _1110101 - // _10110110 = _111011 + _1111011 - // i72 = ((_10110110 << 2 + 1) << 33 + _10101) << 8 - // i94 = ((_11101 + i72) << 9 + _1101111) << 10 + _1110101 - // i116 = ((2*i94 + 1) << 14 + _1110101) << 5 - // i129 = 2*((_1101 + i116) << 9 + _1111011 + _100) - // i146 = ((1 + i129) << 5 + _1011) << 9 + _111011 - // i174 = ((i146 << 8 + _11101) << 9 + _101001) << 9 - // i194 = ((_11111 + i174) << 8 + _101001) << 9 + _1101001 - // i220 = ((i194 << 8 + _1100011) << 8 + _1001111) << 8 - // i237 = ((_1011101 + i220) << 7 + _1101101) << 7 + _1011111 - // i260 = ((i237 << 8 + _101011) << 6 + _11111) << 7 - // i279 = ((_11011 + i260) << 9 + _1001111) << 7 + _1100011 - // i305 = ((i279 << 8 + _1010001) << 8 + _1000101) << 8 - // return _1111011 + i305 - // - var z = new(gfP).Set(e) - var t0 = new(gfP) - var t1 = new(gfP) - var t2 = new(gfP) - var t3 = new(gfP) - var t4 = new(gfP) - var t5 = new(gfP) - var t6 = new(gfP) - var t7 = new(gfP) - var t8 = new(gfP) - var t9 = new(gfP) - var t10 = new(gfP) - var t11 = new(gfP) - var t12 = new(gfP) - var t13 = new(gfP) - var t14 = new(gfP) - var t15 = new(gfP) - var t16 = new(gfP) - var t17 = new(gfP) - var t18 = new(gfP) - var t19 = new(gfP) - var t20 = new(gfP) - - t17.Square(x) - t15.Square(t17) - z.Mul(t17, t15) - t2.Mul(t15, z) - t14.Mul(x, t2) - t16.Mul(t17, t14) - t0.Mul(z, t2) - t19.Mul(t2, t14) - t4.Mul(z, t19) - t12.Mul(t17, t4) - t5.Mul(t17, t12) - t11.Mul(t2, t5) - t6.Mul(t17, t11) - t13.Mul(t0, t6) - t0.Mul(t2, t13) - t3.Mul(t2, t0) - t1.Mul(t17, t3) - t2.Mul(t2, t1) - t9.Mul(t17, t2) - t7.Mul(t17, t9) - t2.Mul(t15, t7) - t10.Mul(z, t2) - t8.Mul(t15, t10) - t18.Mul(t17, t8) - t17.Mul(z, t18) - z.Mul(z, t17) - t20.Mul(t13, z) - for s := 0; s < 2; s++ { - t20.Square(t20) - } - t20.Mul(x, t20) - for s := 0; s < 33; s++ { - t20.Square(t20) - } - t19.Mul(t19, t20) - for s := 0; s < 8; s++ { - t19.Square(t19) - } - t19.Mul(t12, t19) - for s := 0; s < 9; s++ { - t19.Square(t19) - } - t18.Mul(t18, t19) - for s := 0; s < 10; s++ { - t18.Square(t18) - } - t18.Mul(t17, t18) - t18.Square(t18) - t18.Mul(x, t18) - for s := 0; s < 14; s++ { - t18.Square(t18) - } - t17.Mul(t17, t18) - for s := 0; s < 5; s++ { - t17.Square(t17) - } - t16.Mul(t16, t17) - for s := 0; s < 9; s++ { - t16.Square(t16) - } - t16.Mul(z, t16) - t15.Mul(t15, t16) - t15.Square(t15) - t15.Mul(x, t15) - for s := 0; s < 5; s++ { - t15.Square(t15) - } - t14.Mul(t14, t15) - for s := 0; s < 9; s++ { - t14.Square(t14) - } - t13.Mul(t13, t14) - for s := 0; s < 8; s++ { - t13.Square(t13) - } - t12.Mul(t12, t13) - for s := 0; s < 9; s++ { - t12.Square(t12) - } - t12.Mul(t11, t12) - for s := 0; s < 9; s++ { - t12.Square(t12) - } - t12.Mul(t5, t12) - for s := 0; s < 8; s++ { - t12.Square(t12) - } - t11.Mul(t11, t12) - for s := 0; s < 9; s++ { - t11.Square(t11) - } - t10.Mul(t10, t11) - for s := 0; s < 8; s++ { - t10.Square(t10) - } - t10.Mul(t2, t10) - for s := 0; s < 8; s++ { - t10.Square(t10) - } - t10.Mul(t3, t10) - for s := 0; s < 8; s++ { - t10.Square(t10) - } - t9.Mul(t9, t10) - for s := 0; s < 7; s++ { - t9.Square(t9) - } - t8.Mul(t8, t9) - for s := 0; s < 7; s++ { - t8.Square(t8) - } - t7.Mul(t7, t8) - for s := 0; s < 8; s++ { - t7.Square(t7) - } - t6.Mul(t6, t7) - for s := 0; s < 6; s++ { - t6.Square(t6) - } - t5.Mul(t5, t6) - for s := 0; s < 7; s++ { - t5.Square(t5) - } - t4.Mul(t4, t5) - for s := 0; s < 9; s++ { - t4.Square(t4) - } - t3.Mul(t3, t4) - for s := 0; s < 7; s++ { - t3.Square(t3) - } - t2.Mul(t2, t3) - for s := 0; s < 8; s++ { - t2.Square(t2) - } - t1.Mul(t1, t2) - for s := 0; s < 8; s++ { - t1.Square(t1) - } - t0.Mul(t0, t1) - for s := 0; s < 8; s++ { - t0.Square(t0) - } - z.Mul(z, t0) - return e.Set(z) -} diff --git a/sm9/bn256/gfp_sqrt.go b/sm9/bn256/gfp_invert_sqrt.go similarity index 52% rename from sm9/bn256/gfp_sqrt.go rename to sm9/bn256/gfp_invert_sqrt.go index d22800c..6850b72 100644 --- a/sm9/bn256/gfp_sqrt.go +++ b/sm9/bn256/gfp_invert_sqrt.go @@ -1,6 +1,225 @@ // Code generated by addchain. DO NOT EDIT. package bn256 +// Invert sets e = 1/x, and returns e. +// +// If x == 0, Invert returns e = 0. +func (e *gfP) Invert(x *gfP) *gfP { + // Inversion is implemented as exponentiation with exponent p − 2. + // The sequence of 56 multiplications and 250 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. + // + // _10 = 2*1 + // _100 = 2*_10 + // _110 = _10 + _100 + // _1010 = _100 + _110 + // _1011 = 1 + _1010 + // _1101 = _10 + _1011 + // _10000 = _110 + _1010 + // _10101 = _1010 + _1011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _101001 = _1010 + _11111 + // _101011 = _10 + _101001 + // _111011 = _10000 + _101011 + // _1000101 = _1010 + _111011 + // _1001111 = _1010 + _1000101 + // _1010001 = _10 + _1001111 + // _1011011 = _1010 + _1010001 + // _1011101 = _10 + _1011011 + // _1011111 = _10 + _1011101 + // _1100011 = _100 + _1011111 + // _1101001 = _110 + _1100011 + // _1101101 = _100 + _1101001 + // _1101111 = _10 + _1101101 + // _1110101 = _110 + _1101111 + // _1111011 = _110 + _1110101 + // _10110110 = _111011 + _1111011 + // i72 = ((_10110110 << 2 + 1) << 33 + _10101) << 8 + // i94 = ((_11101 + i72) << 9 + _1101111) << 10 + _1110101 + // i116 = ((2*i94 + 1) << 14 + _1110101) << 5 + // i129 = 2*((_1101 + i116) << 9 + _1111011 + _100) + // i146 = ((1 + i129) << 5 + _1011) << 9 + _111011 + // i174 = ((i146 << 8 + _11101) << 9 + _101001) << 9 + // i194 = ((_11111 + i174) << 8 + _101001) << 9 + _1101001 + // i220 = ((i194 << 8 + _1100011) << 8 + _1001111) << 8 + // i237 = ((_1011101 + i220) << 7 + _1101101) << 7 + _1011111 + // i260 = ((i237 << 8 + _101011) << 6 + _11111) << 7 + // i279 = ((_11011 + i260) << 9 + _1001111) << 7 + _1100011 + // i305 = ((i279 << 8 + _1010001) << 8 + _1000101) << 8 + // return _1111011 + i305 + // + var z = new(gfP).Set(e) + var t0 = new(gfP) + var t1 = new(gfP) + var t2 = new(gfP) + var t3 = new(gfP) + var t4 = new(gfP) + var t5 = new(gfP) + var t6 = new(gfP) + var t7 = new(gfP) + var t8 = new(gfP) + var t9 = new(gfP) + var t10 = new(gfP) + var t11 = new(gfP) + var t12 = new(gfP) + var t13 = new(gfP) + var t14 = new(gfP) + var t15 = new(gfP) + var t16 = new(gfP) + var t17 = new(gfP) + var t18 = new(gfP) + var t19 = new(gfP) + var t20 = new(gfP) + + t17.Square(x) + t15.Square(t17) + z.Mul(t17, t15) + t2.Mul(t15, z) + t14.Mul(x, t2) + t16.Mul(t17, t14) + t0.Mul(z, t2) + t19.Mul(t2, t14) + t4.Mul(z, t19) + t12.Mul(t17, t4) + t5.Mul(t17, t12) + t11.Mul(t2, t5) + t6.Mul(t17, t11) + t13.Mul(t0, t6) + t0.Mul(t2, t13) + t3.Mul(t2, t0) + t1.Mul(t17, t3) + t2.Mul(t2, t1) + t9.Mul(t17, t2) + t7.Mul(t17, t9) + t2.Mul(t15, t7) + t10.Mul(z, t2) + t8.Mul(t15, t10) + t18.Mul(t17, t8) + t17.Mul(z, t18) + z.Mul(z, t17) + t20.Mul(t13, z) + for s := 0; s < 2; s++ { + t20.Square(t20) + } + t20.Mul(x, t20) + for s := 0; s < 33; s++ { + t20.Square(t20) + } + t19.Mul(t19, t20) + for s := 0; s < 8; s++ { + t19.Square(t19) + } + t19.Mul(t12, t19) + for s := 0; s < 9; s++ { + t19.Square(t19) + } + t18.Mul(t18, t19) + for s := 0; s < 10; s++ { + t18.Square(t18) + } + t18.Mul(t17, t18) + t18.Square(t18) + t18.Mul(x, t18) + for s := 0; s < 14; s++ { + t18.Square(t18) + } + t17.Mul(t17, t18) + for s := 0; s < 5; s++ { + t17.Square(t17) + } + t16.Mul(t16, t17) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t16.Mul(z, t16) + t15.Mul(t15, t16) + t15.Square(t15) + t15.Mul(x, t15) + for s := 0; s < 5; s++ { + t15.Square(t15) + } + t14.Mul(t14, t15) + for s := 0; s < 9; s++ { + t14.Square(t14) + } + t13.Mul(t13, t14) + for s := 0; s < 8; s++ { + t13.Square(t13) + } + t12.Mul(t12, t13) + for s := 0; s < 9; s++ { + t12.Square(t12) + } + t12.Mul(t11, t12) + for s := 0; s < 9; s++ { + t12.Square(t12) + } + t12.Mul(t5, t12) + for s := 0; s < 8; s++ { + t12.Square(t12) + } + t11.Mul(t11, t12) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t10.Mul(t10, t11) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t10.Mul(t2, t10) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t10.Mul(t3, t10) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t9.Mul(t9, t10) + for s := 0; s < 7; s++ { + t9.Square(t9) + } + t8.Mul(t8, t9) + for s := 0; s < 7; s++ { + t8.Square(t8) + } + t7.Mul(t7, t8) + for s := 0; s < 8; s++ { + t7.Square(t7) + } + t6.Mul(t6, t7) + for s := 0; s < 6; s++ { + t6.Square(t6) + } + t5.Mul(t5, t6) + for s := 0; s < 7; s++ { + t5.Square(t5) + } + t4.Mul(t4, t5) + for s := 0; s < 9; s++ { + t4.Square(t4) + } + t3.Mul(t3, t4) + for s := 0; s < 7; s++ { + t3.Square(t3) + } + t2.Mul(t2, t3) + for s := 0; s < 8; s++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + for s := 0; s < 8; s++ { + t1.Square(t1) + } + t0.Mul(t0, t1) + for s := 0; s < 8; s++ { + t0.Square(t0) + } + z.Mul(z, t0) + return e.Set(z) +} + // Sqrt sets e to a square root of x. If x is not a square, Sqrt returns // false and e is unchanged. e and x can overlap. func Sqrt(e, x *gfP) (isSquare bool) { diff --git a/sm9/generate.go b/sm9/generate.go deleted file mode 100644 index 212beeb..0000000 --- a/sm9/generate.go +++ /dev/null @@ -1,118 +0,0 @@ -//go:build ignore -// +build ignore - -package main - -import ( - "bytes" - "go/format" - "io" - "log" - "os" - "os/exec" -) - -// Running this generator requires addchain v0.4.0, which can be installed with -// -// go install github.com/mmcloughlin/addchain/cmd/addchain@v0.4.0 -// - -func main() { - tmplAddchainFile, err := os.CreateTemp("", "addchain-template") - if err != nil { - log.Fatal(err) - } - defer os.Remove(tmplAddchainFile.Name()) - if _, err := io.WriteString(tmplAddchainFile, tmplAddchain); err != nil { - log.Fatal(err) - } - if err := tmplAddchainFile.Close(); err != nil { - log.Fatal(err) - } - log.Printf("Generating gfp_sqrt.go...") - f, err := os.CreateTemp("", "addchain-gfp") - if err != nil { - log.Fatal(err) - } - defer os.Remove(f.Name()) - cmd := exec.Command("addchain", "search", "0x16c80000005474de3ac07569feb1d8e8a43e5269634f5ddb7cadf364fc6a28af") - cmd.Stderr = os.Stderr - cmd.Stdout = f - if err := cmd.Run(); err != nil { - log.Fatal(err) - } - if err := f.Close(); err != nil { - log.Fatal(err) - } - cmd = exec.Command("addchain", "gen", "-tmpl", tmplAddchainFile.Name(), f.Name()) - cmd.Stderr = os.Stderr - out, err := cmd.Output() - if err != nil { - log.Fatal(err) - } - out = bytes.Replace(out, []byte("Element"), []byte("gfP"), -1) - out, err = format.Source(out) - if err != nil { - log.Fatal(err) - } - if err := os.WriteFile("gfp_sqrt.go", out, 0644); err != nil { - log.Fatal(err) - } -} - -const tmplAddchain = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. -package bn256 - -// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns -// false and e is unchanged. e and x can overlap. -func Sqrt(e, x *Element) (isSquare bool) { - candidate, b, i := &gfP{}, &gfP{}, &gfP{} - sqrtCandidate(candidate, x) - gfpMul(b, twoExpPMinus5Over8, candidate) // b=ta1 - gfpMul(candidate, x, b) // a1=fb - gfpMul(i, two, candidate) // i=2(fb) - gfpMul(i, i, b) // i=2(fb)b - gfpSub(i, i, one) // i=2(fb)b-1 - gfpMul(i, candidate, i) // i=(fb)(2(fb)b-1) - square := new(Element).Square(i) - if square.Equal(x) != 1 { - return false - } - e.Set(i) - return true -} - -// sqrtCandidate sets z to a square root candidate for x. z and x must not overlap. -func sqrtCandidate(z, x *Element) { - // Since p = 8k+5, exponentiation by (p - 5) / 8 yields a square root candidate. - // - // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the - // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. - // - {{- range lines (format .Script) }} - // {{ . }} - {{- end }} - // - {{- range .Program.Temporaries }} - var {{ . }} = new(Element) - {{- end }} - {{ range $i := .Program.Instructions -}} - {{- with add $i.Op }} - {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) - {{- end -}} - {{- with double $i.Op }} - {{ $i.Output }}.Square({{ .X }}) - {{- end -}} - {{- with shift $i.Op -}} - {{- $first := 0 -}} - {{- if ne $i.Output.Identifier .X.Identifier }} - {{ $i.Output }}.Square({{ .X }}) - {{- $first = 1 -}} - {{- end }} - for s := {{ $first }}; s < {{ .S }}; s++ { - {{ $i.Output }}.Square({{ $i.Output }}) - } - {{- end -}} - {{- end }} -} -`