From f58cb10ad9c945965e0874a76e06280d7b03fa0e Mon Sep 17 00:00:00 2001 From: emmansun Date: Sat, 29 Apr 2023 10:30:57 +0800 Subject: [PATCH] sm9: improve gfP invert & sqrt performance --- sm9/bn256/generate.go | 101 +++++++++++++++++ sm9/bn256/gfp.go | 24 ++++- sm9/bn256/gfp_invert.go | 221 ++++++++++++++++++++++++++++++++++++++ sm9/bn256/gfp_sqrt.go | 232 ++++++++++++++++++++++++++++++++++++++++ sm9/bn256/gfp_test.go | 27 +++++ sm9/generate.go | 118 ++++++++++++++++++++ 6 files changed, 719 insertions(+), 4 deletions(-) create mode 100644 sm9/bn256/generate.go create mode 100644 sm9/bn256/gfp_invert.go create mode 100644 sm9/bn256/gfp_sqrt.go create mode 100644 sm9/generate.go diff --git a/sm9/bn256/generate.go b/sm9/bn256/generate.go new file mode 100644 index 0000000..ab46952 --- /dev/null +++ b/sm9/bn256/generate.go @@ -0,0 +1,101 @@ +//go:build ignore +// +build ignore + +package main + +import ( + "bytes" + "go/format" + "io" + "log" + "os" + "os/exec" +) + +// Running this generator requires addchain v0.4.0, which can be installed with +// +// go install github.com/mmcloughlin/addchain/cmd/addchain@v0.4.0 +// + +func main() { + tmplAddchainFile, err := os.CreateTemp("", "addchain-template") + if err != nil { + log.Fatal(err) + } + defer os.Remove(tmplAddchainFile.Name()) + if _, err := io.WriteString(tmplAddchainFile, tmplAddchain); err != nil { + log.Fatal(err) + } + if err := tmplAddchainFile.Close(); err != nil { + log.Fatal(err) + } + log.Printf("Generating gfp_invert.go...") + f, err := os.CreateTemp("", "addchain-gfp") + if err != nil { + log.Fatal(err) + } + defer os.Remove(f.Name()) + cmd := exec.Command("addchain", "search", "0xb640000002a3a6f1d603ab4ff58ec74521f2934b1a7aeedbe56f9b27e351457b") + cmd.Stderr = os.Stderr + cmd.Stdout = f + if err := cmd.Run(); err != nil { + log.Fatal(err) + } + if err := f.Close(); err != nil { + log.Fatal(err) + } + cmd = exec.Command("addchain", "gen", "-tmpl", tmplAddchainFile.Name(), f.Name()) + cmd.Stderr = os.Stderr + out, err := cmd.Output() + if err != nil { + log.Fatal(err) + } + out = bytes.Replace(out, []byte("Element"), []byte("gfP"), -1) + out, err = format.Source(out) + if err != nil { + log.Fatal(err) + } + if err := os.WriteFile("gfp_invert.go", out, 0644); err != nil { + log.Fatal(err) + } +} + +const tmplAddchain = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. +package bn256 +// Invert sets e = 1/x, and returns e. +// +// If x == 0, Invert returns e = 0. +func (e *Element) Invert(x *Element) *Element { + // Inversion is implemented as exponentiation with exponent p − 2. + // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the + // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. + // + {{- range lines (format .Script) }} + // {{ . }} + {{- end }} + // + var z = new(Element).Set(e) + {{- range .Program.Temporaries }} + var {{ . }} = new(Element) + {{- end }} + {{ range $i := .Program.Instructions -}} + {{- with add $i.Op }} + {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) + {{- end -}} + {{- with double $i.Op }} + {{ $i.Output }}.Square({{ .X }}) + {{- end -}} + {{- with shift $i.Op -}} + {{- $first := 0 -}} + {{- if ne $i.Output.Identifier .X.Identifier }} + {{ $i.Output }}.Square({{ .X }}) + {{- $first = 1 -}} + {{- end }} + for s := {{ $first }}; s < {{ .S }}; s++ { + {{ $i.Output }}.Square({{ $i.Output }}) + } + {{- end -}} + {{- end }} + return e.Set(z) +} +` diff --git a/sm9/bn256/gfp.go b/sm9/bn256/gfp.go index e733de6..b4d4726 100644 --- a/sm9/bn256/gfp.go +++ b/sm9/bn256/gfp.go @@ -59,11 +59,13 @@ func (e *gfP) String() string { return fmt.Sprintf("%16.16x%16.16x%16.16x%16.16x", e[3], e[2], e[1], e[0]) } -func (e *gfP) Set(f *gfP) { +func (e *gfP) Set(f *gfP) *gfP { e[0] = f[0] e[1] = f[1] e[2] = f[2] e[3] = f[3] + + return e } func (e *gfP) exp(f *gfP, bits [4]uint64) { @@ -84,8 +86,22 @@ func (e *gfP) exp(f *gfP, bits [4]uint64) { e.Set(sum) } -func (e *gfP) Invert(f *gfP) { - e.exp(f, pMinus2) +func (e *gfP) Mul(a, b *gfP) *gfP { + gfpMul(e, a, b) + return e +} + +func (e *gfP) Square(a *gfP) *gfP { + gfpMul(e, a, a) + return e +} + +// Equal returns 1 if e == t, and zero otherwise. +func (e *gfP) Equal(t *gfP) int { + if *e == *t { + return 1 + } + return 0 } func (e *gfP) Sqrt(f *gfP) { @@ -95,7 +111,7 @@ func (e *gfP) Sqrt(f *gfP) { // https://eprint.iacr.org/2012/685.pdf // a1, b, i := &gfP{}, &gfP{}, &gfP{} - a1.exp(f, pMinus5Over8) + sqrtCandidate(a1, f) gfpMul(b, twoExpPMinus5Over8, a1) // b=ta1 gfpMul(a1, f, b) // a1=fb gfpMul(i, two, a1) // i=2(fb) diff --git a/sm9/bn256/gfp_invert.go b/sm9/bn256/gfp_invert.go new file mode 100644 index 0000000..78ff3a8 --- /dev/null +++ b/sm9/bn256/gfp_invert.go @@ -0,0 +1,221 @@ +// Code generated by addchain. DO NOT EDIT. +package bn256 + +// Invert sets e = 1/x, and returns e. +// +// If x == 0, Invert returns e = 0. +func (e *gfP) Invert(x *gfP) *gfP { + // Inversion is implemented as exponentiation with exponent p − 2. + // The sequence of 56 multiplications and 250 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. + // + // _10 = 2*1 + // _100 = 2*_10 + // _110 = _10 + _100 + // _1010 = _100 + _110 + // _1011 = 1 + _1010 + // _1101 = _10 + _1011 + // _10000 = _110 + _1010 + // _10101 = _1010 + _1011 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _101001 = _1010 + _11111 + // _101011 = _10 + _101001 + // _111011 = _10000 + _101011 + // _1000101 = _1010 + _111011 + // _1001111 = _1010 + _1000101 + // _1010001 = _10 + _1001111 + // _1011011 = _1010 + _1010001 + // _1011101 = _10 + _1011011 + // _1011111 = _10 + _1011101 + // _1100011 = _100 + _1011111 + // _1101001 = _110 + _1100011 + // _1101101 = _100 + _1101001 + // _1101111 = _10 + _1101101 + // _1110101 = _110 + _1101111 + // _1111011 = _110 + _1110101 + // _10110110 = _111011 + _1111011 + // i72 = ((_10110110 << 2 + 1) << 33 + _10101) << 8 + // i94 = ((_11101 + i72) << 9 + _1101111) << 10 + _1110101 + // i116 = ((2*i94 + 1) << 14 + _1110101) << 5 + // i129 = 2*((_1101 + i116) << 9 + _1111011 + _100) + // i146 = ((1 + i129) << 5 + _1011) << 9 + _111011 + // i174 = ((i146 << 8 + _11101) << 9 + _101001) << 9 + // i194 = ((_11111 + i174) << 8 + _101001) << 9 + _1101001 + // i220 = ((i194 << 8 + _1100011) << 8 + _1001111) << 8 + // i237 = ((_1011101 + i220) << 7 + _1101101) << 7 + _1011111 + // i260 = ((i237 << 8 + _101011) << 6 + _11111) << 7 + // i279 = ((_11011 + i260) << 9 + _1001111) << 7 + _1100011 + // i305 = ((i279 << 8 + _1010001) << 8 + _1000101) << 8 + // return _1111011 + i305 + // + var z = new(gfP).Set(e) + var t0 = new(gfP) + var t1 = new(gfP) + var t2 = new(gfP) + var t3 = new(gfP) + var t4 = new(gfP) + var t5 = new(gfP) + var t6 = new(gfP) + var t7 = new(gfP) + var t8 = new(gfP) + var t9 = new(gfP) + var t10 = new(gfP) + var t11 = new(gfP) + var t12 = new(gfP) + var t13 = new(gfP) + var t14 = new(gfP) + var t15 = new(gfP) + var t16 = new(gfP) + var t17 = new(gfP) + var t18 = new(gfP) + var t19 = new(gfP) + var t20 = new(gfP) + + t17.Square(x) + t15.Square(t17) + z.Mul(t17, t15) + t2.Mul(t15, z) + t14.Mul(x, t2) + t16.Mul(t17, t14) + t0.Mul(z, t2) + t19.Mul(t2, t14) + t4.Mul(z, t19) + t12.Mul(t17, t4) + t5.Mul(t17, t12) + t11.Mul(t2, t5) + t6.Mul(t17, t11) + t13.Mul(t0, t6) + t0.Mul(t2, t13) + t3.Mul(t2, t0) + t1.Mul(t17, t3) + t2.Mul(t2, t1) + t9.Mul(t17, t2) + t7.Mul(t17, t9) + t2.Mul(t15, t7) + t10.Mul(z, t2) + t8.Mul(t15, t10) + t18.Mul(t17, t8) + t17.Mul(z, t18) + z.Mul(z, t17) + t20.Mul(t13, z) + for s := 0; s < 2; s++ { + t20.Square(t20) + } + t20.Mul(x, t20) + for s := 0; s < 33; s++ { + t20.Square(t20) + } + t19.Mul(t19, t20) + for s := 0; s < 8; s++ { + t19.Square(t19) + } + t19.Mul(t12, t19) + for s := 0; s < 9; s++ { + t19.Square(t19) + } + t18.Mul(t18, t19) + for s := 0; s < 10; s++ { + t18.Square(t18) + } + t18.Mul(t17, t18) + t18.Square(t18) + t18.Mul(x, t18) + for s := 0; s < 14; s++ { + t18.Square(t18) + } + t17.Mul(t17, t18) + for s := 0; s < 5; s++ { + t17.Square(t17) + } + t16.Mul(t16, t17) + for s := 0; s < 9; s++ { + t16.Square(t16) + } + t16.Mul(z, t16) + t15.Mul(t15, t16) + t15.Square(t15) + t15.Mul(x, t15) + for s := 0; s < 5; s++ { + t15.Square(t15) + } + t14.Mul(t14, t15) + for s := 0; s < 9; s++ { + t14.Square(t14) + } + t13.Mul(t13, t14) + for s := 0; s < 8; s++ { + t13.Square(t13) + } + t12.Mul(t12, t13) + for s := 0; s < 9; s++ { + t12.Square(t12) + } + t12.Mul(t11, t12) + for s := 0; s < 9; s++ { + t12.Square(t12) + } + t12.Mul(t5, t12) + for s := 0; s < 8; s++ { + t12.Square(t12) + } + t11.Mul(t11, t12) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t10.Mul(t10, t11) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t10.Mul(t2, t10) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t10.Mul(t3, t10) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t9.Mul(t9, t10) + for s := 0; s < 7; s++ { + t9.Square(t9) + } + t8.Mul(t8, t9) + for s := 0; s < 7; s++ { + t8.Square(t8) + } + t7.Mul(t7, t8) + for s := 0; s < 8; s++ { + t7.Square(t7) + } + t6.Mul(t6, t7) + for s := 0; s < 6; s++ { + t6.Square(t6) + } + t5.Mul(t5, t6) + for s := 0; s < 7; s++ { + t5.Square(t5) + } + t4.Mul(t4, t5) + for s := 0; s < 9; s++ { + t4.Square(t4) + } + t3.Mul(t3, t4) + for s := 0; s < 7; s++ { + t3.Square(t3) + } + t2.Mul(t2, t3) + for s := 0; s < 8; s++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + for s := 0; s < 8; s++ { + t1.Square(t1) + } + t0.Mul(t0, t1) + for s := 0; s < 8; s++ { + t0.Square(t0) + } + z.Mul(z, t0) + return e.Set(z) +} diff --git a/sm9/bn256/gfp_sqrt.go b/sm9/bn256/gfp_sqrt.go new file mode 100644 index 0000000..d22800c --- /dev/null +++ b/sm9/bn256/gfp_sqrt.go @@ -0,0 +1,232 @@ +// Code generated by addchain. DO NOT EDIT. +package bn256 + +// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns +// false and e is unchanged. e and x can overlap. +func Sqrt(e, x *gfP) (isSquare bool) { + candidate, b, i := &gfP{}, &gfP{}, &gfP{} + sqrtCandidate(candidate, x) + gfpMul(b, twoExpPMinus5Over8, candidate) // b=ta1 + gfpMul(candidate, x, b) // a1=fb + gfpMul(i, two, candidate) // i=2(fb) + gfpMul(i, i, b) // i=2(fb)b + gfpSub(i, i, one) // i=2(fb)b-1 + gfpMul(i, candidate, i) // i=(fb)(2(fb)b-1) + square := new(gfP).Square(i) + if square.Equal(x) != 1 { + return false + } + e.Set(i) + return true +} + +// sqrtCandidate sets z to a square root candidate for x. z and x must not overlap. +func sqrtCandidate(z, x *gfP) { + // Since p = 8k+5, exponentiation by (p - 5) / 8 yields a square root candidate. + // + // The sequence of 54 multiplications and 248 squarings is derived from the + // following addition chain generated with github.com/mmcloughlin/addchain v0.4.0. + // + // _10 = 2*1 + // _100 = 2*_10 + // _110 = _10 + _100 + // _1010 = _100 + _110 + // _1011 = 1 + _1010 + // _1101 = _10 + _1011 + // _1111 = _10 + _1101 + // _10000 = 1 + _1111 + // _10101 = _110 + _1111 + // _11011 = _110 + _10101 + // _11101 = _10 + _11011 + // _11111 = _10 + _11101 + // _101001 = _1010 + _11111 + // _101011 = _10 + _101001 + // _111011 = _10000 + _101011 + // _1000101 = _1010 + _111011 + // _1001111 = _1010 + _1000101 + // _1010001 = _10 + _1001111 + // _1011011 = _1010 + _1010001 + // _1011101 = _10 + _1011011 + // _1011111 = _10 + _1011101 + // _1100011 = _100 + _1011111 + // _1101001 = _110 + _1100011 + // _1101101 = _100 + _1101001 + // _1101111 = _10 + _1101101 + // _1110101 = _110 + _1101111 + // i72 = ((_1011011 << 3 + 1) << 33 + _10101) << 8 + // i94 = ((_11101 + i72) << 9 + _1101111) << 10 + _1110101 + // i116 = ((2*i94 + 1) << 14 + _1110101) << 5 + // i129 = 2*((_1101 + i116) << 9 + _1110101) + _10101 + // i153 = ((i129 << 5 + _1011) << 9 + _111011) << 8 + // i174 = ((_11101 + i153) << 9 + _101001) << 9 + _11111 + // i201 = ((i174 << 8 + _101001) << 9 + _1101001) << 8 + // i220 = ((_1100011 + i201) << 8 + _1001111) << 8 + _1011101 + // i244 = ((i220 << 7 + _1101101) << 7 + _1011111) << 8 + // i260 = ((_101011 + i244) << 6 + _11111) << 7 + _11011 + // i286 = ((i260 << 9 + _1001111) << 7 + _1100011) << 8 + // return ((_1010001 + i286) << 8 + _1000101) << 5 + _1111 + // + var t0 = new(gfP) + var t1 = new(gfP) + var t2 = new(gfP) + var t3 = new(gfP) + var t4 = new(gfP) + var t5 = new(gfP) + var t6 = new(gfP) + var t7 = new(gfP) + var t8 = new(gfP) + var t9 = new(gfP) + var t10 = new(gfP) + var t11 = new(gfP) + var t12 = new(gfP) + var t13 = new(gfP) + var t14 = new(gfP) + var t15 = new(gfP) + var t16 = new(gfP) + var t17 = new(gfP) + var t18 = new(gfP) + var t19 = new(gfP) + + t18.Square(x) + t8.Square(t18) + t16.Mul(t18, t8) + t2.Mul(t8, t16) + t14.Mul(x, t2) + t17.Mul(t18, t14) + z.Mul(t18, t17) + t0.Mul(x, z) + t15.Mul(t16, z) + t4.Mul(t16, t15) + t12.Mul(t18, t4) + t5.Mul(t18, t12) + t11.Mul(t2, t5) + t6.Mul(t18, t11) + t13.Mul(t0, t6) + t0.Mul(t2, t13) + t3.Mul(t2, t0) + t1.Mul(t18, t3) + t19.Mul(t2, t1) + t9.Mul(t18, t19) + t7.Mul(t18, t9) + t2.Mul(t8, t7) + t10.Mul(t16, t2) + t8.Mul(t8, t10) + t18.Mul(t18, t8) + t16.Mul(t16, t18) + for s := 0; s < 3; s++ { + t19.Square(t19) + } + t19.Mul(x, t19) + for s := 0; s < 33; s++ { + t19.Square(t19) + } + t19.Mul(t15, t19) + for s := 0; s < 8; s++ { + t19.Square(t19) + } + t19.Mul(t12, t19) + for s := 0; s < 9; s++ { + t19.Square(t19) + } + t18.Mul(t18, t19) + for s := 0; s < 10; s++ { + t18.Square(t18) + } + t18.Mul(t16, t18) + t18.Square(t18) + t18.Mul(x, t18) + for s := 0; s < 14; s++ { + t18.Square(t18) + } + t18.Mul(t16, t18) + for s := 0; s < 5; s++ { + t18.Square(t18) + } + t17.Mul(t17, t18) + for s := 0; s < 9; s++ { + t17.Square(t17) + } + t16.Mul(t16, t17) + t16.Square(t16) + t15.Mul(t15, t16) + for s := 0; s < 5; s++ { + t15.Square(t15) + } + t14.Mul(t14, t15) + for s := 0; s < 9; s++ { + t14.Square(t14) + } + t13.Mul(t13, t14) + for s := 0; s < 8; s++ { + t13.Square(t13) + } + t12.Mul(t12, t13) + for s := 0; s < 9; s++ { + t12.Square(t12) + } + t12.Mul(t11, t12) + for s := 0; s < 9; s++ { + t12.Square(t12) + } + t12.Mul(t5, t12) + for s := 0; s < 8; s++ { + t12.Square(t12) + } + t11.Mul(t11, t12) + for s := 0; s < 9; s++ { + t11.Square(t11) + } + t10.Mul(t10, t11) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t10.Mul(t2, t10) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t10.Mul(t3, t10) + for s := 0; s < 8; s++ { + t10.Square(t10) + } + t9.Mul(t9, t10) + for s := 0; s < 7; s++ { + t9.Square(t9) + } + t8.Mul(t8, t9) + for s := 0; s < 7; s++ { + t8.Square(t8) + } + t7.Mul(t7, t8) + for s := 0; s < 8; s++ { + t7.Square(t7) + } + t6.Mul(t6, t7) + for s := 0; s < 6; s++ { + t6.Square(t6) + } + t5.Mul(t5, t6) + for s := 0; s < 7; s++ { + t5.Square(t5) + } + t4.Mul(t4, t5) + for s := 0; s < 9; s++ { + t4.Square(t4) + } + t3.Mul(t3, t4) + for s := 0; s < 7; s++ { + t3.Square(t3) + } + t2.Mul(t2, t3) + for s := 0; s < 8; s++ { + t2.Square(t2) + } + t1.Mul(t1, t2) + for s := 0; s < 8; s++ { + t1.Square(t1) + } + t0.Mul(t0, t1) + for s := 0; s < 5; s++ { + t0.Square(t0) + } + z.Mul(z, t0) +} diff --git a/sm9/bn256/gfp_test.go b/sm9/bn256/gfp_test.go index 449e915..963c9aa 100644 --- a/sm9/bn256/gfp_test.go +++ b/sm9/bn256/gfp_test.go @@ -103,6 +103,33 @@ func TestSqrt(t *testing.T) { } } +func TestGeneratedSqrt(t *testing.T) { + tests := []string{ + "9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596", + "92fe90b700fbd4d8cc177d300ed16e4e15471a681b2c9e3728c1b82c885e49c2", + } + for i, test := range tests { + y2 := bigFromHex(test) + y21 := new(big.Int).ModSqrt(y2, p) + + y3 := new(big.Int).Mul(y21, y21) + y3.Mod(y3, p) + if y2.Cmp(y3) != 0 { + t.Error("Invalid sqrt") + } + + tmp := fromBigInt(y2) + e := &gfP{} + Sqrt(e, tmp) + montDecode(e, e) + var res [32]byte + e.Marshal(res[:]) + if hex.EncodeToString(res[:]) != hex.EncodeToString(y21.Bytes()) { + t.Errorf("case %v, got %v, expected %v\n", i, hex.EncodeToString(res[:]), hex.EncodeToString(y21.Bytes())) + } + } +} + func TestInvert(t *testing.T) { x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596")) xInv := &gfP{} diff --git a/sm9/generate.go b/sm9/generate.go new file mode 100644 index 0000000..212beeb --- /dev/null +++ b/sm9/generate.go @@ -0,0 +1,118 @@ +//go:build ignore +// +build ignore + +package main + +import ( + "bytes" + "go/format" + "io" + "log" + "os" + "os/exec" +) + +// Running this generator requires addchain v0.4.0, which can be installed with +// +// go install github.com/mmcloughlin/addchain/cmd/addchain@v0.4.0 +// + +func main() { + tmplAddchainFile, err := os.CreateTemp("", "addchain-template") + if err != nil { + log.Fatal(err) + } + defer os.Remove(tmplAddchainFile.Name()) + if _, err := io.WriteString(tmplAddchainFile, tmplAddchain); err != nil { + log.Fatal(err) + } + if err := tmplAddchainFile.Close(); err != nil { + log.Fatal(err) + } + log.Printf("Generating gfp_sqrt.go...") + f, err := os.CreateTemp("", "addchain-gfp") + if err != nil { + log.Fatal(err) + } + defer os.Remove(f.Name()) + cmd := exec.Command("addchain", "search", "0x16c80000005474de3ac07569feb1d8e8a43e5269634f5ddb7cadf364fc6a28af") + cmd.Stderr = os.Stderr + cmd.Stdout = f + if err := cmd.Run(); err != nil { + log.Fatal(err) + } + if err := f.Close(); err != nil { + log.Fatal(err) + } + cmd = exec.Command("addchain", "gen", "-tmpl", tmplAddchainFile.Name(), f.Name()) + cmd.Stderr = os.Stderr + out, err := cmd.Output() + if err != nil { + log.Fatal(err) + } + out = bytes.Replace(out, []byte("Element"), []byte("gfP"), -1) + out, err = format.Source(out) + if err != nil { + log.Fatal(err) + } + if err := os.WriteFile("gfp_sqrt.go", out, 0644); err != nil { + log.Fatal(err) + } +} + +const tmplAddchain = `// Code generated by {{ .Meta.Name }}. DO NOT EDIT. +package bn256 + +// Sqrt sets e to a square root of x. If x is not a square, Sqrt returns +// false and e is unchanged. e and x can overlap. +func Sqrt(e, x *Element) (isSquare bool) { + candidate, b, i := &gfP{}, &gfP{}, &gfP{} + sqrtCandidate(candidate, x) + gfpMul(b, twoExpPMinus5Over8, candidate) // b=ta1 + gfpMul(candidate, x, b) // a1=fb + gfpMul(i, two, candidate) // i=2(fb) + gfpMul(i, i, b) // i=2(fb)b + gfpSub(i, i, one) // i=2(fb)b-1 + gfpMul(i, candidate, i) // i=(fb)(2(fb)b-1) + square := new(Element).Square(i) + if square.Equal(x) != 1 { + return false + } + e.Set(i) + return true +} + +// sqrtCandidate sets z to a square root candidate for x. z and x must not overlap. +func sqrtCandidate(z, x *Element) { + // Since p = 8k+5, exponentiation by (p - 5) / 8 yields a square root candidate. + // + // The sequence of {{ .Ops.Adds }} multiplications and {{ .Ops.Doubles }} squarings is derived from the + // following addition chain generated with {{ .Meta.Module }} {{ .Meta.ReleaseTag }}. + // + {{- range lines (format .Script) }} + // {{ . }} + {{- end }} + // + {{- range .Program.Temporaries }} + var {{ . }} = new(Element) + {{- end }} + {{ range $i := .Program.Instructions -}} + {{- with add $i.Op }} + {{ $i.Output }}.Mul({{ .X }}, {{ .Y }}) + {{- end -}} + {{- with double $i.Op }} + {{ $i.Output }}.Square({{ .X }}) + {{- end -}} + {{- with shift $i.Op -}} + {{- $first := 0 -}} + {{- if ne $i.Output.Identifier .X.Identifier }} + {{ $i.Output }}.Square({{ .X }}) + {{- $first = 1 -}} + {{- end }} + for s := {{ $first }}; s < {{ .S }}; s++ { + {{ $i.Output }}.Square({{ $i.Output }}) + } + {{- end -}} + {{- end }} +} +`