diff --git a/sm9/bn256/gfp2_g1_arm64.go b/sm9/bn256/gfp2_g1_arm64.go
index 8fe0640..f92c169 100644
--- a/sm9/bn256/gfp2_g1_arm64.go
+++ b/sm9/bn256/gfp2_g1_arm64.go
@@ -23,35 +23,12 @@ func gfp2Square(c, a *gfP2)
 //go:noescape
 func gfp2SquareU(c, a *gfP2)
 
-func curvePointDoubleComplete(c, p *curvePoint) {
-	// Complete addition formula for a = 0 from "Complete addition formulas for
-	// prime order elliptic curves" (https://eprint.iacr.org/2015/1060), §3.2.
-	// Algorithm 9: Exception-free point doubling for prime order j-invariant 0 short Weierstrass curves.
-	t0, t1, t2 := new(gfP), new(gfP), new(gfP)
-	x3, y3, z3 := new(gfP), new(gfP), new(gfP)
-
-	gfpSqr(t0, &p.y, 1)         // t0 := Y^2
-	gfpDouble(z3, t0)           // Z3 := t0 + t0
-	gfpDouble(z3, z3)           // Z3 := Z3 + Z3
-	gfpDouble(z3, z3)           // Z3 := Z3 + Z3
-	gfpMul(t1, &p.y, &p.z)      // t1 := YZ
-	gfpSqr(t2, &p.z, 1)         // t2 := Z^2
-	gfpMul(t2, threeCurveB, t2) // t2 := 3b * t2 = 3bZ^2
-	gfpMul(x3, t2, z3)          // X3 := t2 * Z3
-	gfpAdd(y3, t0, t2)          // Y3 := t0 + t2
-	gfpMul(z3, t1, z3)          // Z3 := t1 * Z3
-	gfpTriple(t2, t2)           // t2 := t2 + t2 + t2
-	gfpSub(t0, t0, t2)          // t0 := t0 - t2
-	gfpMul(y3, t0, y3)          // Y3 := t0 * Y3
-	gfpAdd(y3, x3, y3)          // Y3 := X3 + Y3
-	gfpMul(t1, &p.x, &p.y)      // t1 := XY
-	gfpMul(x3, t0, t1)          // X3 := t0 * t1
-	gfpDouble(x3, x3)           // X3 := X3 + X3
-
-	c.x.Set(x3)
-	c.y.Set(y3)
-	c.z.Set(z3)
-}
+// curvePointDoubleComplete sets c = a + a using the complete doubling formula
+// for a = 0 curves (https://eprint.iacr.org/2015/1060, Algorithm 9).
+// a can be the point at infinity.
+//
+//go:noescape
+func curvePointDoubleComplete(c, a *curvePoint)
 
 func curvePointAddComplete(c, p1, p2 *curvePoint) {
 	// Complete addition formula for a = 0 from "Complete addition formulas for
diff --git a/sm9/bn256/gfp2_g1_arm64.s b/sm9/bn256/gfp2_g1_arm64.s
index 23b7f54..115bd8c 100644
--- a/sm9/bn256/gfp2_g1_arm64.s
+++ b/sm9/bn256/gfp2_g1_arm64.s
@@ -676,3 +676,91 @@ TEXT ·gfp2SquareU(SB),NOSPLIT,$72-16
 	STx (y2in)
 
 	RET
+
+/* ---------------------------------------*/
+#undef tmp2
+// RSP-relative scratch slots holding the intermediate X3, Y3 and Z3 values.
+#define x3t(off) (32*2 + 8 + off)(RSP)
+#define y3t(off) (32*3 + 8 + off)(RSP)
+#define z3t(off) (32*4 + 8 + off)(RSP)
+
+// func curvePointDoubleComplete(c, a *curvePoint)
+//
+// Sets c = a + a. The step comments follow the Go implementation this routine
+// replaces (complete doubling for a = 0, https://eprint.iacr.org/2015/1060,
+// Algorithm 9).
+TEXT ·curvePointDoubleComplete(SB),NOSPLIT,$168-16
+	MOVD c+0(FP), b_ptr
+	MOVD a+8(FP), a_ptr
+
+	MOVD ·np+0x00(SB), hlp1
+	LDP ·p2+0x00(SB), (const0, const1)
+	LDP ·p2+0x10(SB), (const2, const3)
+
+	LDx (y1in)
+	CALL gfpSqrInternal(SB)    // t0 := Y^2
+	STy (tmp0)
+
+	gfpMulBy2Inline            // Z3 := t0 + t0
+	x2y
+	gfpMulBy2Inline            // Z3 := Z3 + Z3
+	x2y
+	gfpMulBy2Inline            // Z3 := Z3 + Z3
+	STx (z3t)
+
+	LDx (z1in)
+	CALL gfpSqrInternal(SB)    // t2 := Z^2
+	STy (tmp1)
+	// Four doublings give 16*t2; subtracting the saved t2 below leaves 15*t2.
+	gfpMulBy2Inline
+	x2y
+	gfpMulBy2Inline
+	x2y
+	gfpMulBy2Inline
+	x2y
+	gfpMulBy2Inline
+	x2y
+	LDx (tmp1)
+	CALL gfpSubInternal(SB)    // t2 := 3b * t2 = 3bZ^2
+	STx (tmp1)
+	LDy (z3t)
+	CALL gfpMulInternal(SB)    // X3 := t2 * Z3
+	STy (x3t)
+
+	LDx (tmp0)
+	LDy (tmp1)
+	gfpAddInline               // Y3 := t0 + t2
+	STx (y3t)
+	gfpMulBy2Inline
+	gfpAddInline               // t2 := t2 + t2 + t2
+	LDy (tmp0)
+	CALL gfpSubInternal(SB)    // t0 := t0 - t2
+	STx (tmp0)                 // keep the updated t0: it is reloaded for X3 := t0 * t1
+	LDy (y3t)
+	CALL gfpMulInternal(SB)    // Y3 := t0 * Y3
+	LDx (x3t)
+	gfpAddInline               // Y3 := X3 + Y3
+	// NOTE(review): y1in is read again below, after this store. The replaced Go
+	// code wrote its outputs last; confirm c and a never alias, or buffer Y3.
+	STx (y2in)
+
+	LDx (y1in)
+	LDy (z1in)
+	CALL gfpMulInternal(SB)    // t1 := YZ
+	LDx (z3t)
+	CALL gfpMulInternal(SB)    // Z3 := t1 * Z3
+	STy (z2in)
+
+	LDx (x1in)
+	LDy (y1in)
+	CALL gfpMulInternal(SB)    // t1 := XY
+	LDx (tmp0)
+	CALL gfpMulInternal(SB)    // X3 := t0 * t1
+	gfpMulBy2Inline            // X3 := X3 + X3
+	STx (x2in)
+
+	RET
+
+#undef x3t
+#undef y3t
+#undef z3t