sm9/bn256: arm64 complete add

This commit is contained in:
Sun Yimin 2023-07-25 15:02:41 +08:00 committed by GitHub
parent db92a6f60e
commit 0a559b5202
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 167 additions and 76 deletions

View File

@ -1,74 +0,0 @@
//go:build arm64 && !purego
// +build arm64,!purego
package bn256
// gfp2Mul performs gfP2 multiplication of a and b and stores the result in c.
// The function is declared without a body, so its implementation lives
// outside this file (assembly for the arm64, !purego build).
//
//go:noescape
func gfp2Mul(c, a, b *gfP2)
// gfp2MulU performs gfP2 multiplication followed by a multiplication by u:
// c = a*b*u.
// The function is declared without a body, so its implementation lives
// outside this file (assembly for the arm64, !purego build).
//
//go:noescape
func gfp2MulU(c, a, b *gfP2)
// gfp2Square computes the gfP2 square of a and stores the result in c.
// The function is declared without a body, so its implementation lives
// outside this file (assembly for the arm64, !purego build).
//
//go:noescape
func gfp2Square(c, a *gfP2)
// gfp2SquareU computes the gfP2 square of a, multiplies it by u, and stores
// the result in c.
// The function is declared without a body, so its implementation lives
// outside this file (assembly for the arm64, !purego build).
//
//go:noescape
func gfp2SquareU(c, a *gfP2)
// curvePointDoubleComplete performs point doubling: it sets c = a + a.
// a can be the point at infinity.
// The function is declared without a body, so its implementation lives
// outside this file (assembly for the arm64, !purego build).
//
//go:noescape
func curvePointDoubleComplete(c, a *curvePoint)
// curvePointAddComplete sets c = p1 + p2 in projective coordinates.
//
// It follows the complete addition formula for a = 0 from "Complete addition
// formulas for prime order elliptic curves"
// (https://eprint.iacr.org/2015/1060), §3.2, Algorithm 7: complete,
// projective point addition for prime order j-invariant 0 short Weierstrass
// curves. The scratch values u0..u4 correspond to t0..t4 in the paper and
// outX/outY/outZ to X3/Y3/Z3.
//
// The result is accumulated in local temporaries and copied into c only at
// the very end, after the last read of p1 and p2.
func curvePointAddComplete(c, p1, p2 *curvePoint) {
	var (
		u0, u1, u2, u3, u4 gfP // scratch values t0..t4 of Algorithm 7
		outX, outY, outZ   gfP // working copies of the result coordinates
	)

	// Plain coordinate products.
	gfpMul(&u0, &p1.x, &p2.x) // u0 = X1X2
	gfpMul(&u1, &p1.y, &p2.y) // u1 = Y1Y2
	gfpMul(&u2, &p1.z, &p2.z) // u2 = Z1Z2

	// u3 = X1Y2 + X2Y1, obtained as (X1 + Y1)(X2 + Y2) - X1X2 - Y1Y2.
	gfpAdd(&u3, &p1.x, &p1.y)
	gfpAdd(&u4, &p2.x, &p2.y)
	gfpMul(&u3, &u3, &u4)
	gfpAdd(&u4, &u0, &u1)
	gfpSub(&u3, &u3, &u4)

	// u4 = Y1Z2 + Y2Z1, obtained as (Y1 + Z1)(Y2 + Z2) - Y1Y2 - Z1Z2.
	gfpAdd(&u4, &p1.y, &p1.z)
	gfpAdd(&outX, &p2.y, &p2.z)
	gfpMul(&u4, &u4, &outX)
	gfpAdd(&outX, &u1, &u2)
	gfpSub(&u4, &u4, &outX)

	// outY = X1Z2 + X2Z1, obtained as (X1 + Z1)(X2 + Z2) - X1X2 - Z1Z2.
	gfpAdd(&outX, &p1.x, &p1.z)
	gfpAdd(&outY, &p2.x, &p2.z)
	gfpMul(&outX, &outX, &outY)
	gfpAdd(&outY, &u0, &u2)
	gfpSub(&outY, &outX, &outY)

	// Scale by 3 and by 3b.
	gfpTriple(&u0, &u0)               // u0 = 3X1X2
	gfpMul(&u2, threeCurveB, &u2)     // u2 = 3bZ1Z2
	gfpAdd(&outZ, &u1, &u2)           // outZ = Y1Y2 + 3bZ1Z2
	gfpSub(&u1, &u1, &u2)             // u1 = Y1Y2 - 3bZ1Z2
	gfpMul(&outY, threeCurveB, &outY) // outY = 3b(X1Z2 + X2Z1)

	// X3 = (X1Y2 + X2Y1)(Y1Y2 - 3bZ1Z2) - 3b(Y1Z2 + Y2Z1)(X1Z2 + X2Z1).
	gfpMul(&outX, &u4, &outY)
	gfpMul(&u2, &u3, &u1)
	gfpSub(&outX, &u2, &outX)

	// Y3 = (Y1Y2 + 3bZ1Z2)(Y1Y2 - 3bZ1Z2) + 9bX1X2(X1Z2 + X2Z1).
	gfpMul(&outY, &outY, &u0)
	gfpMul(&u1, &u1, &outZ)
	gfpAdd(&outY, &u1, &outY)

	// Z3 = (Y1Z2 + Y2Z1)(Y1Y2 + 3bZ1Z2) + 3X1X2(X1Y2 + X2Y1).
	gfpMul(&u0, &u0, &u3)
	gfpMul(&outZ, &outZ, &u4)
	gfpAdd(&outZ, &outZ, &u0)

	c.x.Set(&outX)
	c.y.Set(&outY)
	c.z.Set(&outZ)
}

View File

@ -757,6 +757,171 @@ TEXT ·curvePointDoubleComplete(SB),NOSPLIT,$168-16
RET RET
/* ---------------------------------------*/
#undef x3t #undef x3t
#undef y3t #undef y3t
#undef z3t #undef z3t
// Stack scratch slots used by curvePointAddComplete. Every slot is 32 bytes
// wide and slot N starts at offset 32*N + 8 from RSP: tmp2..tmp4 are slots
// 2..4, and x3t/y3t/z3t are slots 5..7. The last slot therefore ends at
// 32*8 + 8 = 264, which matches the frame size declared on the TEXT line.
// NOTE(review): tmp0 and tmp1 are also used by the function but are defined
// outside this view.
#define tmp2(off) (32*2 + 8 + off)(RSP)
#define tmp3(off) (32*3 + 8 + off)(RSP)
#define tmp4(off) (32*4 + 8 + off)(RSP)
#define x3t(off) (32*5 + 8 + off)(RSP)
#define y3t(off) (32*6 + 8 + off)(RSP)
#define z3t(off) (32*7 + 8 + off)(RSP)
// func curvePointAddComplete(c, a, b *curvePoint)
//
// Sets c = a + b using the complete projective addition formula for a = 0
// curves (Algorithm 7 of "Complete addition formulas for prime order elliptic
// curves", https://eprint.iacr.org/2015/1060). The per-step comments use the
// paper's names t0..t4 and X3/Y3/Z3.
//
// Frame: 264 bytes of locals (the 32-byte scratch slots tmp0..tmp4, x3t, y3t,
// z3t) and 24 bytes of arguments (three pointers).
//
// NOTE(review): the helper macros/routines are defined outside this view.
// Judging by the load/store pattern below, gfpMulInternal leaves its product
// in the y registers (saved with STy), while gfpAddInline and gfpSubInternal
// leave their result in the x registers (saved with STx); gfpSubInternal
// appears to compute y - x and gfpMulBy2Inline appears to compute x = 2*y.
// Confirm against the helper definitions.
TEXT ·curvePointAddComplete(SB),0,$264-24
	MOVD in1+8(FP), a_ptr
	MOVD in2+16(FP), b_ptr

	// Constants consumed by the field helpers.
	// NOTE(review): presumably the Montgomery constant and the modulus - confirm.
	MOVD ·np+0x00(SB), hlp1
	LDP ·p2+0x00(SB), (const0, const1)
	LDP ·p2+0x10(SB), (const2, const3)

	LDx (x1in)
	LDy (x2in)
	CALL gfpMulInternal(SB) // t0 := X1X2
	STy (tmp0)

	LDx (y1in)
	LDy (y2in)
	CALL gfpMulInternal(SB) // t1 := Y1Y2
	STy (tmp1)

	LDx (z1in)
	LDy (z2in)
	CALL gfpMulInternal(SB) // t2 := Z1Z2
	STy (tmp2)

	LDx (x1in)
	LDy (y1in)
	gfpAddInline // t3 := X1 + Y1
	STx (tmp3)

	LDx (x2in)
	LDy (y2in)
	gfpAddInline // t4 := X2 + Y2
	LDy (tmp3)
	CALL gfpMulInternal(SB) // t3 := t3 * t4 = (X1 + Y1) * (X2 + Y2)
	STy (tmp3)

	LDx (tmp0)
	LDy (tmp1)
	gfpAddInline // t4 := t0 + t1
	LDy (tmp3)
	CALL gfpSubInternal(SB) // t3 := t3 - t4 = X1Y2 + X2Y1
	STx (tmp3)

	LDx (y1in)
	LDy (z1in)
	gfpAddInline // t4 := Y1 + Z1
	STx (tmp4)

	LDx (y2in)
	LDy (z2in)
	gfpAddInline // X3 := Y2 + Z2
	LDy (tmp4)
	CALL gfpMulInternal(SB) // t4 := t4 * X3 = (Y1 + Z1)(Y2 + Z2)
	STy (tmp4)

	LDx (tmp1)
	LDy (tmp2)
	gfpAddInline // X3 := t1 + t2
	LDy (tmp4)
	CALL gfpSubInternal(SB) // t4 := t4 - X3 = Y1Z2 + Y2Z1
	STx (tmp4)

	LDx (x1in)
	LDy (z1in)
	gfpAddInline // X3 := X1 + Z1
	STx (x3t)

	LDx (x2in)
	LDy (z2in)
	gfpAddInline // Y3 := X2 + Z2
	LDy (x3t)
	CALL gfpMulInternal(SB) // X3 := X3 * Y3
	STy (x3t)

	LDx (tmp0)
	LDy (tmp2)
	gfpAddInline // Y3 := t0 + t2
	LDy (x3t)
	CALL gfpSubInternal(SB) // Y3 := X3 - Y3 = X1Z2 + X2Z1
	STx (y3t)

	LDy (tmp0)
	gfpMulBy2Inline
	gfpAddInline // t0 := t0 + t0 + t0 = 3X1X2
	STx (tmp0)

	// Multiply t2 by 3b without a field multiplication: four doublings
	// followed by one subtraction of the original value.
	// NOTE(review): with the helper semantics assumed above this yields
	// 16*t2 - t2 = 15*t2, i.e. it relies on 3b = 15 - confirm.
	LDy (tmp2)
	gfpMulBy2Inline
	x2y
	gfpMulBy2Inline
	x2y
	gfpMulBy2Inline
	x2y
	gfpMulBy2Inline
	x2y
	LDx (tmp2)
	CALL gfpSubInternal(SB) // t2 := 3b * t2 = 3bZ1Z2
	STx (tmp2)

	LDy (tmp1)
	gfpAddInline // Z3 := t1 + t2 = Y1Y2 + 3bZ1Z2
	STx (z3t)

	LDx (tmp2)
	CALL gfpSubInternal(SB) // t1 := t1 - t2 = Y1Y2 - 3bZ1Z2
	STx (tmp1)

	// Same doubling chain as above, applied to Y3.
	LDy (y3t)
	gfpMulBy2Inline
	x2y
	gfpMulBy2Inline
	x2y
	gfpMulBy2Inline
	x2y
	gfpMulBy2Inline
	x2y
	LDx (y3t)
	CALL gfpSubInternal(SB) // Y3 = 3b * Y3 = 3b(X1Z2 + X2Z1)
	STx (y3t)

	LDy (tmp4)
	CALL gfpMulInternal(SB) // X3 := t4 * Y3 = 3b(X1Z2 + X2Z1)(Y1Z2 + Y2Z1)
	STy (x3t)

	// From here on only stack temporaries are read, so b_ptr is retargeted at
	// the result: the x2in/y2in/z2in stores below write c.x, c.y and c.z.
	// NOTE(review): assumes x2in/y2in/z2in address through b_ptr - confirm.
	MOVD res+0(FP), b_ptr

	LDx (tmp3)
	LDy (tmp1)
	CALL gfpMulInternal(SB) // t2 := t3 * t1 = (X1Y2 + X2Y1)(Y1Y2 - 3bZ1Z2)
	LDx (x3t)
	CALL gfpSubInternal(SB) // X3 := t2 - X3 = (X1Y2 + X2Y1)(Y1Y2 - 3bZ1Z2) - 3b(Y1Z2 + Y2Z1)(X1Z2 + X2Z1)
	STx (x2in)

	LDy (y3t)
	LDx (tmp0)
	CALL gfpMulInternal(SB) // Y3 := Y3 * t0 = 9bX1X2(X1Z2 + X2Z1)
	STy (y3t)

	LDx (tmp1)
	LDy (z3t)
	CALL gfpMulInternal(SB) // t1 := t1 * Z3 = (Y1Y2 + 3bZ1Z2)(Y1Y2 - 3bZ1Z2)
	LDx (y3t)
	gfpAddInline // Y3 := t1 + Y3 = (Y1Y2 + 3bZ1Z2)(Y1Y2 - 3bZ1Z2) + 9bX1X2(X1Z2 + X2Z1)
	STx (y2in)

	LDx (tmp0)
	LDy (tmp3)
	CALL gfpMulInternal(SB) // t0 := t0 * t3 = 3X1X2(X1Y2 + X2Y1)
	STy (tmp0)

	LDx (tmp4)
	LDy (z3t)
	CALL gfpMulInternal(SB) // Z3 := Z3 * t4 = (Y1Z2 + Y2Z1)(Y1Y2 + 3bZ1Z2)
	LDx (tmp0)
	gfpAddInline // Z3 := Z3 + t0 = (Y1Z2 + Y2Z1)(Y1Y2 + 3bZ1Z2) + 3X1X2(X1Y2 + X2Y1)
	// Z3 goes into the z coordinate of the result. Storing it through x2in
	// would overwrite the X3 written above and leave c.z unset.
	STx (z2in)

	RET

View File

@ -1,5 +1,5 @@
//go:build amd64 && !purego //go:build (amd64 && !purego) || (arm64 && !purego)
// +build amd64,!purego // +build amd64,!purego arm64,!purego
package bn256 package bn256