From 04e6a1c9b3ea743526b6845f3f972577b23b86f8 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Tue, 25 Jul 2023 08:35:00 +0800 Subject: [PATCH] sm9/bn256: arm64 gfp2Square & gfp2SquareU --- sm9/bn256/gfp2_g1_arm64.go | 40 ++++---------------- sm9/bn256/gfp2_g1_arm64.s | 75 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 32 deletions(-) diff --git a/sm9/bn256/gfp2_g1_arm64.go b/sm9/bn256/gfp2_g1_arm64.go index 1c65710..8fe0640 100644 --- a/sm9/bn256/gfp2_g1_arm64.go +++ b/sm9/bn256/gfp2_g1_arm64.go @@ -13,39 +13,15 @@ func gfp2Mul(c, a, b *gfP2) //go:noescape func gfp2MulU(c, a, b *gfP2) -func gfp2Square(c, a *gfP2) { - tmp := &gfP2{} - tx := &tmp.x - ty := &tmp.y +// gfP2 square. +// +//go:noescape +func gfp2Square(c, a *gfP2) - gfpAdd(ty, &a.x, &a.y) - gfpDouble(tx, &a.x) - gfpSub(tx, &a.y, tx) - gfpMul(ty, tx, ty) - gfpMul(tx, &a.x, &a.y) - gfpAdd(ty, tx, ty) - gfpDouble(tx, tx) - - gfp2Copy(c, tmp) -} - -func gfp2SquareU(c, a *gfP2) { - tmp := &gfP2{} - tx := &tmp.x - ty := &tmp.y - - gfpAdd(tx, &a.x, &a.y) - gfpDouble(ty, &a.x) - gfpSub(ty, &a.y, ty) - gfpMul(tx, tx, ty) - gfpMul(ty, &a.x, &a.y) - gfpAdd(tx, tx, ty) - gfpDouble(ty, ty) - gfpDouble(ty, ty) - gfpNeg(ty, ty) - - gfp2Copy(c, tmp) -} +// gfP2 square and mult u. +// +//go:noescape +func gfp2SquareU(c, a *gfP2) func curvePointDoubleComplete(c, p *curvePoint) { // Complete addition formula for a = 0 from "Complete addition formulas for diff --git a/sm9/bn256/gfp2_g1_arm64.s b/sm9/bn256/gfp2_g1_arm64.s index 36b1af6..23b7f54 100644 --- a/sm9/bn256/gfp2_g1_arm64.s +++ b/sm9/bn256/gfp2_g1_arm64.s @@ -601,3 +601,78 @@ TEXT ·gfp2MulU(SB),NOSPLIT,$104-24 STx (x3out) RET + +// func gfp2Square(c, a *gfP2) +TEXT ·gfp2Square(SB),NOSPLIT,$72-16 + MOVD res+0(FP), b_ptr + MOVD in1+8(FP), a_ptr + + MOVD ·np+0x00(SB), hlp1 + LDP ·p2+0x00(SB), (const0, const1) + LDP ·p2+0x10(SB), (const2, const3) + + LDx (y1in) + LDy (x1in) + gfpAddInline + STx (tmp0) + gfpMulBy2Inline + LDy (y1in) + CALL gfpSubInternal(SB) + LDy (tmp0) + CALL gfpMulInternal(SB) + STy (tmp0) + + LDx (y1in) + LDy (x1in) + CALL gfpMulInternal(SB) + //STy (tmp1) + LDx (tmp0) + gfpAddInline + STx (y2in) + + //LDy (tmp1) + gfpMulBy2Inline + STx (x2in) + + RET + +// func gfp2SquareU(c, a *gfP2) +TEXT ·gfp2SquareU(SB),NOSPLIT,$72-16 + MOVD res+0(FP), b_ptr + MOVD in1+8(FP), a_ptr + + MOVD ·np+0x00(SB), hlp1 + LDP ·p2+0x00(SB), (const0, const1) + LDP ·p2+0x10(SB), (const2, const3) + + LDx (y1in) + LDy (x1in) + gfpAddInline + STx (tmp0) + gfpMulBy2Inline + LDy (y1in) + CALL gfpSubInternal(SB) + LDy (tmp0) + CALL gfpMulInternal(SB) + STy (tmp0) + + LDx (y1in) + LDy (x1in) + CALL gfpMulInternal(SB) + //STy (tmp1) + LDx (tmp0) + gfpAddInline + STx (x2in) + + //LDy (tmp1) + gfpMulBy2Inline + x2y + gfpMulBy2Inline + MOVD $0, y0 + MOVD $0, y1 + MOVD $0, y2 + MOVD $0, y3 + CALL gfpSubInternal(SB) + STx (y2in) + + RET