From c62d6daf7471facd32843f0ec91effde786ddd1b Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Tue, 25 Jul 2023 08:21:44 +0800
Subject: [PATCH] sm9/bn256: arm64 gfp2MulU

---
 sm9/bn256/gfp2_g1_arm64.go | 27 +++---------------
 sm9/bn256/gfp2_g1_arm64.s  | 57 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 59 insertions(+), 25 deletions(-)

diff --git a/sm9/bn256/gfp2_g1_arm64.go b/sm9/bn256/gfp2_g1_arm64.go
index 7fdf2b8..1c65710 100644
--- a/sm9/bn256/gfp2_g1_arm64.go
+++ b/sm9/bn256/gfp2_g1_arm64.go
@@ -8,29 +8,10 @@ package bn256
 //go:noescape
 func gfp2Mul(c, a, b *gfP2)
 
-func gfp2MulU(c, a, b *gfP2) {
-	tmp := &gfP2{}
-	tx := &tmp.x
-	ty := &tmp.y
-	v0, v1 := &gfP{}, &gfP{}
-
-	gfpMul(v0, &a.y, &b.y)
-	gfpMul(v1, &a.x, &b.x)
-
-	gfpAdd(tx, &a.x, &a.y)
-	gfpAdd(ty, &b.x, &b.y)
-
-	gfpMul(ty, tx, ty)
-	gfpSub(ty, ty, v0)
-	gfpSub(ty, ty, v1)
-	gfpDouble(ty, ty)
-	gfpNeg(ty, ty)
-
-	gfpSub(tx, v0, v1)
-	gfpSub(tx, tx, v1)
-
-	gfp2Copy(c, tmp)
-}
+// gfP2 multiplication. c = a*b*u (body is the TEXT ·gfp2MulU routine in gfp2_g1_arm64.s)
+//
+//go:noescape
+func gfp2MulU(c, a, b *gfP2)
 
 func gfp2Square(c, a *gfP2) {
 	tmp := &gfP2{}
diff --git a/sm9/bn256/gfp2_g1_arm64.s b/sm9/bn256/gfp2_g1_arm64.s
index beb414f..36b1af6 100644
--- a/sm9/bn256/gfp2_g1_arm64.s
+++ b/sm9/bn256/gfp2_g1_arm64.s
@@ -515,8 +515,8 @@ TEXT ·gfp2Mul(SB),NOSPLIT,$104-24
 	LDx (y1in)
 	LDy (y2in)
 	CALL gfpMulInternal(SB)
-
 	STy (tmp0)
+
 	LDx (x1in)
 	LDy (x2in)
 	CALL gfpMulInternal(SB)
@@ -538,7 +538,7 @@ TEXT ·gfp2Mul(SB),NOSPLIT,$104-24
 	x2y
 	LDx (tmp1)
 	CALL gfpSubInternal(SB)
-	MOVD res+0(FP), res_ptr // not use hlp1 any more
+	MOVD res+0(FP), res_ptr // not use hlp1 any more
 	STx (x3out)
 
 	LDy (tmp1)
@@ -548,3 +548,56 @@ TEXT ·gfp2Mul(SB),NOSPLIT,$104-24
 	STx (y3out)
 
 	RET
+
+// func gfp2MulU(c, a, b *gfP2): c = a*b*u, following the same steps as the Go version removed by this patch.
+TEXT ·gfp2MulU(SB),NOSPLIT,$104-24
+	MOVD in1+8(FP), a_ptr // pointer argument at FP+8 (a in the Go declaration)
+	MOVD in2+16(FP), b_ptr // pointer argument at FP+16 (b in the Go declaration)
+
+	MOVD ·np+0x00(SB), hlp1 // first word of ·np; presumably consumed by gfpMulInternal - TODO confirm
+	LDP ·p2+0x00(SB), (const0, const1) // const0..const3 = first four 64-bit words of ·p2
+	LDP ·p2+0x10(SB), (const2, const3)
+
+	LDx (y1in)
+	LDy (y2in)
+	CALL gfpMulInternal(SB) // NOTE(review): product appears to land in the y registers (stored with STy next) - confirm in the helper
+	STy (tmp0) // tmp0 = (y1in)*(y2in); v0 = a.y*b.y in the removed Go version
+
+	LDx (x1in)
+	LDy (x2in)
+	CALL gfpMulInternal(SB)
+	STy (tmp1) // tmp1 = (x1in)*(x2in); v1 = a.x*b.x in the removed Go version
+
+	LDx (x1in)
+	LDy (y1in)
+	gfpAddInline
+	STx (tmp2) // tmp2 = (x1in)+(y1in); tx = a.x+a.y in the removed Go version
+
+	LDx (x2in)
+	LDy (y2in)
+	gfpAddInline // (x2in)+(y2in); ty = b.x+b.y in the removed Go version
+	LDy (tmp2)
+	CALL gfpMulInternal(SB) // product of the two sums
+
+	LDx (tmp0)
+	CALL gfpSubInternal(SB) // subtract tmp0 (v0); NOTE(review): presumably leaves y - x in x - confirm in the helper
+	x2y
+	LDx (tmp1)
+	CALL gfpSubInternal(SB) // subtract tmp1 (v1), leaving the cross term a.x*b.y + a.y*b.x
+	x2y
+	gfpMulBy2Inline // double, like gfpDouble in the removed Go version
+	MOVD $0, y0 // y0..y3 = 0 so that the next subtraction negates
+	MOVD $0, y1
+	MOVD $0, y2
+	MOVD $0, y3
+	CALL gfpSubInternal(SB) // 0 - value, like gfpNeg in the removed Go version
+	MOVD res+0(FP), res_ptr // hlp1 is not used any more (presumably res_ptr reuses its register - TODO confirm)
+	STx (y3out) // store the component the Go version wrote to c.y
+
+	LDy (tmp1)
+	gfpMulBy2Inline // 2*tmp1
+	LDy (tmp0)
+	CALL gfpSubInternal(SB) // tmp0 - 2*tmp1, i.e. v0 - v1 - v1 in the removed Go version
+	STx (x3out) // store the component the Go version wrote to c.x
+
+	RET