plugin: sm2, sm9: use BP carefully (keep the loop counter in BP instead of spilling BX to the stack)

This commit is contained in:
Sun Yimin 2023-09-11 08:47:02 +08:00 committed by GitHub
parent 719bca92db
commit 429c2db8aa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 24 deletions

View File

@ -13,16 +13,15 @@
/* ---------------------------------------*/ /* ---------------------------------------*/
// func p256Sqr(res, in *p256Element, n int) // func p256Sqr(res, in *p256Element, n int)
TEXT ·p256Sqr(SB),NOSPLIT,$8-24 TEXT ·p256Sqr(SB),NOSPLIT,$0
MOVQ res+0(FP), res_ptr MOVQ res+0(FP), res_ptr
MOVQ in+8(FP), x_ptr MOVQ in+8(FP), x_ptr
MOVQ n+16(FP), BX MOVQ n+16(FP), BP
CMPB ·supportBMI2+0(SB), $0x01 CMPB ·supportBMI2+0(SB), $0x01
JEQ sqrBMI2 JEQ sqrBMI2
sqrLoop: sqrLoop:
MOVQ BX, (SP)
// y[1:] * y[0] // y[1:] * y[0]
MOVQ (8*0)(x_ptr), t0 MOVQ (8*0)(x_ptr), t0
@ -106,13 +105,11 @@ sqrLoop:
p256SqrMontReduce() p256SqrMontReduce()
p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr) p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
MOVQ res_ptr, x_ptr MOVQ res_ptr, x_ptr
MOVQ (SP), BX DECQ BP
DECQ BX
JNE sqrLoop JNE sqrLoop
RET RET
sqrBMI2: sqrBMI2:
MOVQ BX, (SP)
// y[1:] * y[0] // y[1:] * y[0]
MOVQ (8*0)(x_ptr), DX MOVQ (8*0)(x_ptr), DX
@ -177,23 +174,21 @@ sqrBMI2:
p256SqrMontReduce() p256SqrMontReduce()
p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr) p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
MOVQ res_ptr, x_ptr MOVQ res_ptr, x_ptr
MOVQ (SP), BX DECQ BP
DECQ BX
JNE sqrBMI2 JNE sqrBMI2
RET RET
/* ---------------------------------------*/ /* ---------------------------------------*/
// func p256OrdSqr(res, in *p256OrdElement, n int) // func p256OrdSqr(res, in *p256OrdElement, n int)
TEXT ·p256OrdSqr(SB),NOSPLIT,$8-24 TEXT ·p256OrdSqr(SB),NOSPLIT,$0
MOVQ res+0(FP), res_ptr MOVQ res+0(FP), res_ptr
MOVQ in+8(FP), x_ptr MOVQ in+8(FP), x_ptr
MOVQ n+16(FP), BX MOVQ n+16(FP), BP
CMPB ·supportBMI2+0(SB), $0x01 CMPB ·supportBMI2+0(SB), $0x01
JEQ ordSqrLoopBMI2 JEQ ordSqrLoopBMI2
ordSqrLoop: ordSqrLoop:
MOVQ BX, (SP)
// y[1:] * y[0] // y[1:] * y[0]
MOVQ (8*0)(x_ptr), t0 MOVQ (8*0)(x_ptr), t0
@ -406,14 +401,12 @@ ordSqrLoop:
p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr) p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
MOVQ res_ptr, x_ptr MOVQ res_ptr, x_ptr
MOVQ (SP), BX DECQ BP
DECQ BX
JNE ordSqrLoop JNE ordSqrLoop
RET RET
ordSqrLoopBMI2: ordSqrLoopBMI2:
MOVQ BX, (SP)
// y[1:] * y[0] // y[1:] * y[0]
MOVQ (8*0)(x_ptr), DX MOVQ (8*0)(x_ptr), DX
MULXQ (8*1)(x_ptr), acc1, acc2 MULXQ (8*1)(x_ptr), acc1, acc2
@ -587,8 +580,7 @@ ordSqrLoopBMI2:
p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr) p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
MOVQ res_ptr, x_ptr MOVQ res_ptr, x_ptr
MOVQ (SP), BX DECQ BP
DECQ BX
JNE ordSqrLoopBMI2 JNE ordSqrLoopBMI2
RET RET

View File

@ -5,16 +5,15 @@
#include "gfp_macros_amd64.s" #include "gfp_macros_amd64.s"
// func gfpSqr(res, in *gfP, n int) // func gfpSqr(res, in *gfP, n int)
TEXT ·gfpSqr(SB),NOSPLIT,$24-8 TEXT ·gfpSqr(SB),NOSPLIT,$0
MOVQ res+0(FP), res_ptr MOVQ res+0(FP), res_ptr
MOVQ in+8(FP), x_ptr MOVQ in+8(FP), x_ptr
MOVQ n+16(FP), BX MOVQ n+16(FP), BP
CMPB ·supportADX(SB), $0 CMPB ·supportADX(SB), $0
JE gfpSqrLoop JE gfpSqrLoop
gfpSqrLoopAdx: gfpSqrLoopAdx:
MOVQ BX, (SP)
XORQ acc0, acc0 XORQ acc0, acc0
XORQ y_ptr, y_ptr XORQ y_ptr, y_ptr
// y[1:] * y[0] // y[1:] * y[0]
@ -174,14 +173,12 @@ gfpSqrLoopAdx:
storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr)) storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr))
MOVQ res_ptr, x_ptr MOVQ res_ptr, x_ptr
MOVQ (SP), BX DECQ BP
DECQ BX
JNE gfpSqrLoopAdx JNE gfpSqrLoopAdx
RET RET
gfpSqrLoop: gfpSqrLoop:
MOVQ BX, (SP)
// y[1:] * y[0] // y[1:] * y[0]
MOVQ (8*0)(x_ptr), t0 MOVQ (8*0)(x_ptr), t0
@ -416,8 +413,7 @@ gfpSqrLoop:
gfpCarry(acc0,acc1,acc2,acc3, acc4,acc5,y_ptr,BX,t0) gfpCarry(acc0,acc1,acc2,acc3, acc4,acc5,y_ptr,BX,t0)
storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr)) storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr))
MOVQ res_ptr, x_ptr MOVQ res_ptr, x_ptr
MOVQ (SP), BX DECQ BP
DECQ BX
JNE gfpSqrLoop JNE gfpSqrLoop
RET RET