mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
plugin: sm2 sm9 use BP carefully
This commit is contained in:
parent
719bca92db
commit
429c2db8aa
@@ -13,16 +13,15 @@
|
|||||||
|
|
||||||
/* ---------------------------------------*/
|
/* ---------------------------------------*/
|
||||||
// func p256Sqr(res, in *p256Element, n int)
|
// func p256Sqr(res, in *p256Element, n int)
|
||||||
TEXT ·p256Sqr(SB),NOSPLIT,$8-24
|
TEXT ·p256Sqr(SB),NOSPLIT,$0
|
||||||
MOVQ res+0(FP), res_ptr
|
MOVQ res+0(FP), res_ptr
|
||||||
MOVQ in+8(FP), x_ptr
|
MOVQ in+8(FP), x_ptr
|
||||||
MOVQ n+16(FP), BX
|
MOVQ n+16(FP), BP
|
||||||
|
|
||||||
CMPB ·supportBMI2+0(SB), $0x01
|
CMPB ·supportBMI2+0(SB), $0x01
|
||||||
JEQ sqrBMI2
|
JEQ sqrBMI2
|
||||||
|
|
||||||
sqrLoop:
|
sqrLoop:
|
||||||
MOVQ BX, (SP)
|
|
||||||
// y[1:] * y[0]
|
// y[1:] * y[0]
|
||||||
MOVQ (8*0)(x_ptr), t0
|
MOVQ (8*0)(x_ptr), t0
|
||||||
|
|
||||||
@@ -106,13 +105,11 @@ sqrLoop:
|
|||||||
p256SqrMontReduce()
|
p256SqrMontReduce()
|
||||||
p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
||||||
MOVQ res_ptr, x_ptr
|
MOVQ res_ptr, x_ptr
|
||||||
MOVQ (SP), BX
|
DECQ BP
|
||||||
DECQ BX
|
|
||||||
JNE sqrLoop
|
JNE sqrLoop
|
||||||
RET
|
RET
|
||||||
|
|
||||||
sqrBMI2:
|
sqrBMI2:
|
||||||
MOVQ BX, (SP)
|
|
||||||
// y[1:] * y[0]
|
// y[1:] * y[0]
|
||||||
MOVQ (8*0)(x_ptr), DX
|
MOVQ (8*0)(x_ptr), DX
|
||||||
|
|
||||||
@@ -177,23 +174,21 @@ sqrBMI2:
|
|||||||
p256SqrMontReduce()
|
p256SqrMontReduce()
|
||||||
p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
p256PrimReduce(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
||||||
MOVQ res_ptr, x_ptr
|
MOVQ res_ptr, x_ptr
|
||||||
MOVQ (SP), BX
|
DECQ BP
|
||||||
DECQ BX
|
|
||||||
JNE sqrBMI2
|
JNE sqrBMI2
|
||||||
RET
|
RET
|
||||||
|
|
||||||
/* ---------------------------------------*/
|
/* ---------------------------------------*/
|
||||||
// func p256OrdSqr(res, in *p256OrdElement, n int)
|
// func p256OrdSqr(res, in *p256OrdElement, n int)
|
||||||
TEXT ·p256OrdSqr(SB),NOSPLIT,$8-24
|
TEXT ·p256OrdSqr(SB),NOSPLIT,$0
|
||||||
MOVQ res+0(FP), res_ptr
|
MOVQ res+0(FP), res_ptr
|
||||||
MOVQ in+8(FP), x_ptr
|
MOVQ in+8(FP), x_ptr
|
||||||
MOVQ n+16(FP), BX
|
MOVQ n+16(FP), BP
|
||||||
|
|
||||||
CMPB ·supportBMI2+0(SB), $0x01
|
CMPB ·supportBMI2+0(SB), $0x01
|
||||||
JEQ ordSqrLoopBMI2
|
JEQ ordSqrLoopBMI2
|
||||||
|
|
||||||
ordSqrLoop:
|
ordSqrLoop:
|
||||||
MOVQ BX, (SP)
|
|
||||||
// y[1:] * y[0]
|
// y[1:] * y[0]
|
||||||
MOVQ (8*0)(x_ptr), t0
|
MOVQ (8*0)(x_ptr), t0
|
||||||
|
|
||||||
@@ -406,14 +401,12 @@ ordSqrLoop:
|
|||||||
|
|
||||||
p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
||||||
MOVQ res_ptr, x_ptr
|
MOVQ res_ptr, x_ptr
|
||||||
MOVQ (SP), BX
|
DECQ BP
|
||||||
DECQ BX
|
|
||||||
JNE ordSqrLoop
|
JNE ordSqrLoop
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
ordSqrLoopBMI2:
|
ordSqrLoopBMI2:
|
||||||
MOVQ BX, (SP)
|
|
||||||
// y[1:] * y[0]
|
// y[1:] * y[0]
|
||||||
MOVQ (8*0)(x_ptr), DX
|
MOVQ (8*0)(x_ptr), DX
|
||||||
MULXQ (8*1)(x_ptr), acc1, acc2
|
MULXQ (8*1)(x_ptr), acc1, acc2
|
||||||
@@ -587,8 +580,7 @@ ordSqrLoopBMI2:
|
|||||||
|
|
||||||
p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
p256OrdReduceInline(acc0, acc1, acc2, acc3, t0, acc4, acc5, y_ptr, BX, res_ptr)
|
||||||
MOVQ res_ptr, x_ptr
|
MOVQ res_ptr, x_ptr
|
||||||
MOVQ (SP), BX
|
DECQ BP
|
||||||
DECQ BX
|
|
||||||
JNE ordSqrLoopBMI2
|
JNE ordSqrLoopBMI2
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
@@ -5,16 +5,15 @@
|
|||||||
#include "gfp_macros_amd64.s"
|
#include "gfp_macros_amd64.s"
|
||||||
|
|
||||||
// func gfpSqr(res, in *gfP, n int)
|
// func gfpSqr(res, in *gfP, n int)
|
||||||
TEXT ·gfpSqr(SB),NOSPLIT,$24-8
|
TEXT ·gfpSqr(SB),NOSPLIT,$0
|
||||||
MOVQ res+0(FP), res_ptr
|
MOVQ res+0(FP), res_ptr
|
||||||
MOVQ in+8(FP), x_ptr
|
MOVQ in+8(FP), x_ptr
|
||||||
MOVQ n+16(FP), BX
|
MOVQ n+16(FP), BP
|
||||||
|
|
||||||
CMPB ·supportADX(SB), $0
|
CMPB ·supportADX(SB), $0
|
||||||
JE gfpSqrLoop
|
JE gfpSqrLoop
|
||||||
|
|
||||||
gfpSqrLoopAdx:
|
gfpSqrLoopAdx:
|
||||||
MOVQ BX, (SP)
|
|
||||||
XORQ acc0, acc0
|
XORQ acc0, acc0
|
||||||
XORQ y_ptr, y_ptr
|
XORQ y_ptr, y_ptr
|
||||||
// y[1:] * y[0]
|
// y[1:] * y[0]
|
||||||
@@ -174,14 +173,12 @@ gfpSqrLoopAdx:
|
|||||||
storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr))
|
storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr))
|
||||||
|
|
||||||
MOVQ res_ptr, x_ptr
|
MOVQ res_ptr, x_ptr
|
||||||
MOVQ (SP), BX
|
DECQ BP
|
||||||
DECQ BX
|
|
||||||
JNE gfpSqrLoopAdx
|
JNE gfpSqrLoopAdx
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
gfpSqrLoop:
|
gfpSqrLoop:
|
||||||
MOVQ BX, (SP)
|
|
||||||
// y[1:] * y[0]
|
// y[1:] * y[0]
|
||||||
MOVQ (8*0)(x_ptr), t0
|
MOVQ (8*0)(x_ptr), t0
|
||||||
|
|
||||||
@@ -416,8 +413,7 @@ gfpSqrLoop:
|
|||||||
gfpCarry(acc0,acc1,acc2,acc3, acc4,acc5,y_ptr,BX,t0)
|
gfpCarry(acc0,acc1,acc2,acc3, acc4,acc5,y_ptr,BX,t0)
|
||||||
storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr))
|
storeBlock(acc0,acc1,acc2,acc3, 0(res_ptr))
|
||||||
MOVQ res_ptr, x_ptr
|
MOVQ res_ptr, x_ptr
|
||||||
MOVQ (SP), BX
|
DECQ BP
|
||||||
DECQ BX
|
|
||||||
JNE gfpSqrLoop
|
JNE gfpSqrLoop
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
Loading…
x
Reference in New Issue
Block a user