mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 04:36:19 +08:00
optimize sm2 p256 amd64 implementation, reduce multiplication
This commit is contained in:
parent
4ff0c4547f
commit
381476a913
@ -848,24 +848,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
|||||||
ADDQ t1, acc1
|
ADDQ t1, acc1
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
ADDQ AX, acc1
|
ADDQ AX, acc1
|
||||||
ADCQ $0, DX
|
ADCQ DX, acc2
|
||||||
MOVQ DX, t1
|
ADCQ $0, acc3
|
||||||
|
ADCQ t0, acc4
|
||||||
MOVQ p256ord<>+0x10(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc2
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc2
|
|
||||||
ADCQ $0, DX
|
|
||||||
MOVQ DX, t1
|
|
||||||
|
|
||||||
MOVQ p256ord<>+0x18(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc3
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc3
|
|
||||||
ADCQ DX, acc4
|
|
||||||
ADCQ $0, acc5
|
ADCQ $0, acc5
|
||||||
|
|
||||||
|
MOVQ t0, AX
|
||||||
|
MOVQ t0, DX
|
||||||
|
SHLQ $32, AX
|
||||||
|
SHRQ $32, DX
|
||||||
|
|
||||||
|
SUBQ t0, acc2
|
||||||
|
SBBQ AX, acc3
|
||||||
|
SBBQ DX, acc4
|
||||||
|
SBBQ $0, acc5
|
||||||
// x * y[1]
|
// x * y[1]
|
||||||
MOVQ (8*1)(y_ptr), t0
|
MOVQ (8*1)(y_ptr), t0
|
||||||
|
|
||||||
@ -914,24 +910,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
|||||||
ADDQ t1, acc2
|
ADDQ t1, acc2
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
ADDQ AX, acc2
|
ADDQ AX, acc2
|
||||||
ADCQ $0, DX
|
ADCQ DX, acc3
|
||||||
MOVQ DX, t1
|
ADCQ $0, acc4
|
||||||
|
ADCQ t0, acc5
|
||||||
MOVQ p256ord<>+0x10(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc3
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc3
|
|
||||||
ADCQ $0, DX
|
|
||||||
MOVQ DX, t1
|
|
||||||
|
|
||||||
MOVQ p256ord<>+0x18(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc4
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc4
|
|
||||||
ADCQ DX, acc5
|
|
||||||
ADCQ $0, acc0
|
ADCQ $0, acc0
|
||||||
|
|
||||||
|
MOVQ t0, AX
|
||||||
|
MOVQ t0, DX
|
||||||
|
SHLQ $32, AX
|
||||||
|
SHRQ $32, DX
|
||||||
|
|
||||||
|
SUBQ t0, acc3
|
||||||
|
SBBQ AX, acc4
|
||||||
|
SBBQ DX, acc5
|
||||||
|
SBBQ $0, acc0
|
||||||
// x * y[2]
|
// x * y[2]
|
||||||
MOVQ (8*2)(y_ptr), t0
|
MOVQ (8*2)(y_ptr), t0
|
||||||
|
|
||||||
@ -980,24 +972,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
|||||||
ADDQ t1, acc3
|
ADDQ t1, acc3
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
ADDQ AX, acc3
|
ADDQ AX, acc3
|
||||||
ADCQ $0, DX
|
ADCQ DX, acc4
|
||||||
MOVQ DX, t1
|
ADCQ $0, acc5
|
||||||
|
ADCQ t0, acc0
|
||||||
MOVQ p256ord<>+0x10(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc4
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc4
|
|
||||||
ADCQ $0, DX
|
|
||||||
MOVQ DX, t1
|
|
||||||
|
|
||||||
MOVQ p256ord<>+0x18(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc5
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc5
|
|
||||||
ADCQ DX, acc0
|
|
||||||
ADCQ $0, acc1
|
ADCQ $0, acc1
|
||||||
|
|
||||||
|
MOVQ t0, AX
|
||||||
|
MOVQ t0, DX
|
||||||
|
SHLQ $32, AX
|
||||||
|
SHRQ $32, DX
|
||||||
|
|
||||||
|
SUBQ t0, acc4
|
||||||
|
SBBQ AX, acc5
|
||||||
|
SBBQ DX, acc0
|
||||||
|
SBBQ $0, acc1
|
||||||
// x * y[3]
|
// x * y[3]
|
||||||
MOVQ (8*3)(y_ptr), t0
|
MOVQ (8*3)(y_ptr), t0
|
||||||
|
|
||||||
@ -1046,24 +1034,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
|||||||
ADDQ t1, acc4
|
ADDQ t1, acc4
|
||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
ADDQ AX, acc4
|
ADDQ AX, acc4
|
||||||
ADCQ $0, DX
|
ADCQ DX, acc5
|
||||||
MOVQ DX, t1
|
ADCQ $0, acc0
|
||||||
|
ADCQ t0, acc1
|
||||||
MOVQ p256ord<>+0x10(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc5
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc5
|
|
||||||
ADCQ $0, DX
|
|
||||||
MOVQ DX, t1
|
|
||||||
|
|
||||||
MOVQ p256ord<>+0x18(SB), AX
|
|
||||||
MULQ t0
|
|
||||||
ADDQ t1, acc0
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc0
|
|
||||||
ADCQ DX, acc1
|
|
||||||
ADCQ $0, acc2
|
ADCQ $0, acc2
|
||||||
|
|
||||||
|
MOVQ t0, AX
|
||||||
|
MOVQ t0, DX
|
||||||
|
SHLQ $32, AX
|
||||||
|
SHRQ $32, DX
|
||||||
|
|
||||||
|
SUBQ t0, acc5
|
||||||
|
SBBQ AX, acc0
|
||||||
|
SBBQ DX, acc1
|
||||||
|
SBBQ $0, acc2
|
||||||
// Copy result [255:0]
|
// Copy result [255:0]
|
||||||
MOVQ acc4, x_ptr
|
MOVQ acc4, x_ptr
|
||||||
MOVQ acc5, acc3
|
MOVQ acc5, acc3
|
||||||
|
Loading…
x
Reference in New Issue
Block a user