mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 12:16:20 +08:00
Optimize the SM2 P-256 amd64 implementation: reduce the number of multiplications
This commit is contained in:
parent
4ff0c4547f
commit
381476a913
@ -848,24 +848,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
||||
ADDQ t1, acc1
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc1
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x10(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc2
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc2
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x18(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ DX, acc4
|
||||
ADCQ DX, acc2
|
||||
ADCQ $0, acc3
|
||||
ADCQ t0, acc4
|
||||
ADCQ $0, acc5
|
||||
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
SUBQ t0, acc2
|
||||
SBBQ AX, acc3
|
||||
SBBQ DX, acc4
|
||||
SBBQ $0, acc5
|
||||
// x * y[1]
|
||||
MOVQ (8*1)(y_ptr), t0
|
||||
|
||||
@ -914,24 +910,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
||||
ADDQ t1, acc2
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc2
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x10(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x18(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ DX, acc5
|
||||
ADCQ DX, acc3
|
||||
ADCQ $0, acc4
|
||||
ADCQ t0, acc5
|
||||
ADCQ $0, acc0
|
||||
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
SUBQ t0, acc3
|
||||
SBBQ AX, acc4
|
||||
SBBQ DX, acc5
|
||||
SBBQ $0, acc0
|
||||
// x * y[2]
|
||||
MOVQ (8*2)(y_ptr), t0
|
||||
|
||||
@ -980,24 +972,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x10(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x18(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc5
|
||||
ADCQ DX, acc0
|
||||
ADCQ DX, acc4
|
||||
ADCQ $0, acc5
|
||||
ADCQ t0, acc0
|
||||
ADCQ $0, acc1
|
||||
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
SUBQ t0, acc4
|
||||
SBBQ AX, acc5
|
||||
SBBQ DX, acc0
|
||||
SBBQ $0, acc1
|
||||
// x * y[3]
|
||||
MOVQ (8*3)(y_ptr), t0
|
||||
|
||||
@ -1046,24 +1034,20 @@ TEXT ·p256OrdMul(SB),NOSPLIT,$0
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x10(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc5
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
|
||||
MOVQ p256ord<>+0x18(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc0
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc0
|
||||
ADCQ DX, acc1
|
||||
ADCQ DX, acc5
|
||||
ADCQ $0, acc0
|
||||
ADCQ t0, acc1
|
||||
ADCQ $0, acc2
|
||||
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
SUBQ t0, acc5
|
||||
SBBQ AX, acc0
|
||||
SBBQ DX, acc1
|
||||
SBBQ $0, acc2
|
||||
// Copy result [255:0]
|
||||
MOVQ acc4, x_ptr
|
||||
MOVQ acc5, acc3
|
||||
|
Loading…
x
Reference in New Issue
Block a user