mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 12:16:20 +08:00
MAGIC - optimize p256Mul p256FromMont
This commit is contained in:
parent
aa6b436714
commit
371ea2f4b3
@ -388,26 +388,24 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
||||
MOVQ DX, acc4
|
||||
XORQ acc5, acc5
|
||||
// First reduction step
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc0
|
||||
MOVQ acc0, AX
|
||||
MOVQ acc0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc0, acc1
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc1
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc0
|
||||
ADDQ t1, acc2
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc2
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc0
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ DX, acc4
|
||||
ADCQ $0, acc2
|
||||
SUBQ AX, acc1
|
||||
SBBQ DX, acc2
|
||||
|
||||
MOVQ acc0, t1
|
||||
ADDQ acc0, acc3
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc3
|
||||
SBBQ DX, t1
|
||||
SUBQ acc0, acc3
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, acc5
|
||||
XORQ acc0, acc0
|
||||
|
||||
@ -444,26 +442,24 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
||||
ADCQ DX, acc5
|
||||
ADCQ $0, acc0
|
||||
// Second reduction step
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc1
|
||||
MOVQ acc1, AX
|
||||
MOVQ acc1, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc1, acc2
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc2
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc1
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc1
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ DX, acc5
|
||||
ADCQ $0, acc3
|
||||
SUBQ AX, acc2
|
||||
SBBQ DX, acc3
|
||||
|
||||
MOVQ acc1, t1
|
||||
ADDQ acc1, acc4
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc4
|
||||
SBBQ DX, t1
|
||||
SUBQ acc1, acc4
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, acc0
|
||||
XORQ acc1, acc1
|
||||
|
||||
@ -500,26 +496,24 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
||||
ADCQ DX, acc0
|
||||
ADCQ $0, acc1
|
||||
// Third reduction step
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc2
|
||||
MOVQ acc2, AX
|
||||
MOVQ acc2, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc2, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc2
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc2
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc5
|
||||
ADCQ DX, acc0
|
||||
ADCQ $0, acc4
|
||||
SUBQ AX, acc3
|
||||
SBBQ DX, acc4
|
||||
|
||||
MOVQ acc2, t1
|
||||
ADDQ acc2, acc5
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc5
|
||||
SBBQ DX, t1
|
||||
SUBQ acc2, acc5
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc0
|
||||
ADCQ $0, acc1
|
||||
XORQ acc2, acc2
|
||||
// x * y[3]
|
||||
@ -555,26 +549,24 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
||||
ADCQ DX, acc1
|
||||
ADCQ $0, acc2
|
||||
// Last reduction step
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc3
|
||||
MOVQ acc3, AX
|
||||
MOVQ acc3, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc3, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc3
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc5
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc3
|
||||
ADDQ t1, acc0
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc0
|
||||
ADCQ DX, acc1
|
||||
ADCQ $0, acc5
|
||||
SUBQ AX, acc4
|
||||
SBBQ DX, acc5
|
||||
|
||||
MOVQ acc3, t1
|
||||
ADDQ acc3, acc0
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc0
|
||||
SBBQ DX, t1
|
||||
SUBQ acc3, acc0
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc1
|
||||
ADCQ $0, acc2
|
||||
// Copy result [255:0]
|
||||
MOVQ acc4, x_ptr
|
||||
@ -613,93 +605,85 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
||||
|
||||
// Only reduce, no multiplications are needed
|
||||
// First stage
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc0
|
||||
MOVQ acc0, AX
|
||||
MOVQ acc0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc0, acc1
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc1
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc0
|
||||
ADDQ t1, acc2
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc2
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc0
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ DX, acc4
|
||||
ADCQ $0, acc2
|
||||
SUBQ AX, acc1
|
||||
SBBQ DX, acc2
|
||||
|
||||
MOVQ acc0, t1
|
||||
ADDQ acc0, acc3
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc3
|
||||
SBBQ DX, t1
|
||||
SUBQ acc0, acc3
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc4
|
||||
XORQ acc5, acc5
|
||||
|
||||
// Second stage
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc1
|
||||
MOVQ acc1, AX
|
||||
MOVQ acc1, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc1, acc2
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc2
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc1
|
||||
ADDQ t1, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc1
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ DX, acc5
|
||||
ADCQ $0, acc3
|
||||
SUBQ AX, acc2
|
||||
SBBQ DX, acc3
|
||||
|
||||
MOVQ acc1, t1
|
||||
ADDQ acc1, acc4
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc4
|
||||
SBBQ DX, t1
|
||||
SUBQ acc1, acc4
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc5
|
||||
XORQ acc0, acc0
|
||||
// Third stage
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc2
|
||||
MOVQ acc2, AX
|
||||
MOVQ acc2, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc2, acc3
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc3
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc2
|
||||
ADDQ t1, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc2
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc5
|
||||
ADCQ DX, acc0
|
||||
ADCQ $0, acc4
|
||||
SUBQ AX, acc3
|
||||
SBBQ DX, acc4
|
||||
|
||||
MOVQ acc2, t1
|
||||
ADDQ acc2, acc5
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc5
|
||||
SBBQ DX, t1
|
||||
SUBQ acc2, acc5
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc0
|
||||
XORQ acc1, acc1
|
||||
// Last stage
|
||||
MOVQ p256p<>+0x08(SB), AX
|
||||
MULQ acc3
|
||||
MOVQ acc3, AX
|
||||
MOVQ acc3, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
ADDQ acc3, acc4
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc4
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x010(SB), AX
|
||||
MULQ acc3
|
||||
ADDQ t1, acc5
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc5
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, t1
|
||||
MOVQ p256p<>+0x018(SB), AX
|
||||
MULQ acc3
|
||||
ADDQ t1, acc0
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc0
|
||||
ADCQ DX, acc1
|
||||
ADCQ $0, acc5
|
||||
SUBQ AX, acc4
|
||||
SBBQ DX, acc5
|
||||
|
||||
MOVQ acc3, t1
|
||||
ADDQ acc3, acc0
|
||||
ADCQ $0, t1
|
||||
SUBQ AX, acc0
|
||||
SBBQ DX, t1
|
||||
SUBQ acc3, acc0
|
||||
SBBQ $0, t1
|
||||
ADDQ t1, acc1
|
||||
|
||||
MOVQ acc4, x_ptr
|
||||
MOVQ acc5, acc3
|
||||
|
Loading…
x
Reference in New Issue
Block a user