mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
Rollback reduction process with multiple and addition
This commit is contained in:
parent
996ab5047f
commit
49e0071a8a
@ -246,82 +246,94 @@ sqrLoop:
|
|||||||
ADCQ DX, t1
|
ADCQ DX, t1
|
||||||
MOVQ t1, x_ptr
|
MOVQ t1, x_ptr
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, t1
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
|
MULQ acc0
|
||||||
MOVQ t1, AX
|
ADDQ acc0, acc1
|
||||||
MOVQ t1, DX
|
ADCQ $0, DX
|
||||||
SHLQ $32, AX
|
ADDQ AX, acc1
|
||||||
SHRQ $32, DX
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADCQ $0, acc2
|
MULQ acc0
|
||||||
SUBQ AX, acc1
|
|
||||||
SBBQ DX, acc2
|
|
||||||
|
|
||||||
ADDQ t1, acc3
|
|
||||||
ADCQ $0, acc0
|
|
||||||
SUBQ AX, acc3
|
|
||||||
SBBQ DX, acc0
|
|
||||||
SUBQ t1, acc3
|
|
||||||
SBBQ $0, acc0
|
|
||||||
// Second reduction step
|
|
||||||
MOVQ acc1, t1
|
|
||||||
|
|
||||||
MOVQ t1, AX
|
|
||||||
MOVQ t1, DX
|
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ t1, acc2
|
ADDQ t1, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc2
|
ADDQ AX, acc2
|
||||||
SBBQ DX, acc3
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc0
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
ADCQ $0, acc1
|
MULQ acc0
|
||||||
SUBQ AX, acc0
|
|
||||||
SBBQ DX, acc1
|
|
||||||
SUBQ t1, acc0
|
|
||||||
SBBQ $0, acc1
|
|
||||||
// Third reduction step
|
|
||||||
MOVQ acc2, t1
|
|
||||||
|
|
||||||
MOVQ t1, AX
|
|
||||||
MOVQ t1, DX
|
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ t1, acc3
|
ADDQ t1, acc3
|
||||||
ADCQ $0, acc0
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc3
|
ADDQ AX, acc3
|
||||||
SBBQ DX, acc0
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, acc0
|
||||||
|
// Second reduction step
|
||||||
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ acc1, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc2
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, acc1
|
||||||
|
// Third reduction step
|
||||||
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ acc2, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc2
|
||||||
ADDQ t1, acc1
|
ADDQ t1, acc1
|
||||||
ADCQ $0, acc2
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc1
|
ADDQ AX, acc1
|
||||||
SBBQ DX, acc2
|
ADCQ $0, DX
|
||||||
SUBQ t1, acc1
|
MOVQ DX, acc2
|
||||||
SBBQ $0, acc2
|
|
||||||
// Last reduction step
|
// Last reduction step
|
||||||
XORQ t0, t0
|
XORQ t0, t0
|
||||||
MOVQ acc3, t1
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
|
MULQ acc3
|
||||||
MOVQ t1, AX
|
ADDQ acc3, acc0
|
||||||
MOVQ t1, DX
|
ADCQ $0, DX
|
||||||
SHLQ $32, AX
|
ADDQ AX, acc0
|
||||||
SHRQ $32, DX
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc0
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADCQ $0, acc1
|
MULQ acc3
|
||||||
SUBQ AX, acc0
|
ADDQ t1, acc1
|
||||||
SBBQ DX, acc1
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc1
|
||||||
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc3
|
||||||
ADDQ t1, acc2
|
ADDQ t1, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc2
|
ADDQ AX, acc2
|
||||||
SBBQ DX, acc3
|
ADCQ $0, DX
|
||||||
SUBQ t1, acc2
|
MOVQ DX, acc3
|
||||||
SBBQ $0, acc3
|
|
||||||
|
|
||||||
// Add bits [511:256] of the sqr result
|
// Add bits [511:256] of the sqr result
|
||||||
ADCQ acc4, acc0
|
ADCQ acc4, acc0
|
||||||
@ -388,24 +400,26 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
|||||||
MOVQ DX, acc4
|
MOVQ DX, acc4
|
||||||
XORQ acc5, acc5
|
XORQ acc5, acc5
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc0, DX
|
MULQ acc0
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc0, acc1
|
ADDQ acc0, acc1
|
||||||
ADCQ $0, acc2
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc1
|
ADDQ AX, acc1
|
||||||
SBBQ DX, acc2
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc0, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc0, acc3
|
MULQ acc0
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc2
|
||||||
SUBQ AX, acc3
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc2
|
||||||
SUBQ acc0, acc3
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc4
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc0
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
|
ADCQ DX, acc4
|
||||||
ADCQ $0, acc5
|
ADCQ $0, acc5
|
||||||
XORQ acc0, acc0
|
XORQ acc0, acc0
|
||||||
|
|
||||||
@ -442,24 +456,26 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
|||||||
ADCQ DX, acc5
|
ADCQ DX, acc5
|
||||||
ADCQ $0, acc0
|
ADCQ $0, acc0
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MOVQ acc1, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc1, DX
|
MULQ acc1
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc1, acc2
|
ADDQ acc1, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc2
|
ADDQ AX, acc2
|
||||||
SBBQ DX, acc3
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc1, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc1, acc4
|
MULQ acc1
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc3
|
||||||
SUBQ AX, acc4
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc3
|
||||||
SUBQ acc1, acc4
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc5
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc4
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc4
|
||||||
|
ADCQ DX, acc5
|
||||||
ADCQ $0, acc0
|
ADCQ $0, acc0
|
||||||
XORQ acc1, acc1
|
XORQ acc1, acc1
|
||||||
|
|
||||||
@ -496,24 +512,26 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
|||||||
ADCQ DX, acc0
|
ADCQ DX, acc0
|
||||||
ADCQ $0, acc1
|
ADCQ $0, acc1
|
||||||
// Third reduction step
|
// Third reduction step
|
||||||
MOVQ acc2, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc2, DX
|
MULQ acc2
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc2, acc3
|
ADDQ acc2, acc3
|
||||||
ADCQ $0, acc4
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc3
|
ADDQ AX, acc3
|
||||||
SBBQ DX, acc4
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc2, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc2, acc5
|
MULQ acc2
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc4
|
||||||
SUBQ AX, acc5
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc4
|
||||||
SUBQ acc2, acc5
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc0
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc5
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc5
|
||||||
|
ADCQ DX, acc0
|
||||||
ADCQ $0, acc1
|
ADCQ $0, acc1
|
||||||
XORQ acc2, acc2
|
XORQ acc2, acc2
|
||||||
// x * y[3]
|
// x * y[3]
|
||||||
@ -549,24 +567,26 @@ TEXT ·p256Mul(SB),NOSPLIT,$0
|
|||||||
ADCQ DX, acc1
|
ADCQ DX, acc1
|
||||||
ADCQ $0, acc2
|
ADCQ $0, acc2
|
||||||
// Last reduction step
|
// Last reduction step
|
||||||
MOVQ acc3, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc3, DX
|
MULQ acc3
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc3, acc4
|
ADDQ acc3, acc4
|
||||||
ADCQ $0, acc5
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc4
|
ADDQ AX, acc4
|
||||||
SBBQ DX, acc5
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc3, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc3, acc0
|
MULQ acc3
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc5
|
||||||
SUBQ AX, acc0
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc5
|
||||||
SUBQ acc3, acc0
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc1
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
|
ADCQ DX, acc1
|
||||||
ADCQ $0, acc2
|
ADCQ $0, acc2
|
||||||
// Copy result [255:0]
|
// Copy result [255:0]
|
||||||
MOVQ acc4, x_ptr
|
MOVQ acc4, x_ptr
|
||||||
@ -605,85 +625,93 @@ TEXT ·p256FromMont(SB),NOSPLIT,$0
|
|||||||
|
|
||||||
// Only reduce, no multiplications are needed
|
// Only reduce, no multiplications are needed
|
||||||
// First stage
|
// First stage
|
||||||
MOVQ acc0, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc0, DX
|
MULQ acc0
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc0, acc1
|
ADDQ acc0, acc1
|
||||||
ADCQ $0, acc2
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc1
|
ADDQ AX, acc1
|
||||||
SBBQ DX, acc2
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc0, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc0, acc3
|
MULQ acc0
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc2
|
||||||
SUBQ AX, acc3
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc2
|
||||||
SUBQ acc0, acc3
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc4
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc0
|
||||||
|
ADDQ t1, acc3
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc3
|
||||||
|
ADCQ DX, acc4
|
||||||
XORQ acc5, acc5
|
XORQ acc5, acc5
|
||||||
|
|
||||||
// Second stage
|
// Second stage
|
||||||
MOVQ acc1, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc1, DX
|
MULQ acc1
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc1, acc2
|
ADDQ acc1, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc2
|
ADDQ AX, acc2
|
||||||
SBBQ DX, acc3
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc1, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc1, acc4
|
MULQ acc1
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc3
|
||||||
SUBQ AX, acc4
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc3
|
||||||
SUBQ acc1, acc4
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc5
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc1
|
||||||
|
ADDQ t1, acc4
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc4
|
||||||
|
ADCQ DX, acc5
|
||||||
XORQ acc0, acc0
|
XORQ acc0, acc0
|
||||||
// Third stage
|
// Third stage
|
||||||
MOVQ acc2, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc2, DX
|
MULQ acc2
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc2, acc3
|
ADDQ acc2, acc3
|
||||||
ADCQ $0, acc4
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc3
|
ADDQ AX, acc3
|
||||||
SBBQ DX, acc4
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc2, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc2, acc5
|
MULQ acc2
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc4
|
||||||
SUBQ AX, acc5
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc4
|
||||||
SUBQ acc2, acc5
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc0
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ t1, acc5
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc5
|
||||||
|
ADCQ DX, acc0
|
||||||
XORQ acc1, acc1
|
XORQ acc1, acc1
|
||||||
// Last stage
|
// Last stage
|
||||||
MOVQ acc3, AX
|
MOVQ p256p<>+0x08(SB), AX
|
||||||
MOVQ acc3, DX
|
MULQ acc3
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
ADDQ acc3, acc4
|
ADDQ acc3, acc4
|
||||||
ADCQ $0, acc5
|
ADCQ $0, DX
|
||||||
SUBQ AX, acc4
|
ADDQ AX, acc4
|
||||||
SBBQ DX, acc5
|
ADCQ $0, DX
|
||||||
|
MOVQ DX, t1
|
||||||
MOVQ acc3, t1
|
MOVQ p256p<>+0x010(SB), AX
|
||||||
ADDQ acc3, acc0
|
MULQ acc3
|
||||||
ADCQ $0, t1
|
ADDQ t1, acc5
|
||||||
SUBQ AX, acc0
|
ADCQ $0, DX
|
||||||
SBBQ DX, t1
|
ADDQ AX, acc5
|
||||||
SUBQ acc3, acc0
|
ADCQ $0, DX
|
||||||
SBBQ $0, t1
|
MOVQ DX, t1
|
||||||
ADDQ t1, acc1
|
MOVQ p256p<>+0x018(SB), AX
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ t1, acc0
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc0
|
||||||
|
ADCQ DX, acc1
|
||||||
|
|
||||||
MOVQ acc4, x_ptr
|
MOVQ acc4, x_ptr
|
||||||
MOVQ acc5, acc3
|
MOVQ acc5, acc3
|
||||||
@ -1563,81 +1591,93 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$0
|
|||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, acc7
|
MOVQ mul1, acc7
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, hlp
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc0
|
||||||
MOVQ hlp, mul0
|
ADDQ acc0, acc1
|
||||||
MOVQ hlp, mul1
|
ADCQ $0, mul1
|
||||||
SHLQ $32, mul0
|
ADDQ mul0, acc1
|
||||||
SHRQ $32, mul1
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
ADDQ hlp, acc1
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
ADCQ $0, acc2
|
MULQ acc0
|
||||||
SUBQ mul0, acc1
|
ADDQ hlp, acc2
|
||||||
SBBQ mul1, acc2
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc2
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
|
MULQ acc0
|
||||||
ADDQ hlp, acc3
|
ADDQ hlp, acc3
|
||||||
ADCQ $0, acc0
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
SBBQ mul1, acc0
|
ADCQ $0, mul1
|
||||||
SUBQ hlp, acc3
|
MOVQ mul1, acc0
|
||||||
SBBQ $0, acc0
|
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MOVQ acc1, hlp
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc1
|
||||||
MOVQ hlp, mul0
|
ADDQ acc1, acc2
|
||||||
MOVQ hlp, mul1
|
ADCQ $0, mul1
|
||||||
SHLQ $32, mul0
|
ADDQ mul0, acc2
|
||||||
SHRQ $32, mul1
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
ADDQ hlp, acc2
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
ADCQ $0, acc3
|
MULQ acc1
|
||||||
SUBQ mul0, acc2
|
|
||||||
SBBQ mul1, acc3
|
|
||||||
|
|
||||||
ADDQ hlp, acc0
|
|
||||||
ADCQ $0, acc1
|
|
||||||
SUBQ mul0, acc0
|
|
||||||
SBBQ mul1, acc1
|
|
||||||
SUBQ hlp, acc0
|
|
||||||
SBBQ $0, acc1
|
|
||||||
// Third reduction step
|
|
||||||
MOVQ acc2, hlp
|
|
||||||
|
|
||||||
MOVQ hlp, mul0
|
|
||||||
MOVQ hlp, mul1
|
|
||||||
SHLQ $32, mul0
|
|
||||||
SHRQ $32, mul1
|
|
||||||
|
|
||||||
ADDQ hlp, acc3
|
ADDQ hlp, acc3
|
||||||
ADCQ $0, acc0
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
SBBQ mul1, acc0
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
ADDQ hlp, acc1
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
ADCQ $0, acc2
|
MULQ acc1
|
||||||
SUBQ mul0, acc1
|
|
||||||
SBBQ mul1, acc2
|
|
||||||
SUBQ hlp, acc1
|
|
||||||
SBBQ $0, acc2
|
|
||||||
// Last reduction step
|
|
||||||
MOVQ acc3, hlp
|
|
||||||
|
|
||||||
MOVQ hlp, mul0
|
|
||||||
MOVQ hlp, mul1
|
|
||||||
SHLQ $32, mul0
|
|
||||||
SHRQ $32, mul1
|
|
||||||
|
|
||||||
ADDQ hlp, acc0
|
ADDQ hlp, acc0
|
||||||
ADCQ $0, acc1
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc0
|
ADDQ mul0, acc0
|
||||||
SBBQ mul1, acc1
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, acc1
|
||||||
|
// Third reduction step
|
||||||
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ acc2, acc3
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc3
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ hlp, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ hlp, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, acc2
|
||||||
|
// Last reduction step
|
||||||
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ acc3, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ hlp, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
|
MULQ acc3
|
||||||
ADDQ hlp, acc2
|
ADDQ hlp, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc2
|
ADDQ mul0, acc2
|
||||||
SBBQ mul1, acc3
|
ADCQ $0, mul1
|
||||||
SUBQ hlp, acc2
|
MOVQ mul1, acc3
|
||||||
SBBQ $0, acc3
|
|
||||||
MOVQ $0, BP
|
MOVQ $0, BP
|
||||||
// Add bits [511:256] of the result
|
// Add bits [511:256] of the result
|
||||||
ADCQ acc0, acc4
|
ADCQ acc0, acc4
|
||||||
@ -1737,81 +1777,93 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$0
|
|||||||
ADCQ mul0, t2
|
ADCQ mul0, t2
|
||||||
ADCQ DX, t3
|
ADCQ DX, t3
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, hlp
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc0
|
||||||
MOVQ hlp, mul0
|
ADDQ acc0, acc1
|
||||||
MOVQ hlp, mul1
|
ADCQ $0, mul1
|
||||||
SHLQ $32, mul0
|
ADDQ mul0, acc1
|
||||||
SHRQ $32, mul1
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
ADDQ hlp, acc1
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
ADCQ $0, acc2
|
MULQ acc0
|
||||||
SUBQ mul0, acc1
|
ADDQ hlp, acc2
|
||||||
SBBQ mul1, acc2
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc2
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
|
MULQ acc0
|
||||||
ADDQ hlp, acc3
|
ADDQ hlp, acc3
|
||||||
ADCQ $0, acc0
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
SBBQ mul1, acc0
|
ADCQ $0, mul1
|
||||||
SUBQ hlp, acc3
|
MOVQ mul1, acc0
|
||||||
SBBQ $0, acc0
|
|
||||||
// Second reduction step
|
// Second reduction step
|
||||||
MOVQ acc1, hlp
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc1
|
||||||
MOVQ hlp, mul0
|
ADDQ acc1, acc2
|
||||||
MOVQ hlp, mul1
|
ADCQ $0, mul1
|
||||||
SHLQ $32, mul0
|
ADDQ mul0, acc2
|
||||||
SHRQ $32, mul1
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
ADDQ hlp, acc2
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
ADCQ $0, acc3
|
MULQ acc1
|
||||||
SUBQ mul0, acc2
|
|
||||||
SBBQ mul1, acc3
|
|
||||||
|
|
||||||
ADDQ hlp, acc0
|
|
||||||
ADCQ $0, acc1
|
|
||||||
SUBQ mul0, acc0
|
|
||||||
SBBQ mul1, acc1
|
|
||||||
SUBQ hlp, acc0
|
|
||||||
SBBQ $0, acc1
|
|
||||||
// Third reduction step
|
|
||||||
MOVQ acc2, hlp
|
|
||||||
|
|
||||||
MOVQ hlp, mul0
|
|
||||||
MOVQ hlp, mul1
|
|
||||||
SHLQ $32, mul0
|
|
||||||
SHRQ $32, mul1
|
|
||||||
|
|
||||||
ADDQ hlp, acc3
|
ADDQ hlp, acc3
|
||||||
ADCQ $0, acc0
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
SBBQ mul1, acc0
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
ADDQ hlp, acc1
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
ADCQ $0, acc2
|
MULQ acc1
|
||||||
SUBQ mul0, acc1
|
|
||||||
SBBQ mul1, acc2
|
|
||||||
SUBQ hlp, acc1
|
|
||||||
SBBQ $0, acc2
|
|
||||||
// Last reduction step
|
|
||||||
MOVQ acc3, hlp
|
|
||||||
|
|
||||||
MOVQ hlp, mul0
|
|
||||||
MOVQ hlp, mul1
|
|
||||||
SHLQ $32, mul0
|
|
||||||
SHRQ $32, mul1
|
|
||||||
|
|
||||||
ADDQ hlp, acc0
|
ADDQ hlp, acc0
|
||||||
ADCQ $0, acc1
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc0
|
ADDQ mul0, acc0
|
||||||
SBBQ mul1, acc1
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, acc1
|
||||||
|
// Third reduction step
|
||||||
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ acc2, acc3
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc3
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ hlp, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
|
MULQ acc2
|
||||||
|
ADDQ hlp, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, acc2
|
||||||
|
// Last reduction step
|
||||||
|
MOVQ p256p<>+0x08(SB), mul0
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ acc3, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc0
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x010(SB), mul0
|
||||||
|
MULQ acc3
|
||||||
|
ADDQ hlp, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
ADDQ mul0, acc1
|
||||||
|
ADCQ $0, mul1
|
||||||
|
MOVQ mul1, hlp
|
||||||
|
MOVQ p256p<>+0x018(SB), mul0
|
||||||
|
MULQ acc3
|
||||||
ADDQ hlp, acc2
|
ADDQ hlp, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, mul1
|
||||||
SUBQ mul0, acc2
|
ADDQ mul0, acc2
|
||||||
SBBQ mul1, acc3
|
ADCQ $0, mul1
|
||||||
SUBQ hlp, acc2
|
MOVQ mul1, acc3
|
||||||
SBBQ $0, acc3
|
|
||||||
MOVQ $0, BP
|
MOVQ $0, BP
|
||||||
// Add bits [511:256] of the result
|
// Add bits [511:256] of the result
|
||||||
ADCQ acc0, t0
|
ADCQ acc0, t0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user