mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
internal/sm2ec: optimize ADX usage and add supplementary comments
This commit is contained in:
parent
bbbf2612bc
commit
a173646017
@ -2095,6 +2095,7 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
|
|||||||
CMPB ·supportBMI2+0(SB), $0x01
|
CMPB ·supportBMI2+0(SB), $0x01
|
||||||
JEQ internalMulBMI2
|
JEQ internalMulBMI2
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc4
|
||||||
MOVQ acc4, mul0
|
MOVQ acc4, mul0
|
||||||
MULQ t0
|
MULQ t0
|
||||||
MOVQ mul0, acc0
|
MOVQ mul0, acc0
|
||||||
@ -2118,6 +2119,7 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
|
|||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, acc4
|
MOVQ mul1, acc4
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc5
|
||||||
MOVQ acc5, mul0
|
MOVQ acc5, mul0
|
||||||
MULQ t0
|
MULQ t0
|
||||||
ADDQ mul0, acc1
|
ADDQ mul0, acc1
|
||||||
@ -2148,6 +2150,7 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
|
|||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, acc5
|
MOVQ mul1, acc5
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc6
|
||||||
MOVQ acc6, mul0
|
MOVQ acc6, mul0
|
||||||
MULQ t0
|
MULQ t0
|
||||||
ADDQ mul0, acc2
|
ADDQ mul0, acc2
|
||||||
@ -2178,6 +2181,7 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
|
|||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, acc6
|
MOVQ mul1, acc6
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc7
|
||||||
MOVQ acc7, mul0
|
MOVQ acc7, mul0
|
||||||
MULQ t0
|
MULQ t0
|
||||||
ADDQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
@ -2207,6 +2211,8 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
|
|||||||
ADDQ mul0, acc6
|
ADDQ mul0, acc6
|
||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, acc7
|
MOVQ mul1, acc7
|
||||||
|
|
||||||
|
// T = [acc7, acc6, acc5, acc4, acc3, acc2, acc1, acc0]
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, mul0
|
MOVQ acc0, mul0
|
||||||
MOVQ acc0, mul1
|
MOVQ acc0, mul1
|
||||||
@ -2292,22 +2298,23 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
|
|||||||
CMOVQCS acc3, acc7
|
CMOVQCS acc3, acc7
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
internalMulBMI2:
|
internalMulBMI2:
|
||||||
|
// [t3, t2, t1, t0] * acc4
|
||||||
MOVQ acc4, mul1
|
MOVQ acc4, mul1
|
||||||
MULXQ t0, acc0, acc1
|
MULXQ t0, acc0, acc1
|
||||||
|
|
||||||
MULXQ t1, mul0, acc2
|
MULXQ t1, mul0, acc2
|
||||||
ADDQ mul0, acc1
|
ADDQ mul0, acc1
|
||||||
ADCQ $0, acc2
|
|
||||||
|
|
||||||
MULXQ t2, mul0, acc3
|
MULXQ t2, mul0, acc3
|
||||||
ADDQ mul0, acc2
|
ADCQ mul0, acc2
|
||||||
ADCQ $0, acc3
|
|
||||||
|
|
||||||
MULXQ t3, mul0, acc4
|
MULXQ t3, mul0, acc4
|
||||||
ADDQ mul0, acc3
|
ADCQ mul0, acc3
|
||||||
ADCQ $0, acc4
|
ADCQ $0, acc4
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc5
|
||||||
MOVQ acc5, mul1
|
MOVQ acc5, mul1
|
||||||
MULXQ t0, mul0, hlp
|
MULXQ t0, mul0, hlp
|
||||||
ADDQ mul0, acc1
|
ADDQ mul0, acc1
|
||||||
@ -2328,6 +2335,7 @@ internalMulBMI2:
|
|||||||
ADDQ mul0, acc4
|
ADDQ mul0, acc4
|
||||||
ADCQ $0, acc5
|
ADCQ $0, acc5
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc6
|
||||||
MOVQ acc6, mul1
|
MOVQ acc6, mul1
|
||||||
MULXQ t0, mul0, hlp
|
MULXQ t0, mul0, hlp
|
||||||
ADDQ mul0, acc2
|
ADDQ mul0, acc2
|
||||||
@ -2348,6 +2356,7 @@ internalMulBMI2:
|
|||||||
ADDQ mul0, acc5
|
ADDQ mul0, acc5
|
||||||
ADCQ $0, acc6
|
ADCQ $0, acc6
|
||||||
|
|
||||||
|
// [t3, t2, t1, t0] * acc7
|
||||||
MOVQ acc7, mul1
|
MOVQ acc7, mul1
|
||||||
MULXQ t0, mul0, hlp
|
MULXQ t0, mul0, hlp
|
||||||
ADDQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
@ -2368,6 +2377,7 @@ internalMulBMI2:
|
|||||||
ADDQ mul0, acc6
|
ADDQ mul0, acc6
|
||||||
ADCQ $0, acc7
|
ADCQ $0, acc7
|
||||||
|
|
||||||
|
// T = [acc7, acc6, acc5, acc4, acc3, acc2, acc1, acc0]
|
||||||
// First reduction step
|
// First reduction step
|
||||||
MOVQ acc0, mul0
|
MOVQ acc0, mul0
|
||||||
MOVQ acc0, mul1
|
MOVQ acc0, mul1
|
||||||
@ -2544,6 +2554,7 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
|
|||||||
CMPB ·supportBMI2+0(SB), $0x01
|
CMPB ·supportBMI2+0(SB), $0x01
|
||||||
JEQ internalSqrBMI2
|
JEQ internalSqrBMI2
|
||||||
|
|
||||||
|
// [acc7, acc6, acc5] * acc4
|
||||||
MOVQ acc4, mul0
|
MOVQ acc4, mul0
|
||||||
MULQ acc5
|
MULQ acc5
|
||||||
MOVQ mul0, acc1
|
MOVQ mul0, acc1
|
||||||
@ -2561,6 +2572,7 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
|
|||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, t0
|
MOVQ mul1, t0
|
||||||
|
|
||||||
|
// [acc7, acc6] * acc5
|
||||||
MOVQ acc5, mul0
|
MOVQ acc5, mul0
|
||||||
MULQ acc6
|
MULQ acc6
|
||||||
ADDQ mul0, acc3
|
ADDQ mul0, acc3
|
||||||
@ -2575,6 +2587,7 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
|
|||||||
ADCQ $0, mul1
|
ADCQ $0, mul1
|
||||||
MOVQ mul1, t1
|
MOVQ mul1, t1
|
||||||
|
|
||||||
|
// acc7 * acc6
|
||||||
MOVQ acc6, mul0
|
MOVQ acc6, mul0
|
||||||
MULQ acc7
|
MULQ acc7
|
||||||
ADDQ mul0, t1
|
ADDQ mul0, t1
|
||||||
@ -2615,64 +2628,70 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
|
|||||||
ADCQ mul0, t2
|
ADCQ mul0, t2
|
||||||
ADCQ DX, t3
|
ADCQ DX, t3
|
||||||
|
|
||||||
|
// T = [t3, t2, t1, t0, acc3, acc2, acc1, acc0]
|
||||||
sm2P256SqrReductionInternal()
|
sm2P256SqrReductionInternal()
|
||||||
RET
|
RET
|
||||||
|
|
||||||
internalSqrBMI2:
|
internalSqrBMI2:
|
||||||
|
XORQ t3, t3
|
||||||
|
|
||||||
|
// [acc7, acc6, acc5] * acc4
|
||||||
MOVQ acc4, mul1
|
MOVQ acc4, mul1
|
||||||
MULXQ acc5, acc1, acc2
|
MULXQ acc5, acc1, acc2
|
||||||
|
|
||||||
MULXQ acc6, mul0, acc3
|
MULXQ acc6, mul0, acc3
|
||||||
ADDQ mul0, acc2
|
ADOXQ mul0, acc2
|
||||||
|
|
||||||
MULXQ acc7, mul0, t0
|
MULXQ acc7, mul0, t0
|
||||||
ADCQ mul0, acc3
|
ADOXQ mul0, acc3
|
||||||
ADCQ $0, t0
|
ADOXQ t3, t0
|
||||||
|
|
||||||
|
// [acc7, acc6] * acc5
|
||||||
MOVQ acc5, mul1
|
MOVQ acc5, mul1
|
||||||
MULXQ acc6, mul0, hlp
|
MULXQ acc6, mul0, hlp
|
||||||
ADDQ mul0, acc3
|
ADOXQ mul0, acc3
|
||||||
ADCQ hlp, t0
|
|
||||||
|
|
||||||
MULXQ acc7, mul0, t1
|
MULXQ acc7, mul0, t1
|
||||||
ADCQ $0, t1
|
ADCXQ hlp, mul0
|
||||||
ADDQ mul0, t0
|
ADOXQ mul0, t0
|
||||||
|
ADCXQ t3, t1
|
||||||
|
|
||||||
|
// acc7 * acc6
|
||||||
MOVQ acc6, mul1
|
MOVQ acc6, mul1
|
||||||
MULXQ acc7, mul0, t2
|
MULXQ acc7, mul0, t2
|
||||||
ADCQ mul0, t1
|
ADOXQ mul0, t1
|
||||||
ADCQ $0, t2
|
ADOXQ t3, t2
|
||||||
XORQ t3, t3
|
|
||||||
|
|
||||||
// *2
|
// *2
|
||||||
ADDQ acc1, acc1
|
ADOXQ acc1, acc1
|
||||||
ADCQ acc2, acc2
|
ADOXQ acc2, acc2
|
||||||
ADCQ acc3, acc3
|
ADOXQ acc3, acc3
|
||||||
ADCQ t0, t0
|
ADOXQ t0, t0
|
||||||
ADCQ t1, t1
|
ADOXQ t1, t1
|
||||||
ADCQ t2, t2
|
ADOXQ t2, t2
|
||||||
ADCQ $0, t3
|
ADOXQ t3, t3
|
||||||
|
|
||||||
// Missing products
|
// Missing products
|
||||||
MOVQ acc4, mul1
|
MOVQ acc4, mul1
|
||||||
MULXQ mul1, acc0, acc4
|
MULXQ mul1, acc0, acc4
|
||||||
ADDQ acc4, acc1
|
ADCXQ acc4, acc1
|
||||||
|
|
||||||
MOVQ acc5, mul1
|
MOVQ acc5, mul1
|
||||||
MULXQ mul1, mul0, acc4
|
MULXQ mul1, mul0, acc4
|
||||||
ADCQ mul0, acc2
|
ADCXQ mul0, acc2
|
||||||
ADCQ acc4, acc3
|
ADCXQ acc4, acc3
|
||||||
|
|
||||||
MOVQ acc6, mul1
|
MOVQ acc6, mul1
|
||||||
MULXQ mul1, mul0, acc4
|
MULXQ mul1, mul0, acc4
|
||||||
ADCQ mul0, t0
|
ADCXQ mul0, t0
|
||||||
ADCQ acc4, t1
|
ADCXQ acc4, t1
|
||||||
|
|
||||||
MOVQ acc7, mul1
|
MOVQ acc7, mul1
|
||||||
MULXQ mul1, mul0, acc4
|
MULXQ mul1, mul0, acc4
|
||||||
ADCQ mul0, t2
|
ADCXQ mul0, t2
|
||||||
ADCQ acc4, t3
|
ADCXQ acc4, t3
|
||||||
|
|
||||||
|
// T = [t3, t2, t1, t0, acc3, acc2, acc1, acc0]
|
||||||
sm2P256SqrReductionInternal()
|
sm2P256SqrReductionInternal()
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
Loading…
x
Reference in New Issue
Block a user