internal/sm2ec: do not use ADX for now

This commit is contained in:
Sun Yimin 2023-07-21 18:06:22 +08:00 committed by GitHub
parent 5b5b26c095
commit 76131e6438
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2095,7 +2095,6 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
CMPB ·supportBMI2+0(SB), $0x01 CMPB ·supportBMI2+0(SB), $0x01
JEQ internalMulBMI2 JEQ internalMulBMI2
// [t3, t2, t1, t0] * acc4
MOVQ acc4, mul0 MOVQ acc4, mul0
MULQ t0 MULQ t0
MOVQ mul0, acc0 MOVQ mul0, acc0
@ -2119,7 +2118,6 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
ADCQ $0, mul1 ADCQ $0, mul1
MOVQ mul1, acc4 MOVQ mul1, acc4
// [t3, t2, t1, t0] * acc5
MOVQ acc5, mul0 MOVQ acc5, mul0
MULQ t0 MULQ t0
ADDQ mul0, acc1 ADDQ mul0, acc1
@ -2150,7 +2148,6 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
ADCQ $0, mul1 ADCQ $0, mul1
MOVQ mul1, acc5 MOVQ mul1, acc5
// [t3, t2, t1, t0] * acc6
MOVQ acc6, mul0 MOVQ acc6, mul0
MULQ t0 MULQ t0
ADDQ mul0, acc2 ADDQ mul0, acc2
@ -2181,7 +2178,6 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
ADCQ $0, mul1 ADCQ $0, mul1
MOVQ mul1, acc6 MOVQ mul1, acc6
// [t3, t2, t1, t0] * acc7
MOVQ acc7, mul0 MOVQ acc7, mul0
MULQ t0 MULQ t0
ADDQ mul0, acc3 ADDQ mul0, acc3
@ -2211,8 +2207,6 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
ADDQ mul0, acc6 ADDQ mul0, acc6
ADCQ $0, mul1 ADCQ $0, mul1
MOVQ mul1, acc7 MOVQ mul1, acc7
// T = [acc7, acc6, acc5, acc4, acc3, acc2, acc1, acc0]
// First reduction step // First reduction step
MOVQ acc0, mul0 MOVQ acc0, mul0
MOVQ acc0, mul1 MOVQ acc0, mul1
@ -2298,9 +2292,7 @@ TEXT sm2P256MulInternal(SB),NOSPLIT,$8
CMOVQCS acc3, acc7 CMOVQCS acc3, acc7
RET RET
internalMulBMI2: internalMulBMI2:
// [t3, t2, t1, t0] * acc4
MOVQ acc4, mul1 MOVQ acc4, mul1
MULXQ t0, acc0, acc1 MULXQ t0, acc0, acc1
@ -2314,7 +2306,6 @@ internalMulBMI2:
ADCQ mul0, acc3 ADCQ mul0, acc3
ADCQ $0, acc4 ADCQ $0, acc4
// [t3, t2, t1, t0] * acc5
MOVQ acc5, mul1 MOVQ acc5, mul1
MULXQ t0, mul0, hlp MULXQ t0, mul0, hlp
ADDQ mul0, acc1 ADDQ mul0, acc1
@ -2335,7 +2326,6 @@ internalMulBMI2:
ADDQ mul0, acc4 ADDQ mul0, acc4
ADCQ $0, acc5 ADCQ $0, acc5
// [t3, t2, t1, t0] * acc6
MOVQ acc6, mul1 MOVQ acc6, mul1
MULXQ t0, mul0, hlp MULXQ t0, mul0, hlp
ADDQ mul0, acc2 ADDQ mul0, acc2
@ -2356,7 +2346,6 @@ internalMulBMI2:
ADDQ mul0, acc5 ADDQ mul0, acc5
ADCQ $0, acc6 ADCQ $0, acc6
// [t3, t2, t1, t0] * acc7
MOVQ acc7, mul1 MOVQ acc7, mul1
MULXQ t0, mul0, hlp MULXQ t0, mul0, hlp
ADDQ mul0, acc3 ADDQ mul0, acc3
@ -2377,7 +2366,6 @@ internalMulBMI2:
ADDQ mul0, acc6 ADDQ mul0, acc6
ADCQ $0, acc7 ADCQ $0, acc7
// T = [acc7, acc6, acc5, acc4, acc3, acc2, acc1, acc0]
// First reduction step // First reduction step
MOVQ acc0, mul0 MOVQ acc0, mul0
MOVQ acc0, mul1 MOVQ acc0, mul1
@ -2554,7 +2542,6 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
CMPB ·supportBMI2+0(SB), $0x01 CMPB ·supportBMI2+0(SB), $0x01
JEQ internalSqrBMI2 JEQ internalSqrBMI2
// [acc7, acc6, acc5] * acc4
MOVQ acc4, mul0 MOVQ acc4, mul0
MULQ acc5 MULQ acc5
MOVQ mul0, acc1 MOVQ mul0, acc1
@ -2572,7 +2559,6 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
ADCQ $0, mul1 ADCQ $0, mul1
MOVQ mul1, t0 MOVQ mul1, t0
// [acc7, acc6] * acc5
MOVQ acc5, mul0 MOVQ acc5, mul0
MULQ acc6 MULQ acc6
ADDQ mul0, acc3 ADDQ mul0, acc3
@ -2587,7 +2573,6 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
ADCQ $0, mul1 ADCQ $0, mul1
MOVQ mul1, t1 MOVQ mul1, t1
// acc7 * acc6
MOVQ acc6, mul0 MOVQ acc6, mul0
MULQ acc7 MULQ acc7
ADDQ mul0, t1 ADDQ mul0, t1
@ -2628,70 +2613,64 @@ TEXT sm2P256SqrInternal(SB),NOSPLIT,$8
ADCQ mul0, t2 ADCQ mul0, t2
ADCQ DX, t3 ADCQ DX, t3
// T = [t3, t2, t1, t0, acc3, acc2, acc1, acc0]
sm2P256SqrReductionInternal() sm2P256SqrReductionInternal()
RET RET
internalSqrBMI2: internalSqrBMI2:
XORQ t3, t3
// [acc7, acc6, acc5] * acc4
MOVQ acc4, mul1 MOVQ acc4, mul1
MULXQ acc5, acc1, acc2 MULXQ acc5, acc1, acc2
MULXQ acc6, mul0, acc3 MULXQ acc6, mul0, acc3
ADOXQ mul0, acc2 ADDQ mul0, acc2
MULXQ acc7, mul0, t0 MULXQ acc7, mul0, t0
ADOXQ mul0, acc3 ADCQ mul0, acc3
ADOXQ t3, t0 ADCQ $0, t0
// [acc7, acc6] * acc5
MOVQ acc5, mul1 MOVQ acc5, mul1
MULXQ acc6, mul0, hlp MULXQ acc6, mul0, hlp
ADOXQ mul0, acc3 ADDQ mul0, acc3
ADCQ hlp, t0
MULXQ acc7, mul0, t1 MULXQ acc7, mul0, t1
ADCXQ hlp, mul0 ADCQ $0, t1
ADOXQ mul0, t0 ADDQ mul0, t0
ADCXQ t3, t1
// acc7 * acc6
MOVQ acc6, mul1 MOVQ acc6, mul1
MULXQ acc7, mul0, t2 MULXQ acc7, mul0, t2
ADOXQ mul0, t1 ADCQ mul0, t1
ADOXQ t3, t2 ADCQ $0, t2
XORQ t3, t3
// *2 // *2
ADOXQ acc1, acc1 ADDQ acc1, acc1
ADOXQ acc2, acc2 ADCQ acc2, acc2
ADOXQ acc3, acc3 ADCQ acc3, acc3
ADOXQ t0, t0 ADCQ t0, t0
ADOXQ t1, t1 ADCQ t1, t1
ADOXQ t2, t2 ADCQ t2, t2
ADOXQ t3, t3 ADCQ $0, t3
// Missing products // Missing products
MOVQ acc4, mul1 MOVQ acc4, mul1
MULXQ mul1, acc0, acc4 MULXQ mul1, acc0, acc4
ADCXQ acc4, acc1 ADDQ acc4, acc1
MOVQ acc5, mul1 MOVQ acc5, mul1
MULXQ mul1, mul0, acc4 MULXQ mul1, mul0, acc4
ADCXQ mul0, acc2 ADCQ mul0, acc2
ADCXQ acc4, acc3 ADCQ acc4, acc3
MOVQ acc6, mul1 MOVQ acc6, mul1
MULXQ mul1, mul0, acc4 MULXQ mul1, mul0, acc4
ADCXQ mul0, t0 ADCQ mul0, t0
ADCXQ acc4, t1 ADCQ acc4, t1
MOVQ acc7, mul1 MOVQ acc7, mul1
MULXQ mul1, mul0, acc4 MULXQ mul1, mul0, acc4
ADCXQ mul0, t2 ADCQ mul0, t2
ADCXQ acc4, t3 ADCQ acc4, t3
// T = [t3, t2, t1, t0, acc3, acc2, acc1, acc0]
sm2P256SqrReductionInternal() sm2P256SqrReductionInternal()
RET RET