mirror of
https://github.com/emmansun/gmsm.git
synced 2025-05-11 03:26:17 +08:00
Updated SM2 WWMM (2) (markdown)
parent
3dbe3ed846
commit
9bec042da7
@ -399,7 +399,6 @@ $t_0=0+0$
|
||||
### 方案二:(移位、加法、减法)
|
||||
因为Order素数不是MFMM,所以这个方案其实优势不大、甚至没有优势,尤其是在**使用MULXQ/ADCXQ/ADOXQ**的情况下。
|
||||
移位针对的是与 $O_3$、$O_2$ 的乘法。
|
||||
这里加法没有溢出风险:Y不可能是0xFFFFFFFFFFFFFFFF。
|
||||
|
||||
$T_2=T_1 \ast O=Y \ast O= Y \ast 2^{256}-(Y \ast 2^{32}) \ast 2^{192} - Y \ast 2^{128} + (Y \ast O_1) \ast 2^{64} + (Y \ast O_0)$
|
||||
|
||||
@ -407,12 +406,11 @@ $T_3=T + T_2=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + t_4 \ast 2
|
||||
$T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{256}+(t_3 - Y \ast 2^{32}) \ast 2^{192} + (t_2 - Y) \ast 2^{128} + (t_1 + Y \ast O_1) \ast 2^{64} + (t_0 + Y \ast O_0)$
|
||||
|
||||
```asm
|
||||
// First reduction step, [ord3, ord2, ord1, ord0] = [1, -0x100000000, -1, ord1, ord0]
|
||||
// First reduction step
|
||||
MOVQ acc0, AX
|
||||
MULQ p256ordK0<>(SB)
|
||||
MOVQ AX, t0 // Y = t0 = (k0 * acc0) mod 2^64
|
||||
// calculate the positive part first: [1, 0, 0, ord1, ord0] * t0 + [0, acc3, acc2, acc1, acc0]
|
||||
// the result is [acc0, acc3, acc2, acc1], last lowest limb is dropped.
|
||||
|
||||
MOVQ p256ord<>+0x00(SB), AX
|
||||
MULQ t0
|
||||
ADDQ AX, acc0 // (carry1, acc0) = acc0 + L(t0 * ord0)
|
||||
@ -420,6 +418,16 @@ $T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{25
|
||||
MOVQ DX, t1 // t1 = carry1 + H(t0 * ord0)
|
||||
MOVQ t0, acc0 // acc0 = t0
|
||||
|
||||
// calculate the negative part: [acc0, acc3, acc2, acc1] - [0, 0x100000000, 1, 0] * t0
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
SUBQ t0, acc2
|
||||
SBBQ AX, acc3
|
||||
SBBQ DX, acc0
|
||||
|
||||
MOVQ p256ord<>+0x08(SB), AX
|
||||
MULQ t0
|
||||
ADDQ t1, acc1 // (carry2, acc1) = acc1 + t1
|
||||
@ -429,15 +437,6 @@ $T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{25
|
||||
ADCQ DX, acc2
|
||||
ADCQ $0, acc3
|
||||
ADCQ $0, acc0
|
||||
// calculate the negative part: [acc0, acc3, acc2, acc1] - [0, 0x100000000, 1, 0] * t0
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
SHRQ $32, DX
|
||||
|
||||
SUBQ t0, acc2
|
||||
SBBQ AX, acc3
|
||||
SBBQ DX, acc0
|
||||
|
||||
```
|
||||
乘法: 3
|
||||
@ -611,16 +610,7 @@ $t_5=t_5 - 0$
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, BX
|
||||
|
||||
MOVQ p256ord<>+0x08(SB), AX
|
||||
MULQ t0
|
||||
ADDQ BX, acc1
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc1
|
||||
ADCQ DX, acc2
|
||||
ADCQ $0, acc3
|
||||
ADCQ t0, acc4
|
||||
ADCQ $0, acc5
|
||||
|
||||
MOVQ t0, acc0
|
||||
MOVQ t0, AX
|
||||
MOVQ t0, DX
|
||||
SHLQ $32, AX
|
||||
@ -628,8 +618,17 @@ $t_5=t_5 - 0$
|
||||
|
||||
SUBQ t0, acc2
|
||||
SBBQ AX, acc3
|
||||
SBBQ DX, acc4
|
||||
SBBQ $0, acc5
|
||||
SBBQ DX, acc0
|
||||
|
||||
MOVQ p256ord<>+0x08(SB), AX
|
||||
MULQ t0
|
||||
ADDQ BX, acc1
|
||||
ADCQ $0, DX
|
||||
ADDQ AX, acc1
|
||||
ADCQ DX, acc2
|
||||
ADCQ $0, acc3
|
||||
ADCQ acc0, acc4
|
||||
ADCQ $0, acc5
|
||||
```
|
||||
乘法: 3
|
||||
移位:2
|
||||
@ -674,7 +673,7 @@ $t_5=t_5 - 0$
|
||||
| ----------- | ----------- | ----------- | ----------- | ----------- |
|
||||
| 方案一 | 5 | 0 | 15 | 0 |
|
||||
| 方案一(MULX/ADCX/ADOX) | 5 | 0 | 10 | 0 |
|
||||
| 方案二 | 3 | 2 | 9 | 4 |
|
||||
| 方案二 | 3 | 2 | 9 | 3 |
|
||||
| 方案二(MULX) | 3 | 2 | 8 | 4 |
|
||||
|
||||
看来在支持**MULXQ/ADCXQ/ADOXQ**的情况下,使用方案一(MULX/ADCX/ADOX)更好!
|
||||
|
Loading…
x
Reference in New Issue
Block a user