mirror of
https://github.com/emmansun/gmsm.git
synced 2025-05-11 11:36:18 +08:00
Updated SM2 WWMM (2) (markdown)
parent
3dbe3ed846
commit
9bec042da7
@ -399,7 +399,6 @@ $t_0=0+0$
|
|||||||
### 方案二:(移位、加法、减法)
|
### 方案二:(移位、加法、减法)
|
||||||
因为Order素数不是MFMM,所以这个方案其实优势不大、甚至没有优势,尤其是在使用**MULXQ/ADCXQ/ADOXQ**的情况下。
|
因为Order素数不是MFMM,所以这个方案其实优势不大、甚至没有优势,尤其是在使用**MULXQ/ADCXQ/ADOXQ**的情况下。
|
||||||
移位针对 $O_3$、$O_2$ 的乘法
|
移位针对 $O_3$、$O_2$ 的乘法
|
||||||
这里加法没有溢出风险:Y不可能是0xFFFFFFFFFFFFFFFF。
|
|
||||||
|
|
||||||
$T_2=T_1 \ast O=Y \ast O= Y \ast 2^{256}-(Y \ast 2^{32}) \ast 2^{192} - Y \ast 2^{128} + (Y \ast O_1) \ast 2^{64} + (Y \ast O_0)$
|
$T_2=T_1 \ast O=Y \ast O= Y \ast 2^{256}-(Y \ast 2^{32}) \ast 2^{192} - Y \ast 2^{128} + (Y \ast O_1) \ast 2^{64} + (Y \ast O_0)$
|
||||||
|
|
||||||
@ -407,12 +406,11 @@ $T_3=T + T_2=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + t_4 \ast 2
|
|||||||
$T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{256}+(t_3 - Y \ast 2^{32}) \ast 2^{192} + (t_2 - Y) \ast 2^{128} + (t_1 + Y \ast O_1) \ast 2^{64} + (t_0 + Y \ast O_0)$
|
$T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{256}+(t_3 - Y \ast 2^{32}) \ast 2^{192} + (t_2 - Y) \ast 2^{128} + (t_1 + Y \ast O_1) \ast 2^{64} + (t_0 + Y \ast O_0)$
|
||||||
|
|
||||||
```asm
|
```asm
|
||||||
// First reduction step, [ord3, ord2, ord1, ord0] = [1, -0x100000000, -1, ord1, ord0]
|
// First reduction step
|
||||||
MOVQ acc0, AX
|
MOVQ acc0, AX
|
||||||
MULQ p256ordK0<>(SB)
|
MULQ p256ordK0<>(SB)
|
||||||
MOVQ AX, t0 // Y = t0 = (k0 * acc0) mod 2^64
|
MOVQ AX, t0 // Y = t0 = (k0 * acc0) mod 2^64
|
||||||
// calculate the positive part first: [1, 0, 0, ord1, ord0] * t0 + [0, acc3, acc2, acc1, acc0]
|
|
||||||
// the result is [acc0, acc3, acc2, acc1], last lowest limb is dropped.
|
|
||||||
MOVQ p256ord<>+0x00(SB), AX
|
MOVQ p256ord<>+0x00(SB), AX
|
||||||
MULQ t0
|
MULQ t0
|
||||||
ADDQ AX, acc0 // (carry1, acc0) = acc0 + L(t0 * ord0)
|
ADDQ AX, acc0 // (carry1, acc0) = acc0 + L(t0 * ord0)
|
||||||
@ -420,6 +418,16 @@ $T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{25
|
|||||||
MOVQ DX, t1 // t1 = carry1 + H(t0 * ord0)
|
MOVQ DX, t1 // t1 = carry1 + H(t0 * ord0)
|
||||||
MOVQ t0, acc0 // acc0 = t0
|
MOVQ t0, acc0 // acc0 = t0
|
||||||
|
|
||||||
|
// calculate the negative part: [acc0, acc3, acc2, acc1] - [0, 0x100000000, 1, 0] * t0
|
||||||
|
MOVQ t0, AX
|
||||||
|
MOVQ t0, DX
|
||||||
|
SHLQ $32, AX
|
||||||
|
SHRQ $32, DX
|
||||||
|
|
||||||
|
SUBQ t0, acc2
|
||||||
|
SBBQ AX, acc3
|
||||||
|
SBBQ DX, acc0
|
||||||
|
|
||||||
MOVQ p256ord<>+0x08(SB), AX
|
MOVQ p256ord<>+0x08(SB), AX
|
||||||
MULQ t0
|
MULQ t0
|
||||||
ADDQ t1, acc1 // (carry2, acc1) = acc1 + t1
|
ADDQ t1, acc1 // (carry2, acc1) = acc1 + t1
|
||||||
@ -429,15 +437,6 @@ $T_3=t_7 \ast 2^{448} + t_6 \ast 2^{384} + t_5 \ast 2^{320} + (t_4+Y) \ast 2^{25
|
|||||||
ADCQ DX, acc2
|
ADCQ DX, acc2
|
||||||
ADCQ $0, acc3
|
ADCQ $0, acc3
|
||||||
ADCQ $0, acc0
|
ADCQ $0, acc0
|
||||||
// calculate the negative part: [acc0, acc3, acc2, acc1] - [0, 0x100000000, 1, 0] * t0
|
|
||||||
MOVQ t0, AX
|
|
||||||
MOVQ t0, DX
|
|
||||||
SHLQ $32, AX
|
|
||||||
SHRQ $32, DX
|
|
||||||
|
|
||||||
SUBQ t0, acc2
|
|
||||||
SBBQ AX, acc3
|
|
||||||
SBBQ DX, acc0
|
|
||||||
|
|
||||||
```
|
```
|
||||||
乘法: 3
|
乘法: 3
|
||||||
@ -611,16 +610,7 @@ $t_5=t_5 - 0$
|
|||||||
ADCQ $0, DX
|
ADCQ $0, DX
|
||||||
MOVQ DX, BX
|
MOVQ DX, BX
|
||||||
|
|
||||||
MOVQ p256ord<>+0x08(SB), AX
|
MOVQ t0, acc0
|
||||||
MULQ t0
|
|
||||||
ADDQ BX, acc1
|
|
||||||
ADCQ $0, DX
|
|
||||||
ADDQ AX, acc1
|
|
||||||
ADCQ DX, acc2
|
|
||||||
ADCQ $0, acc3
|
|
||||||
ADCQ t0, acc4
|
|
||||||
ADCQ $0, acc5
|
|
||||||
|
|
||||||
MOVQ t0, AX
|
MOVQ t0, AX
|
||||||
MOVQ t0, DX
|
MOVQ t0, DX
|
||||||
SHLQ $32, AX
|
SHLQ $32, AX
|
||||||
@ -628,8 +618,17 @@ $t_5=t_5 - 0$
|
|||||||
|
|
||||||
SUBQ t0, acc2
|
SUBQ t0, acc2
|
||||||
SBBQ AX, acc3
|
SBBQ AX, acc3
|
||||||
SBBQ DX, acc4
|
SBBQ DX, acc0
|
||||||
SBBQ $0, acc5
|
|
||||||
|
MOVQ p256ord<>+0x08(SB), AX
|
||||||
|
MULQ t0
|
||||||
|
ADDQ BX, acc1
|
||||||
|
ADCQ $0, DX
|
||||||
|
ADDQ AX, acc1
|
||||||
|
ADCQ DX, acc2
|
||||||
|
ADCQ $0, acc3
|
||||||
|
ADCQ acc0, acc4
|
||||||
|
ADCQ $0, acc5
|
||||||
```
|
```
|
||||||
乘法: 3
|
乘法: 3
|
||||||
移位:2
|
移位:2
|
||||||
@ -674,7 +673,7 @@ $t_5=t_5 - 0$
|
|||||||
| ----------- | ----------- | ----------- | ----------- | ----------- |
|
| ----------- | ----------- | ----------- | ----------- | ----------- |
|
||||||
| 方案一 | 5 | 0 | 15 | 0 |
|
| 方案一 | 5 | 0 | 15 | 0 |
|
||||||
| 方案一(MULX/ADCX/ADOX) | 5 | 0 | 10 | 0 |
|
| 方案一(MULX/ADCX/ADOX) | 5 | 0 | 10 | 0 |
|
||||||
| 方案二 | 3 | 2 | 9 | 4 |
|
| 方案二 | 3 | 2 | 9 | 3 |
|
||||||
| 方案二(MULX) | 3 | 2 | 8 | 4 |
|
| 方案二(MULX) | 3 | 2 | 8 | 4 |
|
||||||
|
|
||||||
看来在支持**MULXQ/ADCXQ/ADOXQ**的情况下,使用方案一(MULX/ADCX/ADOX)更好!
|
看来在支持**MULXQ/ADCXQ/ADOXQ**的情况下,使用方案一(MULX/ADCX/ADOX)更好!
|
||||||
|
Loading…
x
Reference in New Issue
Block a user