Updated MFMM (markdown)

Sun Yimin 2021-12-19 21:10:03 +08:00
parent fe929e491c
commit 7d1219c4d5

27
MFMM.md

@ -63,7 +63,7 @@ acc0, acc1, acc2, acc3, acc4, acc5是64位寄存器
UMULH acc0, const1, acc0 // acc0 = H(acc0 * p3) UMULH acc0, const1, acc0 // acc0 = H(acc0 * p3)
ADCS t0, acc2 // (carry2, acc2) = carry1 + acc2 + H(acc0 * 2^32) ADCS t0, acc2 // (carry2, acc2) = carry1 + acc2 + H(acc0 * 2^32)
ADCS t1, acc3 // (carry3, acc3) = carry2 + acc3 + L(acc0 * p3) ADCS t1, acc3 // (carry3, acc3) = carry2 + acc3 + L(acc0 * p3)
ADC $0, acc0 // acc0 = carry3 + H(acc0 * p3), why? ADC $0, acc0 // acc0 = carry3 + H(acc0 * p3), why? 猜测后续有优化
SM2曲线 SM2曲线
p = 0x fffffffeffffffff ffffffffffffffff ffffffff00000000 ffffffffffffffff p = 0x fffffffeffffffff ffffffffffffffff ffffffff00000000 ffffffffffffffff
@ -73,10 +73,33 @@ acc0, acc1, acc2, acc3, acc4, acc5是64位寄存器
= (2^64 - 2^32 ) * 2^192 + ( - 2^32 + 1) * 2^64 - 1 = (2^64 - 2^32 ) * 2^192 + ( - 2^32 + 1) * 2^64 - 1
= 2^256 + (-2^32) * 2^192 + (1-2^32)*2^64 - 1 = 2^256 + (-2^32) * 2^192 + (1-2^32)*2^64 - 1
p = p3 * 2^192 + p2*2^128 + p1 * 2^64 + 2^64 - 1 p = p3 * 2^192 + p2*2^128 + p1 * 2^64 + 2^64 - 1
(tmp + acc0 * p) / 2^64 = acc4 * 2^192 + (acc3 + acc0*p3) * 2^128 + (acc2 + acc0*p2) * 2^64 + acc1 + acc0*p1 + acc0 (tmp + acc0 * p) / 2^64 = acc4 * 2^192 + (acc3 + acc0*p3) * 2^128 + (acc2 + acc0*p2) * 2^64 + acc1 + acc0*p1 + acc0
amd64 汇编表示为:
// One reduction step in Go amd64 assembly: adds acc0 * p to the limbs above
// acc0 (acc1..acc5), one 64-bit word of p at a time, then clears acc0.
// Notation: L(x) / H(x) are the low / high 64 bits of a 128-bit product;
// p1, p2, p3 name the words loaded from offsets 0x08, 0x10, 0x18 of p256p.
// The word at offset 0x00 is never loaded: only acc0 itself is added to acc1.
// NOTE(review): presumably because the lowest word of p is 2^64 - 1, so
// acc0 * p0 cancels acc0 in the lowest limb and adds acc0 to the next one —
// confirm against the p256p table definition, which is not shown here.
MOVQ p256p<>+0x08(SB), AX // AX = p1 (64-bit word at offset 0x08 of p256p)
MULQ acc0 // DX:AX = acc0 * p1, i.e. DX = H(acc0 * p1), AX = L(acc0 * p1)
ADDQ acc0, acc1 // (carry1, acc1) = acc0 + acc1
ADCQ $0, DX // DX = carry1 + H(acc0 * p1)
ADDQ AX, acc1 // (carry2, acc1) = acc1 + L(acc0 * p1)
ADCQ $0, DX // DX = DX + carry2
MOVQ DX, t1 // t1 = H(acc0 * p1) + carry1 + carry2, to be added into the next limb
MOVQ p256p<>+0x010(SB), AX // AX = p2 (64-bit word at offset 0x10 of p256p)
MULQ acc0 // DX:AX = acc0 * p2
ADDQ t1, acc2 // (carry3, acc2) = t1 + acc2
ADCQ $0, DX // DX = carry3 + H(acc0 * p2)
ADDQ AX, acc2 // (carry4, acc2) = acc2 + L(acc0 * p2)
ADCQ $0, DX // DX = DX + carry4
MOVQ DX, t1 // t1 = H(acc0 * p2) + carry3 + carry4, to be added into the next limb
MOVQ p256p<>+0x018(SB), AX // AX = p3 (64-bit word at offset 0x18 of p256p)
MULQ acc0 // DX:AX = acc0 * p3
ADDQ t1, acc3 // (carry5, acc3) = t1 + acc3
ADCQ $0, DX // DX = carry5 + H(acc0 * p3)
ADDQ AX, acc3 // (carry6, acc3) = acc3 + L(acc0 * p3)
ADCQ DX, acc4 // (carry7, acc4) = acc4 + DX + carry6
ADCQ $0, acc5 // acc5 = acc5 + carry7 (equals carry7 only if acc5 was 0 before this step; not visible here)
XORQ acc0, acc0 // acc0 = 0
====== ======
用加减替代乘法,但存在潜在风险,进位/借位处理太复杂,所以该实现已经被回滚 用加减替代乘法,但存在潜在风险,进位/借位处理太复杂,所以该实现已经被回滚
p*acc0 = acc0*2^256 -(acc0*2^32)*2^192 + (acc0 - acc0*2^32)*2^64 - acc0 p*acc0 = acc0*2^256 -(acc0*2^32)*2^192 + (acc0 - acc0*2^32)*2^64 - acc0