From 7d1219c4d53b92d9df70887fb7104f1eab1faf48 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sun, 19 Dec 2021 21:10:03 +0800 Subject: [PATCH] Updated MFMM (markdown) --- MFMM.md | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/MFMM.md b/MFMM.md index 6cb14cd..a73a12b 100644 --- a/MFMM.md +++ b/MFMM.md @@ -63,7 +63,7 @@ acc0, acc1, acc2, acc3, acc4, acc5是64位寄存器 UMULH acc0, const1, acc0 // acc0 = H(acc0 * p3) ADCS t0, acc2 // (carry2, acc2) = carry1 + acc2 + H(acc0 * 2^32) ADCS t1, acc3 // (carry3, acc3) = carry2 + acc3 + L(acc0 * p3) - ADC $0, acc0 // acc0 = carry3 + H(acc0 * p3), why? + ADC $0, acc0 // acc0 = carry3 + H(acc0 * p3), why? 猜测后续有优化 SM2曲线 p = 0x fffffffeffffffff ffffffffffffffff ffffffff00000000 ffffffffffffffff @@ -73,10 +73,33 @@ acc0, acc1, acc2, acc3, acc4, acc5是64位寄存器 = (2^64 - 2^32 ) * 2^192 + ( - 2^32 + 1) * 2^64 - 1 = 2^256 + (-2^32) * 2^192 + (1-2^32)*2^64 - 1 - p = p3 * 2^192 + p2*2^128 + p1 * 2^64 + 2^64 - 1 (tmp + acc0 * p) / 2^64 = acc4 * 2^192 + (acc3 + acc0*p3) * 2^128 + (acc2 + acc0*p2) * 2^64 + acc1 + acc0*p1 + acc0 + amd64 汇编表示为: + MOVQ p256p<>+0x08(SB), AX + MULQ acc0 + ADDQ acc0, acc1 // (carry1, acc1) = acc0 + acc1 + ADCQ $0, DX // DX = carry1 + H(acc0 * p1) + ADDQ AX, acc1 // (carry2, acc1) = acc0 + acc1 + L(acc0*p1) + ADCQ $0, DX // DX = DX + carry2 + MOVQ DX, t1 // t1 = H(acc0 * p1) + carry1 + carry2 + MOVQ p256p<>+0x010(SB), AX + MULQ acc0 + ADDQ t1, acc2 // (carry3, acc2) = t1 + acc2 + ADCQ $0, DX // DX = carry3 + H(acc0 * p2) + ADDQ AX, acc2 // (carry4, acc2) = L(acc0 * p2) + L(t1 + acc2) + ADCQ $0, DX // DX = DX + carry4 + MOVQ DX, t1 // t1 = H(acc0 * p2) + carry3 + carry4 + MOVQ p256p<>+0x018(SB), AX + MULQ acc0 + ADDQ t1, acc3 // (carry5, acc3) = t1 + acc3 + ADCQ $0, DX // DX = carry5 + H(acc0 * p3) + ADDQ AX, acc3 // (carry6, acc3) = L(acc0 * p3) + L(t1 + acc3) + ADCQ DX, acc4 // (carry7, acc4) = acc4 + DX + carry6 + ADCQ $0, acc5 // acc5 = carry7 + XORQ acc0, acc0 + ====== 
用加减替代乘法,但存在潜在风险,进位/借位处理太复杂,所以该实现已经被回滚。 p*acc0 = acc0*2^256 - (acc0*2^32)*2^192 + (acc0 - acc0*2^32)*2^64 - acc0