gmsm/internal/sm2ec/p256_asm_ord.go
2024-08-29 10:02:16 +08:00

192 lines
5.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//go:build (amd64 || arm64 || s390x || ppc64le) && !purego
package sm2ec
import "errors"
// p256OrdMul performs Montgomery multiplication modulo ord(G), the order of
// the base point. It sets res = in1 * in2 * R⁻¹.
//
// The declaration has no Go body in this file.
// NOTE(review): presumably implemented in per-architecture assembly for the
// platforms named in the build constraint — confirm.
//
//go:noescape
func p256OrdMul(res, in1, in2 *p256OrdElement)
// p256OrdSqr performs a Montgomery squaring modulo ord(G), the order of the
// base point, repeated n times (n >= 1), and stores the result in res.
//
// The declaration has no Go body in this file.
// NOTE(review): presumably implemented in per-architecture assembly for the
// platforms named in the build constraint — confirm.
//
//go:noescape
func p256OrdSqr(res, in *p256OrdElement, n int)
// This code operates in the Montgomery domain where R = 2²⁵⁶ mod n and n is
// the order of the scalar field. Elements in the Montgomery domain take the
// form a×R and p256OrdMul calculates (a × b × R⁻¹) mod n.
//
// RR is R in the domain, or R×R mod n, thus p256OrdMul(x, x, RR) gives x×R,
// i.e. converts x into the Montgomery domain.
var RR = &p256OrdElement{0x901192af7c114f20, 0x3464504ade6fa2fa, 0x620fc84c3affe0d4, 0x1eb5e412a22b3d3b}
// P256OrdInverse returns k⁻¹ mod ord(G), computed as k^(n-2) mod n where n is
// the order of the scalar field. k must be exactly 32 bytes long; otherwise
// a nil slice and an "invalid scalar length" error are returned. If k is
// zero, the result is zero.
// NOTE(review): k and the result are presumably big-endian, judging by the
// p256OrdBigToLittle / p256OrdLittleToBig helper names — confirm.
//
// n-2 =
// 1111111111111111111111111111111011111111111111111111111111111111
// 1111111111111111111111111111111111111111111111111111111111111111
// 0111001000000011110111110110101100100001110001100000010100101011
// 0101001110111011111101000000100100111001110101010100000100100001
func P256OrdInverse(k []byte) ([]byte, error) {
if len(k) != 32 {
return nil, errors.New("invalid scalar length")
}
x := new(p256OrdElement)
p256OrdBigToLittle(x, (*[32]byte)(k))
// Convert x into the Montgomery domain (x×R) by multiplying with RR.
p256OrdMul(x, x, RR)
// Inversion is implemented as exponentiation with exponent n - 2.
// The sequence of 41 multiplications and 253 squarings is derived from the
// following addition chain generated with github.com/mmcloughlin/addchain v0.4.0.
// In the chain, "a + b" is a multiplication and "a << s" is s squarings.
//
// _10 = 2*1
// _11 = 1 + _10
// _100 = 1 + _11
// _101 = 1 + _100
// _111 = _10 + _101
// _1001 = _10 + _111
// _1101 = _100 + _1001
// _1111 = _10 + _1101
// _11110 = 2*_1111
// _11111 = 1 + _11110
// _111110 = 2*_11111
// _111111 = 1 + _111110
// _1111110 = 2*_111111
// i20 = _1111110 << 6 + _1111110
// x18 = i20 << 5 + _111111
// x31 = x18 << 13 + i20 + 1
// i42 = 2*x31
// i44 = i42 << 2
// i140 = ((i44 << 32 + i44) << 29 + i42) << 33
// i150 = ((i44 + i140 + _111) << 4 + _111) << 3
// i170 = ((1 + i150) << 11 + _1111) << 6 + _11111
// i183 = ((i170 << 5 + _1101) << 3 + _11) << 3
// i198 = ((1 + i183) << 7 + _111) << 5 + _11
// i219 = ((i198 << 9 + _101) << 5 + _101) << 5
// i231 = ((_1101 + i219) << 5 + _1001) << 4 + _1101
// i244 = ((i231 << 2 + _11) << 7 + _111111) << 2
// i262 = ((1 + i244) << 10 + _1001) << 5 + _111
// i277 = ((i262 << 5 + _111) << 4 + _101) << 4
// return ((_101 + i277) << 9 + _1001) << 5 + 1
//
// z receives the final result; t0..t9 are scratch registers for the chain.
var z = new(p256OrdElement)
var t0 = new(p256OrdElement)
var t1 = new(p256OrdElement)
var t2 = new(p256OrdElement)
var t3 = new(p256OrdElement)
var t4 = new(p256OrdElement)
var t5 = new(p256OrdElement)
var t6 = new(p256OrdElement)
var t7 = new(p256OrdElement)
var t8 = new(p256OrdElement)
var t9 = new(p256OrdElement)
// Small powers: t3 = _10, z = _11, t4 = _100, t1 = _101, t2 = _111,
// t0 = _1001, then t4 is overwritten with _1101, and t6 = _1111.
p256OrdSqr(t3, x, 1)
p256OrdMul(z, x, t3)
p256OrdMul(t4, x, z)
p256OrdMul(t1, x, t4)
p256OrdMul(t2, t3, t1)
p256OrdMul(t0, t3, t2)
p256OrdMul(t4, t4, t0)
p256OrdMul(t6, t3, t4)
// t5 = _11111, t3 = _111111, t7 = _1111110.
p256OrdSqr(t3, t6, 1)
p256OrdMul(t5, x, t3)
p256OrdSqr(t3, t5, 1)
p256OrdMul(t3, x, t3)
p256OrdSqr(t7, t3, 1)
// t7 = i20, t8 = x18, then t7 = x31.
p256OrdSqr(t8, t7, 6)
p256OrdMul(t7, t7, t8)
p256OrdSqr(t8, t7, 5)
p256OrdMul(t8, t3, t8)
p256OrdSqr(t8, t8, 13)
p256OrdMul(t7, t7, t8)
p256OrdMul(t7, x, t7)
// t8 = i42, t7 = i44, then t8 = i140.
p256OrdSqr(t8, t7, 1)
p256OrdSqr(t7, t8, 2)
p256OrdSqr(t9, t7, 32)
p256OrdMul(t9, t7, t9)
p256OrdSqr(t9, t9, 29)
p256OrdMul(t8, t8, t9)
p256OrdSqr(t8, t8, 33)
// t7 = i150.
p256OrdMul(t7, t7, t8)
p256OrdMul(t7, t2, t7)
p256OrdSqr(t7, t7, 4)
p256OrdMul(t7, t2, t7)
p256OrdSqr(t7, t7, 3)
// t5 = i170.
p256OrdMul(t7, x, t7)
p256OrdSqr(t7, t7, 11)
p256OrdMul(t6, t6, t7)
p256OrdSqr(t6, t6, 6)
p256OrdMul(t5, t5, t6)
// t5 = i183.
p256OrdSqr(t5, t5, 5)
p256OrdMul(t5, t4, t5)
p256OrdSqr(t5, t5, 3)
p256OrdMul(t5, z, t5)
p256OrdSqr(t5, t5, 3)
// t5 = i198.
p256OrdMul(t5, x, t5)
p256OrdSqr(t5, t5, 7)
p256OrdMul(t5, t2, t5)
p256OrdSqr(t5, t5, 5)
p256OrdMul(t5, z, t5)
// t5 = i219.
p256OrdSqr(t5, t5, 9)
p256OrdMul(t5, t1, t5)
p256OrdSqr(t5, t5, 5)
p256OrdMul(t5, t1, t5)
p256OrdSqr(t5, t5, 5)
// t4 = i231.
p256OrdMul(t5, t4, t5)
p256OrdSqr(t5, t5, 5)
p256OrdMul(t5, t0, t5)
p256OrdSqr(t5, t5, 4)
p256OrdMul(t4, t4, t5)
// t3 = i244.
p256OrdSqr(t4, t4, 2)
p256OrdMul(t4, z, t4)
p256OrdSqr(t4, t4, 7)
p256OrdMul(t3, t3, t4)
p256OrdSqr(t3, t3, 2)
// t3 = i262.
p256OrdMul(t3, x, t3)
p256OrdSqr(t3, t3, 10)
p256OrdMul(t3, t0, t3)
p256OrdSqr(t3, t3, 5)
p256OrdMul(t3, t2, t3)
// t2 = i277.
p256OrdSqr(t3, t3, 5)
p256OrdMul(t2, t2, t3)
p256OrdSqr(t2, t2, 4)
p256OrdMul(t2, t1, t2)
p256OrdSqr(t2, t2, 4)
// Final row of the chain: z = ((_101 + i277) << 9 + _1001) << 5 + 1.
p256OrdMul(t1, t1, t2)
p256OrdSqr(t1, t1, 9)
p256OrdMul(t0, t0, t1)
p256OrdSqr(t0, t0, 5)
p256OrdMul(z, x, t0)
// Convert z out of the Montgomery domain and serialize it to bytes.
return p256OrderFromMont(z), nil
}
// P256OrdMul returns the product in1 × in2 reduced modulo ord(G), the order
// of the base point.
//
// Both operands must be exactly 32 bytes long; otherwise a nil slice and an
// "invalid scalar length" error are returned.
func P256OrdMul(in1, in2 []byte) ([]byte, error) {
	if !(len(in1) == 32 && len(in2) == 32) {
		return nil, errors.New("invalid scalar length")
	}

	var lhs, rhs, product p256OrdElement

	// Load both operands, then lift each one into the Montgomery domain
	// (a becomes a×R) by multiplying with RR.
	p256OrdBigToLittle(&lhs, (*[32]byte)(in1))
	p256OrdBigToLittle(&rhs, (*[32]byte)(in2))
	p256OrdMul(&lhs, &lhs, RR)
	p256OrdMul(&rhs, &rhs, RR)

	// (a×R) × (b×R) × R⁻¹ = (a×b)×R, so the product is still in the
	// Montgomery domain and has to be converted back before returning.
	p256OrdMul(&product, &lhs, &rhs)
	return p256OrderFromMont(&product), nil
}
// p256OrderFromMont converts in out of the Montgomery domain and returns it
// serialized as a 32-byte slice. The conversion is done in place, so in
// holds the converted value when the function returns.
func p256OrderFromMont(in *p256OrdElement) []byte {
	// A Montgomery multiplication by 1 multiplies by R⁻¹, which strips the
	// R factor from a value of the form a×R.
	unit := p256OrdElement{1}
	p256OrdMul(in, in, &unit)

	encoded := new([32]byte)
	p256OrdLittleToBig(encoded, in)
	return encoded[:]
}