internal/sm2ec: s390x p256OrdSqr

This commit is contained in:
Sun Yimin 2024-08-23 17:43:18 +08:00 committed by GitHub
parent 807c505e0c
commit 9195b3049d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 191 additions and 37 deletions

View File

@ -546,11 +546,7 @@ loop_select:
#undef CPOOL #undef CPOOL
// --------------------------------------- // ---------------------------------------
// sm2p256OrdMulInternal
// func p256OrdMul(res, in1, in2 *p256OrdElement)
#define res_ptr R1
#define x_ptr R2
#define y_ptr R3
#define X0 V0 #define X0 V0
#define X1 V1 #define X1 V1
#define Y0 V2 #define Y0 V2
@ -575,31 +571,7 @@ loop_select:
#define MK0 V30 #define MK0 V30
#define K0 V31 #define K0 V31
TEXT ·p256OrdMul(SB), NOSPLIT, $0 TEXT sm2p256OrdMulInternal<>(SB), NOSPLIT, $0-0
MOVD res+0(FP), res_ptr
MOVD in1+8(FP), x_ptr
MOVD in2+16(FP), y_ptr
VZERO T2
MOVD $p256ordK0<>+0x00(SB), R4
// VLEF $3, 0(R4), K0
WORD $0xE7F40000
BYTE $0x38
BYTE $0x03
MOVD $p256ord<>+0x00(SB), R4
VL 16(R4), M0
VL 0(R4), M1
VL (0*16)(x_ptr), X0
VPDI $0x4, X0, X0, X0
VL (1*16)(x_ptr), X1
VPDI $0x4, X1, X1, X1
VL (0*16)(y_ptr), Y0
VPDI $0x4, Y0, Y0, Y0
VL (1*16)(y_ptr), Y1
VPDI $0x4, Y1, Y1, Y1
// ---------------------------------------------------------------------------/ // ---------------------------------------------------------------------------/
VREPF $3, Y0, YDIG VREPF $3, Y0, YDIG
VMLF X0, YDIG, ADD1 VMLF X0, YDIG, ADD1
@ -615,6 +587,7 @@ TEXT ·p256OrdMul(SB), NOSPLIT, $0
VMALF M1, MK0, ADD2, RED2 VMALF M1, MK0, ADD2, RED2
VMALHF M1, MK0, ADD2, RED2H VMALHF M1, MK0, ADD2, RED2H
VZERO T2
VSLDB $12, RED2, RED1, RED1 VSLDB $12, RED2, RED1, RED1
VSLDB $12, T2, RED2, RED2 VSLDB $12, T2, RED2, RED2
@ -897,15 +870,8 @@ TEXT ·p256OrdMul(SB), NOSPLIT, $0
VSEL T0, ADD1, T2, T0 VSEL T0, ADD1, T2, T0
VSEL T1, ADD2, T2, T1 VSEL T1, ADD2, T2, T1
VPDI $0x4, T0, T0, T0
VST T0, (0*16)(res_ptr)
VPDI $0x4, T1, T1, T1
VST T1, (1*16)(res_ptr)
RET RET
#undef res_ptr
#undef x_ptr
#undef y_ptr
#undef X0 #undef X0
#undef X1 #undef X1
#undef Y0 #undef Y0
@ -931,6 +897,143 @@ TEXT ·p256OrdMul(SB), NOSPLIT, $0
#undef MK0 #undef MK0
#undef K0 #undef K0
// ---------------------------------------
// Parameters
//
// sm2p256OrdSqrInternal squares the value held in X0:X1 (V0:V1) by copying it
// into the second-operand registers Y0:Y1 (V2:V3) and then transferring control
// to sm2p256OrdMulInternal, so that routine multiplies the value by itself.
//
// The callers in this file load M0/M1 (V4/V5) and K0 before calling, and read
// the result from T0:T1 (V6:V7) afterwards.
// NOTE(review): presumably sm2p256OrdMulInternal requires those constants to be
// preloaded by the caller -- confirm against its full body.
#define X0 V0
#define X1 V1
#define Y0 V2
#define Y1 V3
TEXT sm2p256OrdSqrInternal<>(SB), NOFRAME|NOSPLIT, $0
// Duplicate the operand: Y = X.
VLR X0, Y0
VLR X1, Y1
// Tail jump (BR rather than CALL): no new return address is set up here, so the
// RET of sm2p256OrdMulInternal is expected to return directly to this routine's
// caller.
BR sm2p256OrdMulInternal<>(SB)
#undef X0
#undef X1
#undef Y0
#undef Y1
// ---------------------------------------
// func p256OrdMul(res, in1, in2 *p256OrdElement)
//
// Go-callable wrapper around sm2p256OrdMulInternal. It loads the constants and
// both operands into vector registers, calls the internal routine, and stores
// the result held in T0:T1 to res.
//
// Register use in this wrapper:
//   R1 (res_ptr), R2 (x_ptr), R3 (y_ptr) - argument pointers
//   R4                                   - scratch address of the constant tables
//   V0:V1 (X0:X1), V2:V3 (Y0:Y1)         - operands in1 and in2
//   V4:V5 (M0:M1)                        - the two 16-byte halves of p256ord<>
//   V6:V7 (T0:T1)                        - result produced by the internal routine
#define res_ptr R1
#define x_ptr R2
#define y_ptr R3
#define X0 V0
#define X1 V1
#define Y0 V2
#define Y1 V3
#define M0 V4
#define M1 V5
#define T0 V6
#define T1 V7
TEXT ·p256OrdMul(SB), NOSPLIT, $0
// Fetch the three pointer arguments.
MOVD res+0(FP), res_ptr
MOVD in1+8(FP), x_ptr
MOVD in2+16(FP), y_ptr
// Load the K0 constant from p256ordK0<>. The instruction is emitted as raw
// bytes; the comment below gives the intended mnemonic.
MOVD $p256ordK0<>+0x00(SB), R4
// VLEF $3, 0(R4), K0
WORD $0xE7F40000
BYTE $0x38
BYTE $0x03
// Load the p256ord<> table: bytes 16..31 into M0, bytes 0..15 into M1.
MOVD $p256ord<>+0x00(SB), R4
VL 16(R4), M0
VL 0(R4), M1
// Load both operands. Each 16-byte half is followed by VPDI $0x4 with identical
// source registers, which exchanges the two 64-bit doublewords of the register;
// the same permutation is applied again to the result before it is stored.
VL (0*16)(x_ptr), X0
VPDI $0x4, X0, X0, X0
VL (1*16)(x_ptr), X1
VPDI $0x4, X1, X1, X1
VL (0*16)(y_ptr), Y0
VPDI $0x4, Y0, Y0, Y0
VL (1*16)(y_ptr), Y1
VPDI $0x4, Y1, Y1, Y1
// Multiply; the product comes back in T0:T1.
CALL sm2p256OrdMulInternal<>(SB)
// Undo the doubleword swap and write the result to res.
VPDI $0x4, T0, T0, T0
VST T0, (0*16)(res_ptr)
VPDI $0x4, T1, T1, T1
VST T1, (1*16)(res_ptr)
RET
#undef res_ptr
#undef x_ptr
#undef y_ptr
#undef X0
#undef X1
#undef Y0
#undef Y1
#undef M0
#undef M1
#undef T0
#undef T1
// ---------------------------------------
// func p256OrdSqr(res, in *p256OrdElement, n int)
//
// Squares in repeatedly by calling sm2p256OrdSqrInternal in a loop and stores
// the final value to res. The operand is loaded once before the loop and each
// intermediate result is fed back through registers, so memory is touched only
// for the initial load and the final store.
//
// The loop is bottom-tested: the body always executes once before n is
// examined, so the squaring is performed at least one time even when n <= 0.
//
// Register use in this routine:
//   R1 (res_ptr), R2 (x_ptr) - argument pointers
//   R5 (COUNT)               - number of squarings done so far
//   R6 (N)                   - requested number of squarings
//   R4                       - scratch address of the constant tables
//   V0:V1 (X0:X1)            - current value to be squared
//   V4:V5 (M0:M1)            - the two 16-byte halves of p256ord<>
//   V6:V7 (T0:T1)            - result of the most recent squaring
#define res_ptr R1
#define x_ptr R2
#define COUNT R5
#define N R6
#define X0 V0
#define X1 V1
#define M0 V4
#define M1 V5
#define T0 V6
#define T1 V7
TEXT ·p256OrdSqr(SB), NOSPLIT, $0
// Fetch the arguments and reset the iteration counter.
MOVD res+0(FP), res_ptr
MOVD in+8(FP), x_ptr
MOVD n+16(FP), N
MOVD $0, COUNT
// Load the K0 constant from p256ordK0<>. The instruction is emitted as raw
// bytes; the comment below gives the intended mnemonic.
MOVD $p256ordK0<>+0x00(SB), R4
// VLEF $3, 0(R4), K0
WORD $0xE7F40000
BYTE $0x38
BYTE $0x03
// Load the p256ord<> table: bytes 16..31 into M0, bytes 0..15 into M1.
MOVD $p256ord<>+0x00(SB), R4
VL 16(R4), M0
VL 0(R4), M1
// Load the operand; VPDI $0x4 with identical source registers exchanges the two
// 64-bit doublewords of each 16-byte half.
VL (0*16)(x_ptr), X0
VPDI $0x4, X0, X0, X0
VL (1*16)(x_ptr), X1
VPDI $0x4, X1, X1, X1
loop:
// One squaring: the result is returned in T0:T1.
CALL sm2p256OrdSqrInternal<>(SB)
// Feed the result back as the input of the next iteration.
VLR T0, X0
VLR T1, X1
// The counter is advanced and compared with word (W) instructions although n
// was loaded with a 64-bit MOVD.
// NOTE(review): confirm callers never pass an n outside the 32-bit range.
ADDW $1, COUNT
CMPW COUNT, N
BLT loop
// Undo the doubleword swap and write the final value to res.
VPDI $0x4, T0, T0, T0
VST T0, (0*16)(res_ptr)
VPDI $0x4, T1, T1, T1
VST T1, (1*16)(res_ptr)
RET
#undef res_ptr
#undef x_ptr
#undef COUNT
#undef N
#undef X0
#undef X1
#undef M0
#undef M1
#undef T0
#undef T1
TEXT ·p256Mul(SB), NOSPLIT, $0 TEXT ·p256Mul(SB), NOSPLIT, $0
RET RET

View File

@ -58,3 +58,8 @@ func p256OrdReduce(s *p256OrdElement)
// //
//go:noescape //go:noescape
func p256OrdMul(res, in1, in2 *p256OrdElement) func p256OrdMul(res, in1, in2 *p256OrdElement)
// Montgomery square modulo ord(G), repeated n times (n >= 1).
//
// The assembly implementation reads in once before the first squaring and
// writes res once after the last one. Its loop is bottom-tested, so one
// squaring is performed even when n < 1; callers must pass n >= 1.
//
//go:noescape
func p256OrdSqr(res, in *p256OrdElement, n int)

View File

@ -153,3 +153,49 @@ func TestFuzzyP256OrdMul(t *testing.T) {
p256OrdMulTest(t, x, y, p, r) p256OrdMulTest(t, x, y, p, r)
} }
} }
// p256OrderSqrTest checks a single p256OrdSqr squaring against math/big.
//
// x is the value to square, p is the modulus and r is the Montgomery factor.
// The helper converts x to Montgomery form (x*r mod p), squares it once with
// p256OrdSqr, converts the result back with p256OrderFromMont and compares it
// with x*x mod p. On mismatch the test fails immediately and reports the input,
// the value obtained and the value expected.
func p256OrderSqrTest(t *testing.T, x, p, r *big.Int) {
	t.Helper()

	// Bring x into the Montgomery domain; this also reduces inputs >= p.
	x1 := new(big.Int).Mul(x, r)
	x1 = x1.Mod(x1, p)

	ax := new(p256OrdElement)
	res2 := new(p256OrdElement)
	fromBig((*[4]uint64)(ax), x1)
	p256OrdSqr(res2, ax, 1)
	resInt := new(big.Int).SetBytes(p256OrderFromMont(res2))

	// Reference result computed with math/big.
	expected := new(big.Int).Mul(x, x)
	expected = expected.Mod(expected, p)

	if resInt.Cmp(expected) != 0 {
		t.Fatalf("p256OrdSqr(%x, 1) = %x, want %x", x, resInt, expected)
	}
}
// TestP256OrdSqrOrdMinus1 squares the largest reduced input, order-1, and
// checks the result through p256OrderSqrTest.
func TestP256OrdSqrOrdMinus1(t *testing.T) {
	order, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	montR, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)

	one := big.NewInt(1)
	largest := new(big.Int).Sub(order, one)

	p256OrderSqrTest(t, largest, order, montR)
}
// TestFuzzyP256OrdSqr feeds random 256-bit values to p256OrderSqrTest until a
// time budget expires: 10ms in -short mode, 2s otherwise.
//
// A failure to read random bytes aborts the test instead of being ignored, so
// the loop never silently re-tests a stale or all-zero scalar.
func TestFuzzyP256OrdSqr(t *testing.T) {
	p, _ := new(big.Int).SetString("FFFFFFFEFFFFFFFFFFFFFFFFFFFFFFFF7203DF6B21C6052B53BBF40939D54123", 16)
	r, _ := new(big.Int).SetString("10000000000000000000000000000000000000000000000000000000000000000", 16)

	budget := 2 * time.Second
	if testing.Short() {
		budget = 10 * time.Millisecond
	}
	timeout := time.NewTimer(budget)
	// Release the timer on every exit path, including a fatal failure.
	defer timeout.Stop()

	var scalar1 [32]byte
	for {
		select {
		case <-timeout.C:
			return
		default:
		}

		if _, err := io.ReadFull(rand.Reader, scalar1[:]); err != nil {
			t.Fatalf("reading random scalar: %v", err)
		}
		x := new(big.Int).SetBytes(scalar1[:])
		p256OrderSqrTest(t, x, p, r)
	}
}