diff --git a/internal/sm2ec/p256_asm_loong64.s b/internal/sm2ec/p256_asm_loong64.s
index 14e7ca6..d18823b 100644
--- a/internal/sm2ec/p256_asm_loong64.s
+++ b/internal/sm2ec/p256_asm_loong64.s
@@ -20,6 +20,7 @@
 #define t1 R15
 #define t2 R16
 #define t3 R17
+#define t4 R18
 
 DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff
 DATA p256p<>+0x08(SB)/8, $0xffffffff00000000
@@ -74,6 +75,120 @@ TEXT ·p256BigToLittle(SB),NOSPLIT,$0
 
 	RET
 
+/* ---------------------------------------*/
+// func p256MovCond(res, a, b *SM2P256Point1, cond int)
+TEXT ·p256MovCond(SB),NOSPLIT,$0
+	MOVV res+0(FP), res_ptr
+	MOVV a+8(FP), x_ptr
+	MOVV b+16(FP), y_ptr
+	MOVV cond+24(FP), t0
+
+	// Load a.x (words 0-3 of a) into acc0-acc3
+	MOVV (8*0)(x_ptr), acc0
+	MOVV (8*1)(x_ptr), acc1
+	MOVV (8*2)(x_ptr), acc2
+	MOVV (8*3)(x_ptr), acc3
+
+	// Load b.x (words 0-3 of b) into t1-t4
+	MOVV (8*0)(y_ptr), t1
+	MOVV (8*1)(y_ptr), t2
+	MOVV (8*2)(y_ptr), t3
+	MOVV (8*3)(y_ptr), t4
+
+	// Select without branching: MASKNEZ clears b's limb if cond != 0, MASKEQZ clears a's limb if cond == 0, OR merges them
+	MASKNEZ t0, t1, t1
+	MASKEQZ t0, acc0, acc0
+	OR t1, acc0
+
+	MASKNEZ t0, t2, t2
+	MASKEQZ t0, acc1, acc1
+	OR t2, acc1
+
+	MASKNEZ t0, t3, t3
+	MASKEQZ t0, acc2, acc2
+	OR t3, acc2
+
+	MASKNEZ t0, t4, t4
+	MASKEQZ t0, acc3, acc3
+	OR t4, acc3
+
+	// Store the selected limbs to res.x (words 0-3 of res)
+	MOVV acc0, (8*0)(res_ptr)
+	MOVV acc1, (8*1)(res_ptr)
+	MOVV acc2, (8*2)(res_ptr)
+	MOVV acc3, (8*3)(res_ptr)
+
+	// Load a.y (words 4-7 of a) into acc0-acc3
+	MOVV (8*4)(x_ptr), acc0
+	MOVV (8*5)(x_ptr), acc1
+	MOVV (8*6)(x_ptr), acc2
+	MOVV (8*7)(x_ptr), acc3
+
+	// Load b.y (words 4-7 of b) into t1-t4
+	MOVV (8*4)(y_ptr), t1
+	MOVV (8*5)(y_ptr), t2
+	MOVV (8*6)(y_ptr), t3
+	MOVV (8*7)(y_ptr), t4
+
+	// Same branch-free select as for x: keep b's limbs if cond == 0, a's limbs otherwise
+	MASKNEZ t0, t1, t1
+	MASKEQZ t0, acc0, acc0
+	OR t1, acc0
+
+	MASKNEZ t0, t2, t2
+	MASKEQZ t0, acc1, acc1
+	OR t2, acc1
+
+	MASKNEZ t0, t3, t3
+	MASKEQZ t0, acc2, acc2
+	OR t3, acc2
+
+	MASKNEZ t0, t4, t4
+	MASKEQZ t0, acc3, acc3
+	OR t4, acc3
+
+	// Store the selected limbs to res.y (words 4-7 of res)
+	MOVV acc0, (8*4)(res_ptr)
+	MOVV acc1, (8*5)(res_ptr)
+	MOVV acc2, (8*6)(res_ptr)
+	MOVV acc3, (8*7)(res_ptr)
+
+	// Load a.z (words 8-11 of a) into acc0-acc3
+	MOVV (8*8)(x_ptr), acc0
+	MOVV (8*9)(x_ptr), acc1
+	MOVV (8*10)(x_ptr), acc2
+	MOVV (8*11)(x_ptr), acc3
+
+	// Load b.z (words 8-11 of b) into t1-t4
+	MOVV (8*8)(y_ptr), t1
+	MOVV (8*9)(y_ptr), t2
+	MOVV (8*10)(y_ptr), t3
+	MOVV (8*11)(y_ptr), t4
+
+	// Same branch-free select as for x: keep b's limbs if cond == 0, a's limbs otherwise
+	MASKNEZ t0, t1, t1
+	MASKEQZ t0, acc0, acc0
+	OR t1, acc0
+
+	MASKNEZ t0, t2, t2
+	MASKEQZ t0, acc1, acc1
+	OR t2, acc1
+
+	MASKNEZ t0, t3, t3
+	MASKEQZ t0, acc2, acc2
+	OR t3, acc2
+
+	MASKNEZ t0, t4, t4
+	MASKEQZ t0, acc3, acc3
+	OR t4, acc3
+
+	// Store the selected limbs to res.z (words 8-11 of res)
+	MOVV acc0, (8*8)(res_ptr)
+	MOVV acc1, (8*9)(res_ptr)
+	MOVV acc2, (8*10)(res_ptr)
+	MOVV acc3, (8*11)(res_ptr)
+	RET
+
 /* ---------------------------------------*/
 // func p256NegCond(val *p256Element, cond int)
 TEXT ·p256NegCond(SB),NOSPLIT,$0
diff --git a/internal/sm2ec/sm2p256_asm_loong64.go b/internal/sm2ec/sm2p256_asm_loong64.go
index 5414d8d..69ef60b 100644
--- a/internal/sm2ec/sm2p256_asm_loong64.go
+++ b/internal/sm2ec/sm2p256_asm_loong64.go
@@ -4,12 +4,23 @@ package sm2ec
 // domain (with R 2²⁵⁶) as four limbs in little-endian order value.
 type p256Element [4]uint64
 
+type SM2P256Point1 struct {
+	// (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point
+	// at infinity can be represented by any set of coordinates with Z = 0.
+	x, y, z p256Element
+}
+
 //go:noescape
 func p256BigToLittle(res *p256Element, in *[32]byte)
 
 //go:noescape
 func p256LittleToBig(res *[32]byte, in *p256Element)
 
+// p256MovCond sets res = b if cond is 0, and res = a for any nonzero cond.
+//
+//go:noescape
+func p256MovCond(res, a, b *SM2P256Point1, cond int)
+
 // If cond is not 0, sets val = -val mod p.
 //
 //go:noescape
diff --git a/internal/sm2ec/sm2p256_asm_loong64_test.go b/internal/sm2ec/sm2p256_asm_loong64_test.go
index 5266168..32ab7cd 100644
--- a/internal/sm2ec/sm2p256_asm_loong64_test.go
+++ b/internal/sm2ec/sm2p256_asm_loong64_test.go
@@ -5,6 +5,7 @@ package sm2ec
 import (
 	"bytes"
 	"encoding/binary"
+	"reflect"
 	"testing"
 )
 
@@ -67,3 +68,35 @@ func TestP256NegCond(t *testing.T) {
 		}
 	}
 }
+
+func newPoint(x, y, z uint64) *SM2P256Point1 {
+	return &SM2P256Point1{
+		x: p256Element{x, x + 1, x + 2, x + 3},
+		y: p256Element{y, y + 1, y + 2, y + 3},
+		z: p256Element{z, z + 1, z + 2, z + 3},
+	}
+}
+
+func TestP256MovCond(t *testing.T) {
+	a := newPoint(10, 20, 30)
+	b := newPoint(100, 200, 300)
+	var res SM2P256Point1
+
+	// cond == 0 must copy every limb of b into res.
+	p256MovCond(&res, a, b, 0)
+	if !reflect.DeepEqual(res, *b) {
+		t.Errorf("cond=0: got %+v, want %+v", res, *b)
+	}
+
+	// cond == 1 must copy every limb of a into res.
+	p256MovCond(&res, a, b, 1)
+	if !reflect.DeepEqual(res, *a) {
+		t.Errorf("cond=1: got %+v, want %+v", res, *a)
+	}
+
+	// A negative cond is still nonzero, so a must be selected here as well.
+	p256MovCond(&res, a, b, -123)
+	if !reflect.DeepEqual(res, *a) {
+		t.Errorf("cond=-123: got %+v, want %+v", res, *a)
+	}
+}