diff --git a/internal/sm2ec/p256_asm_loong64.s b/internal/sm2ec/p256_asm_loong64.s
index 4fdaec6..5f0218a 100644
--- a/internal/sm2ec/p256_asm_loong64.s
+++ b/internal/sm2ec/p256_asm_loong64.s
@@ -1287,7 +1287,76 @@ TEXT ·p256Select(SB),NOSPLIT,$0
 TEXT ·p256SelectAffine(SB),NOSPLIT,$0
 	RET
 
+/* ---------------------------------------*/
+// func p256Sub(res, in1, in2 *p256Element)
+// Go-callable wrapper around sm2P256Subinternal: res = in1 - in2.
+TEXT ·p256Sub(SB),NOSPLIT,$0
+	MOVV res+0(FP), res_ptr
+	MOVV in1+8(FP), x_ptr
+	MOVV in2+16(FP), y_ptr
+	// in1 is the minuend, so it goes into (y3, y2, y1, y0).
+	MOVV (8*0)(x_ptr), y0
+	MOVV (8*1)(x_ptr), y1
+	MOVV (8*2)(x_ptr), y2
+	MOVV (8*3)(x_ptr), y3
+
+	// in2 is the subtrahend, so it goes into (x3, x2, x1, x0).
+	MOVV (8*0)(y_ptr), x0
+	MOVV (8*1)(y_ptr), x1
+	MOVV (8*2)(y_ptr), x2
+	MOVV (8*3)(y_ptr), x3
+
+	CALL sm2P256Subinternal<>(SB)
+
+	MOVV x0, (8*0)(res_ptr)
+	MOVV x1, (8*1)(res_ptr)
+	MOVV x2, (8*2)(res_ptr)
+	MOVV x3, (8*3)(res_ptr)
+	RET
+
 /* ---------------------------------------*/
 // (x3, x2, x1, x0) = (y3, y2, y1, y0) - (x3, x2, x1, x0)
 TEXT sm2P256Subinternal<>(SB),NOSPLIT,$0
+	// Limb 0: acc0 = y0 - x0, t0 = borrow out (x0 > y0).
+	SGTU x0, y0, t0
+	SUBV x0, y0, acc0
+	// SBCS x1, y1
+	SGTU x1, y1, t1
+	SUBV x1, y1, acc1
+	SGTU t0, acc1, t2
+	SUBV t0, acc1, acc1
+	OR t1, t2, t0
+	// SBCS x2, y2
+	SGTU x2, y2, t1
+	SUBV x2, y2, acc2
+	SGTU t0, acc2, t2
+	SUBV t0, acc2, acc2
+	OR t1, t2, t0
+	// SBCS x3, y3
+	SGTU x3, y3, t1
+	SUBV x3, y3, acc3
+	SGTU t0, acc3, t2
+	SUBV t0, acc3, acc3
+	OR t1, t2, t0
+
+	// t0 is the final borrow. Keep the correction constant (limbs 3..0)
+	// (t2, 0, t3, t1) = (w+1, 0, w, 1), w = word at p256one+8, only if t0 != 0.
+	MOVV $1, t1
+	MASKEQZ t0, t1, t1
+	MOVV p256one<>+0x08(SB), t2
+	MASKEQZ t0, t2, t3
+	ADDV $1, t2, t2
+	MASKEQZ t0, t2, t2
+
+	// Subtract the masked constant from acc, rippling each limb's borrow (t4/t1).
+	SGTU t1, acc0, t4
+	SUBV t1, acc0, x0
+	ADDV t4, t3, t3 // no carry
+	SGTU t3, acc1, t1
+	SUBV t3, acc1, x1
+	SGTU t1, acc2, t4
+	SUBV t1, acc2, x2
+	ADDV t4, t2, t1 // no carry
+	SUBV t1, acc3, x3
+	RET
 
diff --git a/internal/sm2ec/sm2p256_asm_loong64.go b/internal/sm2ec/sm2p256_asm_loong64.go
index de0d612..16c8b3e 100644
--- a/internal/sm2ec/sm2p256_asm_loong64.go
+++ b/internal/sm2ec/sm2p256_asm_loong64.go
@@ -53,3 +53,8 @@ func p256FromMont(res, in *p256Element)
 //
 //go:noescape
 func p256OrdReduce(s *p256OrdElement)
+
+// p256Sub sets res = in1 - in2 using the assembly routine sm2P256Subinternal.
+//
+//go:noescape
+func p256Sub(res, in1, in2 *p256Element)
diff --git a/internal/sm2ec/sm2p256_asm_loong64_test.go b/internal/sm2ec/sm2p256_asm_loong64_test.go
index eee02fa..cb867ab 100644
--- a/internal/sm2ec/sm2p256_asm_loong64_test.go
+++ b/internal/sm2ec/sm2p256_asm_loong64_test.go
@@ -229,32 +229,60 @@ func TestFuzzyP256Sqr(t *testing.T) {
 func TestP256OrdReduce(t *testing.T) {
 	p256Ord := &p256OrdElement{0x53bbf40939d54123, 0x7203df6b21c6052b, 0xffffffffffffffff, 0xfffffffeffffffff}
 
-    // s < p256Ord
-    var s1 p256OrdElement
-    copy(s1[:], p256Ord[:])
-    s1[0] -= 1 // s1 = p256Ord - 1
-    s1Orig := s1
-    p256OrdReduce(&s1)
-    if s1 != s1Orig {
-        t.Errorf("p256OrdReduce changed s when s < p256Ord: got %x, want %x", s1, s1Orig)
-    }
+	// s < p256Ord
+	var s1 p256OrdElement
+	copy(s1[:], p256Ord[:])
+	s1[0] -= 1 // s1 = p256Ord - 1
+	s1Orig := s1
+	p256OrdReduce(&s1)
+	if s1 != s1Orig {
+		t.Errorf("p256OrdReduce changed s when s < p256Ord: got %x, want %x", s1, s1Orig)
+	}
 
-    // s >= p256Ord
-    var s2 p256OrdElement
-    copy(s2[:], p256Ord[:])
-    // s2 = p256Ord
-    p256OrdReduce(&s2)
-    zero := p256OrdElement{}
-    if s2 != zero {
-        t.Errorf("p256OrdReduce failed for s == p256Ord: got %x, want 0", s2)
-    }
+	// s >= p256Ord
+	var s2 p256OrdElement
+	copy(s2[:], p256Ord[:])
+	// s2 = p256Ord
+	p256OrdReduce(&s2)
+	zero := p256OrdElement{}
+	if s2 != zero {
+		t.Errorf("p256OrdReduce failed for s == p256Ord: got %x, want 0", s2)
+	}
 
-    // s2 = p256Ord + 1
-    copy(s2[:], p256Ord[:])
-    s2[0] += 1
-    p256OrdReduce(&s2)
-    one := p256OrdElement{1, 0, 0, 0}
-    if s2 != one {
-        t.Errorf("p256OrdReduce failed for s == p256Ord+1: got %x, want %x", s2, one)
-    }
+	// s2 = p256Ord + 1
+	copy(s2[:], p256Ord[:])
+	s2[0] += 1
+	p256OrdReduce(&s2)
+	one := p256OrdElement{1, 0, 0, 0}
+	if s2 != one {
+		t.Errorf("p256OrdReduce failed for s == p256Ord+1: got %x, want %x", s2, one)
+	}
+}
+
+// TestP256Sub checks p256Sub (the Go entry point to sm2P256Subinternal)
+// for a plain difference, a zero result and results that wrap below zero.
+func TestP256Sub(t *testing.T) {
+	tests := []struct {
+		name     string
+		in1, in2 p256Element
+		want     p256Element
+	}{
+		{"in1 > in2", p256Element{5, 0, 0, 0}, p256Element{3, 0, 0, 0}, p256Element{2, 0, 0, 0}},
+		{"in1 == in2", p256Element{7, 8, 9, 10}, p256Element{7, 8, 9, 10}, p256Element{}},
+		// 1 - 2 underflows and wraps to p - 1; the low limb does not borrow
+		// during the correction step.
+		{"in1 < in2", p256Element{1, 0, 0, 0}, p256Element{2, 0, 0, 0},
+			p256Element{0xfffffffffffffffe, 0xffffffff00000000, 0xffffffffffffffff, 0xfffffffeffffffff}},
+		// 0 - 2^64 wraps to p - 2^64; here the low limb borrows during the
+		// correction step.
+		{"in1 < in2, low limb borrow", p256Element{}, p256Element{0, 1, 0, 0},
+			p256Element{0xffffffffffffffff, 0xfffffffeffffffff, 0xffffffffffffffff, 0xfffffffeffffffff}},
+	}
+	for _, tc := range tests {
+		var res p256Element
+		p256Sub(&res, &tc.in1, &tc.in2)
+		if res != tc.want {
+			t.Errorf("%s: got %x, want %x", tc.name, res, tc.want)
+		}
+	}
 }