From e1fd84717cf073084bf1e19079be0d4509b5ec6b Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Fri, 10 Oct 2025 10:57:49 +0800 Subject: [PATCH] remove loong64 from this branch first --- .github/workflows/test_loong64.yml | 2 +- internal/sm2ec/p256_asm_loong64.s | 324 --------------------- internal/sm2ec/sm2p256_asm_loong64.go | 36 --- internal/sm2ec/sm2p256_asm_loong64_test.go | 104 ------- 4 files changed, 1 insertion(+), 465 deletions(-) delete mode 100644 internal/sm2ec/p256_asm_loong64.s delete mode 100644 internal/sm2ec/sm2p256_asm_loong64.go delete mode 100644 internal/sm2ec/sm2p256_asm_loong64_test.go diff --git a/.github/workflows/test_loong64.yml b/.github/workflows/test_loong64.yml index 05c51cf..2d646ab 100644 --- a/.github/workflows/test_loong64.yml +++ b/.github/workflows/test_loong64.yml @@ -17,7 +17,7 @@ jobs: test: strategy: matrix: - go-version: [1.25.x] + go-version: [1.24.x] arch: [loong64] runs-on: ubuntu-latest steps: diff --git a/internal/sm2ec/p256_asm_loong64.s b/internal/sm2ec/p256_asm_loong64.s deleted file mode 100644 index 7f10e5a..0000000 --- a/internal/sm2ec/p256_asm_loong64.s +++ /dev/null @@ -1,324 +0,0 @@ -// Copyright 2025 Sun Yimin. All rights reserved. -// Use of this source code is governed by a MIT-style -// license that can be found in the LICENSE file. - -//go:build go1.25 && !purego - -#include "textflag.h" - -#define ZERO R0 -#define res_ptr R4 -#define x_ptr R5 -#define y_ptr R6 - -#define acc0 R7 -#define acc1 R8 -#define acc2 R9 -#define acc3 R10 -#define acc4 R11 -#define acc5 R12 -#define t0 R13 -#define t1 R14 -#define t2 R15 -#define t3 R16 -#define t4 R17 - -DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff -DATA p256p<>+0x08(SB)/8, $0xffffffff00000000 -DATA p256p<>+0x10(SB)/8, $0xffffffffffffffff -DATA p256p<>+0x18(SB)/8, $0xfffffffeffffffff -DATA p256ordK0<>+0x00(SB)/8, $0x327f9e8872350975 -DATA p256ord<>+0x00(SB)/8, $0x53bbf40939d54123 -DATA p256ord<>+0x08(SB)/8, $0x7203df6b21c6052b -DATA p256ord<>+0x10(SB)/8, $0xffffffffffffffff -DATA p256ord<>+0x18(SB)/8, $0xfffffffeffffffff -DATA p256one<>+0x00(SB)/8, $0x0000000000000001 -DATA p256one<>+0x08(SB)/8, $0x00000000ffffffff -DATA p256one<>+0x10(SB)/8, $0x0000000000000000 -DATA p256one<>+0x18(SB)/8, $0x0000000100000000 -GLOBL p256p<>(SB), RODATA, $32 -GLOBL p256ordK0<>(SB), RODATA, $8 -GLOBL p256ord<>(SB), RODATA, $32 -GLOBL p256one<>(SB), RODATA, $32 - -/* ---------------------------------------*/ -// func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement) -TEXT ·p256OrdLittleToBig(SB),NOSPLIT,$0 - JMP ·p256BigToLittle(SB) -/* ---------------------------------------*/ -// func p256OrdBigToLittle(res *p256OrdElement, in *[32]byte) -TEXT ·p256OrdBigToLittle(SB),NOSPLIT,$0 - JMP ·p256BigToLittle(SB) -/* ---------------------------------------*/ -// func p256LittleToBig(res *[32]byte, in *p256Element) -TEXT ·p256LittleToBig(SB),NOSPLIT,$0 - JMP ·p256BigToLittle(SB) -/* ---------------------------------------*/ -// func p256BigToLittle(res *p256Element, in *[32]byte) -TEXT ·p256BigToLittle(SB),NOSPLIT,$0 - MOVV res+0(FP), res_ptr - MOVV in+8(FP), x_ptr - - MOVV (8*0)(x_ptr), acc0 - MOVV (8*1)(x_ptr), acc1 - MOVV (8*2)(x_ptr), acc2 - MOVV (8*3)(x_ptr), acc3 - - REVBV acc0, acc0 - REVBV acc1, acc1 - REVBV acc2, acc2 - REVBV acc3, acc3 - - MOVV acc3, (8*0)(res_ptr) - MOVV acc2, (8*1)(res_ptr) - MOVV acc1, (8*2)(res_ptr) - MOVV acc0, (8*3)(res_ptr) - - RET - -/* ---------------------------------------*/ -// func p256MovCond(res, a, b *SM2P256Point, cond int) -TEXT ·p256MovCond(SB),NOSPLIT,$0 - MOVV res+0(FP), res_ptr - MOVV a+8(FP), x_ptr - MOVV b+16(FP), y_ptr - MOVV cond+24(FP), t0 - - MOVV ·supportLSX+0(SB), t1 - BEQ t1, ZERO, basic_path - - MOVV ·supportLASX+0(SB), t1 - BEQ t1, ZERO, lsx_path - - XVMOVQ t0, X0.V4 - XVXORV X1, X1, X1 - XVSEQV X0, X1, X0 - - XVMOVQ (32*0)(x_ptr), X1 - XVMOVQ (32*1)(x_ptr), X2 - XVMOVQ (32*2)(x_ptr), X3 - - XVANDNV X1, X0, X1 - XVANDNV X2, X0, X2 - XVANDNV X3, X0, X3 - - XVMOVQ (32*0)(y_ptr), X4 - XVMOVQ (32*1)(y_ptr), X5 - XVMOVQ (32*2)(y_ptr), X6 - - XVANDV X4, X0, X4 - XVANDV X5, X0, X5 - XVANDV X6, X0, X6 - - XVORV X1, X4, X1 - XVORV X2, X5, X2 - XVORV X3, X6, X3 - - XVMOVQ X1, (32*0)(res_ptr) - XVMOVQ X2, (32*1)(res_ptr) - XVMOVQ X3, (32*2)(res_ptr) - - RET - -lsx_path: - VMOVQ t0, V0.V2 - VXORV V1, V1, V1 - VSEQV V0, V1, V0 - - VMOVQ (16*0)(x_ptr), V1 - VMOVQ (16*1)(x_ptr), V2 - VMOVQ (16*2)(x_ptr), V3 - VMOVQ (16*3)(x_ptr), V4 - VMOVQ (16*4)(x_ptr), V5 - VMOVQ (16*5)(x_ptr), V6 - VANDNV V1, V0, V1 - VANDNV V2, V0, V2 - VANDNV V3, V0, V3 - VANDNV V4, V0, V4 - VANDNV V5, V0, V5 - VANDNV V6, V0, V6 - - VMOVQ (16*0)(y_ptr), V7 - VMOVQ (16*1)(y_ptr), V8 - VMOVQ (16*2)(y_ptr), V9 - VMOVQ (16*3)(y_ptr), V10 - VMOVQ (16*4)(y_ptr), V11 - VMOVQ (16*5)(y_ptr), V12 - VANDV V7, V0, V7 - VANDV V8, V0, V8 - VANDV V9, V0, V9 - VANDV V10, V0, V10 - VANDV V11, V0, V11 - VANDV V12, V0, V12 - - VORV V1, V7, V1 - VORV V2, V8, V2 - VORV V3, V9, V3 - VORV V4, V10, V4 - VORV V5, V11, V5 - VORV V6, V12, V6 - - VMOVQ V1, (16*0)(res_ptr) - VMOVQ V2, (16*1)(res_ptr) - VMOVQ V3, (16*2)(res_ptr) - VMOVQ V4, (16*3)(res_ptr) - VMOVQ V5, (16*4)(res_ptr) - VMOVQ V6, (16*5)(res_ptr) - - RET - -basic_path: - // Load a.x - MOVV (8*0)(x_ptr), acc0 - MOVV (8*1)(x_ptr), acc1 - MOVV (8*2)(x_ptr), acc2 - MOVV (8*3)(x_ptr), acc3 - - // Load b.x - MOVV (8*0)(y_ptr), t1 - MOVV (8*1)(y_ptr), t2 - MOVV (8*2)(y_ptr), t3 - MOVV (8*3)(y_ptr), t4 - - // Conditional move - MASKNEZ t0, t1, t1 - MASKEQZ t0, acc0, acc0 - OR t1, acc0 - - MASKNEZ t0, t2, t2 - MASKEQZ t0, acc1, acc1 - OR t2, acc1 - - MASKNEZ t0, t3, t3 - MASKEQZ t0, acc2, acc2 - OR t3, acc2 - - MASKNEZ t0, t4, t4 - MASKEQZ t0, acc3, acc3 - OR t4, acc3 - - // Store result - MOVV acc0, (8*0)(res_ptr) - MOVV acc1, (8*1)(res_ptr) - MOVV acc2, (8*2)(res_ptr) - MOVV acc3, (8*3)(res_ptr) - - // Load a.y - MOVV (8*4)(x_ptr), acc0 - MOVV (8*5)(x_ptr), acc1 - MOVV (8*6)(x_ptr), acc2 - MOVV (8*7)(x_ptr), acc3 - - // Load b.y - MOVV (8*4)(y_ptr), t1 - MOVV (8*5)(y_ptr), t2 - MOVV (8*6)(y_ptr), t3 - MOVV (8*7)(y_ptr), t4 - - // Conditional move - MASKNEZ t0, t1, t1 - MASKEQZ t0, acc0, acc0 - OR t1, acc0 - - MASKNEZ t0, t2, t2 - MASKEQZ t0, acc1, acc1 - OR t2, acc1 - - MASKNEZ t0, t3, t3 - MASKEQZ t0, acc2, acc2 - OR t3, acc2 - - MASKNEZ t0, t4, t4 - MASKEQZ t0, acc3, acc3 - OR t4, acc3 - - // Store result - MOVV acc0, (8*4)(res_ptr) - MOVV acc1, (8*5)(res_ptr) - MOVV acc2, (8*6)(res_ptr) - MOVV acc3, (8*7)(res_ptr) - - // Load a.z - MOVV (8*8)(x_ptr), acc0 - MOVV (8*9)(x_ptr), acc1 - MOVV (8*10)(x_ptr), acc2 - MOVV (8*11)(x_ptr), acc3 - - // Load b.z - MOVV (8*8)(y_ptr), t1 - MOVV (8*9)(y_ptr), t2 - MOVV (8*10)(y_ptr), t3 - MOVV (8*11)(y_ptr), t4 - - // Conditional move - MASKNEZ t0, t1, t1 - MASKEQZ t0, acc0, acc0 - OR t1, acc0 - - MASKNEZ t0, t2, t2 - MASKEQZ t0, acc1, acc1 - OR t2, acc1 - - MASKNEZ t0, t3, t3 - MASKEQZ t0, acc2, acc2 - OR t3, acc2 - - MASKNEZ t0, t4, t4 - MASKEQZ t0, acc3, acc3 - OR t4, acc3 - - // Store result - MOVV acc0, (8*8)(res_ptr) - MOVV acc1, (8*9)(res_ptr) - MOVV acc2, (8*10)(res_ptr) - MOVV acc3, (8*11)(res_ptr) - RET - -/* ---------------------------------------*/ -// func p256NegCond(val *p256Element, cond int) -TEXT ·p256NegCond(SB),NOSPLIT,$0 - MOVV val+0(FP), res_ptr - MOVV cond+8(FP), t0 - // acc = poly - MOVV $-1, acc0 - MOVV p256p<>+0x08(SB), acc1 - MOVV $-1, acc2 - MOVV p256p<>+0x18(SB), acc3 - // Load the original value - MOVV (8*0)(res_ptr), acc4 - MOVV (8*1)(res_ptr), x_ptr - MOVV (8*2)(res_ptr), y_ptr - MOVV (8*3)(res_ptr), acc5 - - // Speculatively subtract - SUBV acc4, acc0 - SGTU x_ptr, acc1, t1 - SUBV x_ptr, acc1 - SUBV y_ptr, acc2 - SGTU t1, acc2, t2 - SUBV t1, acc2 - SUBV acc5, acc3 - SUBV t2, acc3 - - MASKNEZ t0, acc4, acc4 - MASKEQZ t0, acc0, acc0 - OR acc4, acc0 - - MASKNEZ t0, x_ptr, x_ptr - MASKEQZ t0, acc1, acc1 - OR x_ptr, acc1 - - MASKNEZ t0, y_ptr, y_ptr - MASKEQZ t0, acc2, acc2 - OR y_ptr, acc2 - - MASKNEZ t0, acc5, acc5 - MASKEQZ t0, acc3, acc3 - OR acc5, acc3 - - MOVV acc0, (8*0)(res_ptr) - MOVV acc1, (8*1)(res_ptr) - MOVV acc2, (8*2)(res_ptr) - MOVV acc3, (8*3)(res_ptr) - - RET diff --git a/internal/sm2ec/sm2p256_asm_loong64.go b/internal/sm2ec/sm2p256_asm_loong64.go deleted file mode 100644 index 2287c5a..0000000 --- a/internal/sm2ec/sm2p256_asm_loong64.go +++ /dev/null @@ -1,36 +0,0 @@ -//go:build go1.25 - -package sm2ec - -import ( - "github.com/emmansun/gmsm/internal/deps/cpu" -) - -// p256Element is a P-256 base field element in [0, P-1] in the Montgomery -// domain (with R 2²⁵⁶) as four limbs in little-endian order value. -type p256Element [4]uint64 - -type SM2P256Point1 struct { - // (X:Y:Z) are Jacobian coordinates where x = X/Z² and y = Y/Z³. The point - // at infinity can be represented by any set of coordinates with Z = 0. - x, y, z p256Element -} - -var supportLSX = cpu.Loong64.HasLSX -var supportLASX = cpu.Loong64.HasLASX - -//go:noescape -func p256BigToLittle(res *p256Element, in *[32]byte) - -//go:noescape -func p256LittleToBig(res *[32]byte, in *p256Element) - -// If cond is 0, sets res = b, otherwise sets res = a. -// -//go:noescape -func p256MovCond(res, a, b *SM2P256Point1, cond int) - -// If cond is not 0, sets val = -val mod p. -// -//go:noescape -func p256NegCond(val *p256Element, cond int) diff --git a/internal/sm2ec/sm2p256_asm_loong64_test.go b/internal/sm2ec/sm2p256_asm_loong64_test.go deleted file mode 100644 index f423a74..0000000 --- a/internal/sm2ec/sm2p256_asm_loong64_test.go +++ /dev/null @@ -1,104 +0,0 @@ -//go:build loong64 && go1.25 && !purego - -package sm2ec - -import ( - "bytes" - "encoding/binary" - "fmt" - "reflect" - "testing" -) - -func TestP256BigToLittle(t *testing.T) { - // 构造一个已知的 32 字节大端输入 - var in [32]byte - for i := 0; i < 32; i++ { - in[i] = byte(i + 1) - } - var out p256Element - - p256BigToLittle(&out, &in) - - // 检查每个 limb 是否为小端解包 - for i := 0; i < 4; i++ { - expected := binary.BigEndian.Uint64(in[i*8 : (i+1)*8]) - k := 3 - i // 逆序存储 - if out[k] != expected { - t.Errorf("limb %d: got 0x%x, want 0x%x", k, out[k], expected) - } - } - - // 逆操作测试 - var back [32]byte - p256LittleToBig(&back, &out) - if !bytes.Equal(in[:], back[:]) { - t.Errorf("p256LittleToBig(p256BigToLittle(...)) mismatch\nin: %x\nback: %x", in, back) - } -} - -func TestP256NegCond(t *testing.T) { - var tests = []struct { - input p256Element - cond int - expected p256Element - }{ - { - input: p256Element{1, 0, 0, 0}, - cond: 1, - expected: p256Element{0xfffffffffffffffe, 0xffffffff00000000, 0xffffffffffffffff, 0xfffffffeffffffff}, - }, - { - input: p256Element{1, 0, 0, 0}, - cond: 0, - expected: p256Element{1, 0, 0, 0}, - }, - { - input: p256Element{0x1, 0xffffffff00000001, 0xfffffffffffffffe, 0xfffffffeffffffff}, - cond: 1, - expected: p256Element{0xfffffffffffffffe, 0xffffffffffffffff, 0, 0}, - }, - } - - for i, test := range tests { - var result p256Element - copy(result[:], test.input[:]) - p256NegCond(&result, test.cond) - if result != test.expected { - t.Errorf("test %d: got %x, want %x", i, result, test.expected) - } - } -} - -func newPoint(x, y, z uint64) *SM2P256Point1 { - return &SM2P256Point1{ - x: p256Element{x, x + 1, x + 2, x + 3}, - y: p256Element{y, y + 1, y + 2, y + 3}, - z: p256Element{z, z + 1, z + 2, z + 3}, - } -} - -func TestP256MovCond(t *testing.T) { - fmt.Printf("supportLSX=%v, supportLASX=%v\n", supportLSX, supportLASX) - a := newPoint(10, 20, 30) - b := newPoint(100, 200, 300) - var res SM2P256Point1 - - // cond == 0: res = b - p256MovCond(&res, a, b, 0) - if !reflect.DeepEqual(res, *b) { - t.Errorf("cond=0: got %+v, want %+v", res, *b) - } - - // cond != 0: res = a - p256MovCond(&res, a, b, 1) - if !reflect.DeepEqual(res, *a) { - t.Errorf("cond=1: got %+v, want %+v", res, *a) - } - - // cond < 0: res = a (should treat any nonzero as true) - p256MovCond(&res, a, b, -123) - if !reflect.DeepEqual(res, *a) { - t.Errorf("cond=-123: got %+v, want %+v", res, *a) - } -}