From 0ef82b1be546d809b7bc068d082320b531291f9e Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 5 Oct 2024 11:51:31 +0800 Subject: [PATCH] sm9/bn256: gfp ppc64x add/sub --- .github/workflows/test_ppc64.yaml | 6 + sm9/bn256/gfp_ppc64x.go | 33 +++++ sm9/bn256/gfp_ppc64x.s | 221 ++++++++++++++++++++++++++++++ sm9/bn256/gfp_ppc64x_test.go | 136 ++++++++++++++++++ 4 files changed, 396 insertions(+) create mode 100644 sm9/bn256/gfp_ppc64x.go create mode 100644 sm9/bn256/gfp_ppc64x.s create mode 100644 sm9/bn256/gfp_ppc64x_test.go diff --git a/.github/workflows/test_ppc64.yaml b/.github/workflows/test_ppc64.yaml index 1de1d6a..7d566cb 100644 --- a/.github/workflows/test_ppc64.yaml +++ b/.github/workflows/test_ppc64.yaml @@ -36,6 +36,12 @@ jobs: GOARCH: ${{ matrix.arch }} GOPPC64: ${{ matrix.ppc64 }} + - name: Test bn256 + run: go test -v ./sm9/bn256/... + env: + GOARCH: ${{ matrix.arch }} + GOPPC64: ${{ matrix.ppc64 }} + - name: Test ZUC run: go test -v ./zuc/... env: diff --git a/sm9/bn256/gfp_ppc64x.go b/sm9/bn256/gfp_ppc64x.go new file mode 100644 index 0000000..b6e2636 --- /dev/null +++ b/sm9/bn256/gfp_ppc64x.go @@ -0,0 +1,33 @@ +// Copyright 2024 Sun Yimin. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +//go:build (ppc64 || ppc64le) && !purego + +package bn256 + +// Set c = p - a, if c == p, then c = 0 +// It seems this function's performance is worse than gfpSub with zero. 
+//
+//go:noescape
+func gfpNegAsm(c, a *gfP)
+
+// gfpAddAsm sets c = a + b, then c = c - p if c >= p.
+//
+//go:noescape
+func gfpAddAsm(c, a, b *gfP)
+
+// gfpDoubleAsm sets c = a + a, then c = c - p if c >= p.
+//
+//go:noescape
+func gfpDoubleAsm(c, a *gfP)
+
+// gfpTripleAsm sets c = a + a + a, reducing by p after each addition.
+//
+//go:noescape
+func gfpTripleAsm(c, a *gfP)
+
+// gfpSubAsm sets c = a - b, then c = c + p if the subtraction borrowed.
+//
+//go:noescape
+func gfpSubAsm(c, a, b *gfP)
diff --git a/sm9/bn256/gfp_ppc64x.s b/sm9/bn256/gfp_ppc64x.s
new file mode 100644
index 0000000..8c092b0
--- /dev/null
+++ b/sm9/bn256/gfp_ppc64x.s
@@ -0,0 +1,221 @@
+// Copyright 2024 Sun Yimin. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+//go:build (ppc64 || ppc64le) && !purego
+
+#include "textflag.h"
+
+// Vector register assignments. A 256-bit field element is held as two
+// 128-bit halves: X1L/Y1L and T0/TT0 are low halves, X1H/Y1H and T1/TT1
+// are high halves (carries propagate from the low half to the high half).
+#define X1L V0
+#define X1H V1
+#define Y1L V2
+#define Y1H V3
+#define T0 V4
+#define T1 V5
+#define T2 V6
+#define SEL1 V7
+#define ZERO V8
+#define CAR1 V9
+#define CAR2 V10
+#define TT0 V11
+#define TT1 V12
+
+// PL/PH hold the low/high 128 bits of the modulus loaded from ·p2.
+#define PL V30
+#define PH V31
+
+// gfpSubInternal computes (T1, T0) = (X1, X0) - (Y1, Y0), adding p back
+// when the subtraction borrows. It expects PL/PH to hold p and clobbers
+// ZERO, CAR1, SEL1, TT0 and TT1.
+#define gfpSubInternal(T1, T0, X1, X0, Y1, Y0) \
+	VSPLTISB $0, ZERO           \ // VZERO
+	VSUBCUQ  X0, Y0, CAR1       \
+	VSUBUQM  X0, Y0, T0         \
+	VSUBECUQ X1, Y1, CAR1, SEL1 \
+	VSUBEUQM X1, Y1, CAR1, T1   \
+	VSUBUQM  ZERO, SEL1, SEL1   \ // VSQ
+	                            \
+	VADDCUQ  T0, PL, CAR1       \ // VACCQ
+	VADDUQM  T0, PL, TT0        \ // VAQ
+	VADDEUQM T1, PH, CAR1, TT1  \ // VACQ
+	                            \
+	VSEL     TT0, T0, SEL1, T0  \
+	VSEL     TT1, T1, SEL1, T1
+
+// gfpAddInternal computes (T1, T0) = (X1, X0) + (Y1, Y0), subtracting p
+// when the sum is >= p. It expects PL/PH to hold p and ZERO to be zero,
+// and clobbers T2, CAR1, CAR2, SEL1, TT0 and TT1.
+#define gfpAddInternal(T1, T0, X1, X0, Y1, Y0) \
+	VADDCUQ  X0, Y0, CAR1         \
+	VADDUQM  X0, Y0, T0           \
+	VADDECUQ X1, Y1, CAR1, T2     \ // VACCCQ
+	VADDEUQM X1, Y1, CAR1, T1     \
+	                              \
+	VSUBCUQ  T0, PL, CAR1         \ // VSCBIQ
+	VSUBUQM  T0, PL, TT0          \
+	VSUBECUQ T1, PH, CAR1, CAR2   \ // VSBCBIQ
+	VSUBEUQM T1, PH, CAR1, TT1    \ // VSBIQ
+	VSUBEUQM T2, ZERO, CAR2, SEL1 \
+	                              \
+	VSEL     TT0, T0, SEL1, T0    \
+	VSEL     TT1, T1, SEL1, T1
+
+// func gfpNegAsm(c, a *gfP)
+TEXT ·gfpNegAsm(SB),0,$0-16
+	MOVD c+0(FP), R3
+	MOVD a+8(FP), R4
+
+	MOVD $16, R5
+	LXVD2X (R4)(R0), Y1L
+	LXVD2X (R4)(R5), Y1H
+	XXPERMDI Y1H, Y1H, $2, Y1H
+	XXPERMDI Y1L, Y1L, $2, Y1L
+
+	MOVD $·p2+0(SB), R6
+	LXVD2X (R6)(R0), PL
+	LXVD2X (R6)(R5), PH
+	XXPERMDI PH, PH, $2, PH
+	XXPERMDI PL, PL, $2, PL
+
+	// c = 0 - a: a != 0 borrows, so p is added back giving p - a, while
+	// a == 0 yields 0 and the result is never left at the unreduced p.
+	gfpSubInternal(T1, T0, ZERO, ZERO, Y1H, Y1L)
+
+	XXPERMDI T1, T1, $2, T1
+	XXPERMDI T0, T0, $2, T0
+
+	STXVD2X T0, (R0+R3)
+	STXVD2X T1, (R5+R3)
+	RET
+
+// func gfpSubAsm(c, a, b *gfP)
+TEXT ·gfpSubAsm(SB),0,$0-24
+	MOVD c+0(FP), R3
+	MOVD a+8(FP), R4
+	MOVD b+16(FP), R5
+
+	MOVD $16, R6
+	LXVD2X (R4)(R0), X1L
+	LXVD2X (R4)(R6), X1H
+	XXPERMDI X1H, X1H, $2, X1H
+	XXPERMDI X1L, X1L, $2, X1L
+
+	LXVD2X (R5)(R0), Y1L
+	LXVD2X (R5)(R6), Y1H
+	XXPERMDI Y1H, Y1H, $2, Y1H
+	XXPERMDI Y1L, Y1L, $2, Y1L
+
+	MOVD $·p2+0(SB), R7
+	LXVD2X (R7)(R0), PL
+	LXVD2X (R7)(R6), PH
+	XXPERMDI PH, PH, $2, PH
+	XXPERMDI PL, PL, $2, PL
+
+	gfpSubInternal(T1, T0, X1H, X1L, Y1H, Y1L)
+
+	XXPERMDI T1, T1, $2, T1
+	XXPERMDI T0, T0, $2, T0
+
+	STXVD2X T0, (R0+R3)
+	STXVD2X T1, (R6+R3)
+	RET
+
+// func gfpAddAsm(c, a, b *gfP)
+TEXT ·gfpAddAsm(SB),0,$0-24
+	MOVD c+0(FP), R3
+	MOVD a+8(FP), R4
+	MOVD b+16(FP), R5
+
+	MOVD $16, R6
+	LXVD2X (R4)(R0), X1L
+	LXVD2X (R4)(R6), X1H
+	XXPERMDI X1H, X1H, $2, X1H
+	XXPERMDI X1L, X1L, $2, X1L
+
+	LXVD2X (R5)(R0), Y1L
+	LXVD2X (R5)(R6), Y1H
+	XXPERMDI Y1H, Y1H, $2, Y1H
+	XXPERMDI Y1L, Y1L, $2, Y1L
+
+	MOVD $·p2+0(SB), R7
+	LXVD2X (R7)(R0), PL
+	LXVD2X (R7)(R6), PH
+	XXPERMDI PH, PH, $2, PH
+	XXPERMDI PL, PL, $2, PL
+
+	VSPLTISB $0, ZERO
+
+	gfpAddInternal(T1, T0, X1H, X1L, Y1H, Y1L)
+
+	XXPERMDI T1, T1, $2, T1
+	XXPERMDI T0, T0, $2, T0
+
+	STXVD2X T0, (R0+R3)
+	STXVD2X T1, (R6+R3)
+	RET
+
+// func gfpDoubleAsm(c, a *gfP)
+TEXT ·gfpDoubleAsm(SB),0,$0-16
+	MOVD c+0(FP), R3
+	MOVD a+8(FP), R4
+
+	MOVD $16, R6
+	LXVD2X (R4)(R0), X1L
+	LXVD2X (R4)(R6), X1H
+	XXPERMDI X1H, X1H, $2, X1H
+	XXPERMDI X1L, X1L, $2, X1L
+
+	MOVD $·p2+0(SB), R7
+	LXVD2X (R7)(R0), PL
+	LXVD2X (R7)(R6), PH
+	XXPERMDI PH, PH, $2, PH
+	XXPERMDI PL, PL, $2, PL
+
+	VSPLTISB $0, ZERO
+
+	// c = a + a: one modular addition is the whole doubling.
+	gfpAddInternal(T1, T0, X1H, X1L, X1H, X1L)
+
+	XXPERMDI T1, T1, $2, T1
+	XXPERMDI T0, T0, $2, T0
+
+	STXVD2X T0, (R0+R3)
+	STXVD2X T1, (R6+R3)
+	RET
+
+// func gfpTripleAsm(c, a *gfP)
+TEXT ·gfpTripleAsm(SB),0,$0-16
+	MOVD c+0(FP), R3
+	MOVD a+8(FP), R4
+
+	MOVD $16, R6
+	LXVD2X (R4)(R0), X1L
+	LXVD2X (R4)(R6), X1H
+	XXPERMDI X1H, X1H, $2, X1H
+	XXPERMDI X1L, X1L, $2, X1L
+
+	MOVD $·p2+0(SB), R7
+	LXVD2X (R7)(R0), PL
+	LXVD2X (R7)(R6), PH
+	XXPERMDI PH, PH, $2, PH
+	XXPERMDI PL, PL, $2, PL
+
+	VSPLTISB $0, ZERO
+
+	// 2a is parked in Y1H:Y1L so that X1H:X1L still holds a.
+	gfpAddInternal(T1, T0, X1H, X1L, X1H, X1L)
+	VOR T1, T1, Y1H
+	VOR T0, T0, Y1L
+
+	// 3a = 2a + a.
+	gfpAddInternal(T1, T0, Y1H, Y1L, X1H, X1L)
+
+	XXPERMDI T1, T1, $2, T1
+	XXPERMDI T0, T0, $2, T0
+
+	STXVD2X T0, (R0+R3)
+	STXVD2X T1, (R6+R3)
+	RET
diff --git a/sm9/bn256/gfp_ppc64x_test.go b/sm9/bn256/gfp_ppc64x_test.go
new file mode 100644
index 0000000..4dd0100
--- /dev/null
+++ b/sm9/bn256/gfp_ppc64x_test.go
@@ -0,0 +1,136 @@
+// Copyright 2024 Sun Yimin. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+//go:build (ppc64 || ppc64le) && !purego
+
+package bn256
+
+import "testing"
+
+func TestGfpNeg(t *testing.T) { // gfpNegAsm must agree with gfpSubAsm(zero, x), including x == 0
+	x := fromBigInt(bigFromHex("9093a2b979e6186f43a9b28d41ba644d533377f2ede8c66b19774bf4a9c7a596"))
+	got := &gfP{}
+	gfpSubAsm(got, zero, x)
+	expected := &gfP{}
+	gfpNegAsm(expected, x)
+	if *expected != *got {
+		t.Errorf("got %v, expected %v", got, expected)
+	}
+	gfpSubAsm(got, zero, zero)
+	gfpNegAsm(expected, zero)
+	if *expected != *got {
+		t.Errorf("got %v, expected %v", got, expected)
+	}
+}
+
+func TestGfpBasicOperations(t *testing.T) { // fixed-vector checks, each also run with c aliasing an input
+	x := fromBigInt(bigFromHex("85AEF3D078640C98597B6027B441A01FF1DD2C190F5E93C454806C11D8806141"))
+	y := fromBigInt(bigFromHex("3722755292130B08D2AAB97FD34EC120EE265948D19C17ABF9B7213BAF82D65B"))
+	expectedAdd := fromBigInt(bigFromHex("0691692307d370af56226e57920199fbbe10f216c67fbc9468c7f225a4b1f21f"))
+	expectedDouble := fromBigInt(bigFromHex("551de7a0ee24723edcf314ff72f478fac1c7c4e7044238acc3913cfbcdaf7d05"))
+	expectedSub := fromBigInt(bigFromHex("67b381821c52a5624f3304a8149be8461e3bc07adcb872c38aa65051ba53ba97"))
+	expectedNeg := fromBigInt(bigFromHex("7f1d8aad70909be90358f1d02240062433cc3a0248ded72febb879ec33ce6f22"))
+
+	t.Parallel()
+	t.Run("add", func(t *testing.T) {
+		ret := &gfP{}
+		gfpAddAsm(ret, x, y)
+		if *expectedAdd != *ret {
+			t.Errorf("add not same")
+		}
+		x1 := &gfP{}
+		x1.Set(x)
+		gfpAddAsm(x1, x1, y)
+		if *expectedAdd != *x1 {
+			t.Errorf("add not same when add self")
+		}
+	})
+
+	t.Run("double", func(t *testing.T) {
+		ret := &gfP{}
+		gfpDoubleAsm(ret, x)
+		if ret.Equal(expectedDouble) != 1 {
+			t.Errorf("double not same, got %v, expected %v", ret, expectedDouble)
+		}
+		ret.Set(x)
+		gfpDoubleAsm(ret, ret)
+		if ret.Equal(expectedDouble) != 1 {
+			t.Errorf("double not same, got %v, expected %v", ret, expectedDouble)
+		}
+	})
+
+	t.Run("triple", func(t *testing.T) {
+		expected := &gfP{}
+		gfpAddAsm(expected, x, expectedDouble)
+		ret := &gfP{}
+		ret.Set(x)
+		gfpTripleAsm(ret, ret)
+		if ret.Equal(expected) != 1 {
+			t.Errorf("expected %v, got %v", expected, ret)
+		}
+	})
+
+	t.Run("sub", func(t *testing.T) {
+		ret := &gfP{}
+		gfpSubAsm(ret, y, x)
+		if *expectedSub != *ret {
+			t.Errorf("sub not same")
+		}
+		x1 := &gfP{}
+		x1.Set(x)
+		gfpSubAsm(x1, y, x1)
+		if *expectedSub != *x1 {
+			t.Errorf("sub not same when sub self")
+		}
+	})
+
+	t.Run("neg", func(t *testing.T) {
+		ret := &gfP{}
+		gfpNegAsm(ret, y)
+		if *expectedNeg != *ret {
+			t.Errorf("neg not same")
+		}
+		ret.Set(y)
+		gfpNegAsm(ret, ret)
+		if *expectedNeg != *ret {
+			t.Errorf("neg not same when neg self")
+		}
+	})
+/* Disabled mul/square cases; their expected values live inside this block so nothing is declared but unused.
+	expectedMul := fromBigInt(bigFromHex("3d08bbad376584e4f74bd31f78f716372b96ba8c3f939c12b8d54e79b6489e76"))
+	expectedMul2 := fromBigInt(bigFromHex("1df94a9e05a559ff38e0ab50cece734dc058d33738ceacaa15986a67cbff1ef6"))
+	t.Run("mul", func(t *testing.T) {
+		ret := &gfP{}
+		gfpMul(ret, x, y)
+		if *expectedMul != *ret {
+			t.Errorf("mul not same")
+		}
+		ret.Set(x)
+		gfpMul(ret, ret, y)
+		if *expectedMul != *ret {
+			t.Errorf("mul not same when mul self")
+		}
+	})
+
+	t.Run("square", func(t *testing.T) {
+		ret, ret1, ret2 := &gfP{}, &gfP{}, &gfP{}
+		gfpMul(ret, x, y)
+		gfpMul(ret1, ret, ret)
+		if *ret1 != *expectedMul2 {
+			t.Errorf("mul not same")
+		}
+		gfpMul(ret1, ret1, ret1)
+		gfpSqr(ret2, ret, 2)
+		if *ret1 != *ret2 {
+			t.Errorf("mul/sqr not same")
+		}
+		ret2.Set(ret)
+		gfpSqr(ret2, ret2, 2)
+		if *ret1 != *ret2 {
+			t.Errorf("mul/sqr not same when square self")
+		}
+	})
+*/
+}
+