// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"
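
// 256-bit field elements (assumed to be gfP values: four 64-bit limbs,
// least-significant limb first) are processed as two 128-bit halves held
// in vector registers: the low half at byte offset 0, the high half at
// offset 16. The XXPERMDI $2 swaps after each LXVD2X (and before each
// STXVD2X) exchange the two doublewords of a half, putting the limbs
// into the order the 128-bit quadword add/subtract instructions expect.
// ·p2 is taken to hold the field modulus p in the same layout.
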
#define X1L V0
#define X1H V1
#define Y1L V2
#define Y1H V3
#define T1L V4
#define T1H V5
#define T0 V4
#define T1 V5
#define T2 V6
#define SEL1 V7
#define ZERO V8
#define CAR1 V9
#define CAR2 V10
#define TT0 V11
#define TT1 V12
#define PL V30
#define PH V31
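
// gfpSubInternal computes (X - Y) mod p without branching: subtract,
// then add p back, and use the borrow mask SEL1 to pick the corrected
// value only when the subtraction underflowed. Roughly (sketch in
// Go-like pseudocode, helper names hypothetical):
//
//	t, borrow := sub256(x, y)     // t = x - y
//	u := add256(t, p)             // candidate correction
//	return select(borrow, u, t)   // constant-time choice via VSEL
//
// The // VZERO style notes appear to record the equivalent s390x
// mnemonics this routine mirrors.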
#define gfpSubInternal(T1, T0, X1, X0, Y1, Y0) \
	VSPLTISB $0, ZERO           \ // VZERO, ZERO = 0
	VSUBCUQ  X0, Y0, CAR1       \ // borrow out of the low 128 bits
	VSUBUQM  X0, Y0, T0         \ // T0 = X0 - Y0
	VSUBECUQ X1, Y1, CAR1, SEL1 \ // carry of the high subtraction (1 = no borrow)
	VSUBEUQM X1, Y1, CAR1, T1   \ // T1 = X1 - Y1 - borrow
	VSUBUQM  ZERO, SEL1, SEL1   \ // VSQ, SEL1 = all-ones if no borrow, else 0
	\
	VADDCUQ  T0, PL, CAR1       \ // VACCQ
	VADDUQM  T0, PL, TT0        \ // VAQ, TT1:TT0 = (X - Y) + p
	VADDEUQM T1, PH, CAR1, TT1  \ // VACQ
	\
	VSEL TT0, T0, SEL1, T0      \ // no borrow: keep X - Y; borrow: take (X - Y) + p
	VSEL TT1, T1, SEL1, T1
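
// func gfpNegAsm(c, a *gfP)
// c = (-a) mod p, computed as (0 - a) mod p via gfpSubInternal.
// Signature inferred from the 16-byte argument frame; element type assumed.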
TEXT ·gfpNegAsm(SB),0,$0-16
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD $16, R5

	// load a: low 128 bits into Y1L, high 128 bits into Y1H
	LXVD2X (R4)(R0), Y1L
	LXVD2X (R4)(R5), Y1H
	XXPERMDI Y1H, Y1H, $2, Y1H
	XXPERMDI Y1L, Y1L, $2, Y1L

	// load the modulus p from ·p2
	MOVD $·p2+0(SB), R6
	LXVD2X (R6)(R0), PL
	LXVD2X (R6)(R5), PH
	XXPERMDI PH, PH, $2, PH
	XXPERMDI PL, PL, $2, PL

	VSPLTISB $0, X1L // X1L = 0; negation is computed as 0 - a
	gfpSubInternal(T1, T0, X1L, X1L, Y1H, Y1L)

	// swap back to memory order and store c
	XXPERMDI T1, T1, $2, T1
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	STXVD2X T1, (R5+R3)
	RET
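
// func gfpSubAsm(c, a, b *gfP)
// c = (a - b) mod p. Same load/swap pattern as gfpNegAsm, with b as
// the subtrahend.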
TEXT ·gfpSubAsm(SB),0,$0-24
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD b+16(FP), R5
	MOVD $16, R6

	// load a into X1L/X1H and b into Y1L/Y1H
	LXVD2X (R4)(R0), X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H
	XXPERMDI X1L, X1L, $2, X1L
	LXVD2X (R5)(R0), Y1L
	LXVD2X (R5)(R6), Y1H
	XXPERMDI Y1H, Y1H, $2, Y1H
	XXPERMDI Y1L, Y1L, $2, Y1L

	// load the modulus p from ·p2
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH
	XXPERMDI PL, PL, $2, PL

	gfpSubInternal(T1, T0, X1H, X1L, Y1H, Y1L)

	// swap back to memory order and store c
	XXPERMDI T1, T1, $2, T1
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	STXVD2X T1, (R6+R3)
	RET
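
// gfpAddInternal computes (X + Y) mod p without branching: add, then
// trial-subtract p, and keep the raw sum only when it is already below
// p, i.e. when the add produced no 256-bit carry (T2 = 0) and the trial
// subtraction borrowed. Sketch (Go-like pseudocode, helper names
// hypothetical):
//
//	t, carry := add256(x, y)
//	u, borrow := sub256(t, p)
//	return select(carry == 0 && borrow == 1, t, u)
//
// The caller must have set ZERO to 0 beforehand.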
#define gfpAddInternal(T1, T0, X1, X0, Y1, Y0) \
	VADDCUQ  X0, Y0, CAR1         \ // carry out of the low 128 bits
	VADDUQM  X0, Y0, T0           \ // T0 = X0 + Y0
	VADDECUQ X1, Y1, CAR1, T2     \ // VACCCQ, T2 = carry out of the full sum
	VADDEUQM X1, Y1, CAR1, T1     \ // T1 = X1 + Y1 + carry
	\
	VSUBCUQ  T0, PL, CAR1         \ // VSCBIQ, trial subtraction of p
	VSUBUQM  T0, PL, TT0          \
	VSUBECUQ T1, PH, CAR1, CAR2   \ // VSBCBIQ
	VSUBEUQM T1, PH, CAR1, TT1    \ // VSBIQ, TT1:TT0 = (X + Y) - p
	VSUBEUQM T2, ZERO, CAR2, SEL1 \ // SEL1 = all-ones iff T2 = 0 and the trial borrowed (sum < p)
	\
	VSEL TT0, T0, SEL1, T0        \ // sum < p: keep it; otherwise take sum - p
	VSEL TT1, T1, SEL1, T1
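
// func gfpAddAsm(c, a, b *gfP)
// c = (a + b) mod p.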
TEXT ·gfpAddAsm(SB),0,$0-24
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD b+16(FP), R5
	MOVD $16, R6

	// load a into X1L/X1H and b into Y1L/Y1H
	LXVD2X (R4)(R0), X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H
	XXPERMDI X1L, X1L, $2, X1L
	LXVD2X (R5)(R0), Y1L
	LXVD2X (R5)(R6), Y1H
	XXPERMDI Y1H, Y1H, $2, Y1H
	XXPERMDI Y1L, Y1L, $2, Y1L

	// load the modulus p from ·p2
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH
	XXPERMDI PL, PL, $2, PL

	VSPLTISB $0, ZERO // gfpAddInternal expects ZERO = 0
	gfpAddInternal(T1, T0, X1H, X1L, Y1H, Y1L)

	// swap back to memory order and store c
	XXPERMDI T1, T1, $2, T1
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	STXVD2X T1, (R6+R3)
	RET
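
// func gfpDoubleAsm(c, a *gfP)
// c = (2*a) mod p, reusing gfpAddInternal with both operands equal.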
TEXT ·gfpDoubleAsm(SB),0,$0-16
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD $16, R6

	// load a into X1L/X1H
	LXVD2X (R4)(R0), X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H
	XXPERMDI X1L, X1L, $2, X1L

	// load the modulus p from ·p2
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH
	XXPERMDI PL, PL, $2, PL

	VSPLTISB $0, ZERO // gfpAddInternal expects ZERO = 0
	gfpAddInternal(T1, T0, X1H, X1L, X1H, X1L) // a + a

	// swap back to memory order and store c
	XXPERMDI T1, T1, $2, T1
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	STXVD2X T1, (R6+R3)
	RET
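
// func gfpTripleAsm(c, a *gfP)
// c = (3*a) mod p: double a, then add a once more. Each
// gfpAddInternal application reduces mod p, so the intermediate
// stays below p.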
TEXT ·gfpTripleAsm(SB),0,$0-16
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD $16, R6

	// load a into X1L/X1H
	LXVD2X (R4)(R0), X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H
	XXPERMDI X1L, X1L, $2, X1L

	// load the modulus p from ·p2
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH
	XXPERMDI PL, PL, $2, PL

	VSPLTISB $0, ZERO // gfpAddInternal expects ZERO = 0
	gfpAddInternal(T1, T0, X1H, X1L, X1H, X1L) // 2a
	gfpAddInternal(T1, T0, T1, T0, X1H, X1L)   // 2a + a

	// swap back to memory order and store c
	XXPERMDI T1, T1, $2, T1
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	STXVD2X T1, (R6+R3)
	RET