// Mirror of https://github.com/emmansun/gmsm.git
// (synced 2025-04-25 11:46:19 +08:00 · 204 lines · 4.0 KiB · ArmAsm)
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by an MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// Vector-register aliases shared by every routine and macro in this file.
// Each 256-bit value is handled as two 128-bit halves (low, high).

// Modulus halves, loaded from ·p2 by each TEXT routine before use.
#define PL   V30
#define PH   V31

// Input operands: X1 is the first operand, Y1 the second.
#define X1L  V0
#define X1H  V1
#define Y1L  V2
#define Y1H  V3

// Result halves produced by the add/sub macros.
#define T0   V4
#define T1   V5
// T1L/T1H name the same registers as T0/T1 (V4/V5); nothing in this file
// refers to them.
#define T1L  V4
#define T1H  V5

// Scratch registers used inside the macros.
#define T2   V6  // carry out of the 256-bit sum (gfpAddInternal)
#define SEL1 V7  // select mask handed to VSEL
#define ZERO V8  // all-zero quadword
#define CAR1 V9  // carry/borrow passed from the low half to the high half
#define CAR2 V10 // carry out of the trial subtraction (gfpAddInternal)
#define TT0  V11 // corrected candidate, low half
#define TT1  V12 // corrected candidate, high half

// gfpSubInternal(resHi, resLo, aHi, aLo, bHi, bLo)
//
// Computes the 256-bit difference a - b on two 128-bit halves and, if the
// subtraction borrowed, replaces it with (a - b) + PH:PL.
//
//   resHi:resLo  receives the result
//   aHi:aLo      minuend
//   bHi:bLo      subtrahend
//
// Reads PL/PH, which the caller must have loaded with the modulus.
// Clobbers ZERO (set to 0 here), CAR1, SEL1, TT0 and TT1.
// NOTE(review): the result is only fully reduced if both inputs are already
// below the modulus — confirm with callers.
//
// Comments inside the body must follow the continuation backslash; a comment
// placed before it would swallow the backslash. The body ends on the final
// VSEL with no trailing backslash, so the macro never depends on the line
// after it being blank.
#define gfpSubInternal(resHi, resLo, aHi, aLo, bHi, bLo) \
	VSPLTISB $0, ZERO                 \ // ZERO = 0 (VZERO)
	VSUBCUQ  aLo, bLo, CAR1           \ // carry out of the low half: 1 = no borrow
	VSUBUQM  aLo, bLo, resLo          \ // low half of a - b
	VSUBECUQ aHi, bHi, CAR1, SEL1     \ // carry out of the high half: 1 = no borrow
	VSUBEUQM aHi, bHi, CAR1, resHi    \ // high half of a - b
	VSUBUQM  ZERO, SEL1, SEL1         \ // VSQ: 0 - carry => all ones if no borrow, else 0
	\
	VADDCUQ  resLo, PL, CAR1          \ // VACCQ: carry of low half + PL
	VADDUQM  resLo, PL, TT0           \ // VAQ:   low half of (a - b) + p
	VADDEUQM resHi, PH, CAR1, TT1     \ // VACQ:  high half of (a - b) + p
	\
	VSEL     TT0, resLo, SEL1, resLo  \ // mask bits 1 keep a - b, bits 0 take (a - b) + p
	VSEL     TT1, resHi, SEL1, resHi

// gfpNegAsm(c, a): both arguments are pointers to 32-byte values.
// Stores (0 - a), corrected by gfpSubInternal with the modulus loaded from
// ·p2, at c. ·p2 is defined outside this file (presumably the field modulus).
TEXT ·gfpNegAsm(SB),0,$0-16
	MOVD a+8(FP), R4
	MOVD c+0(FP), R3
	MOVD $16, R6                 // byte offset of the upper 128-bit half

	// PH:PL = 32 bytes at ·p2. LXVD2X puts the lower-addressed doubleword in
	// the most-significant position; XXPERMDI $2 swaps the two doublewords.
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	XXPERMDI PL, PL, $2, PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH

	// Y1H:Y1L = a
	LXVD2X (R4)(R0), Y1L
	XXPERMDI Y1L, Y1L, $2, Y1L
	LXVD2X (R4)(R6), Y1H
	XXPERMDI Y1H, Y1H, $2, Y1H

	// X1L = 0 serves as both halves of the minuend, so the macro computes 0 - a.
	VSPLTISB $0, X1L
	gfpSubInternal(T1, T0, X1L, X1L, Y1H, Y1L)

	// Swap the doublewords back into memory order and store T1:T0 at c.
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	XXPERMDI T1, T1, $2, T1
	STXVD2X T1, (R6+R3)
	RET

// gfpSubAsm(c, a, b): all three arguments are pointers to 32-byte values.
// Stores a - b, corrected by gfpSubInternal with the modulus loaded from ·p2,
// at c. All loads happen before the stores.
TEXT ·gfpSubAsm(SB),0,$0-24
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD b+16(FP), R5
	MOVD $16, R6                 // byte offset of the upper 128-bit half

	// PH:PL = 32 bytes at ·p2; XXPERMDI $2 swaps the two doublewords of each
	// half after LXVD2X.
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	XXPERMDI PL, PL, $2, PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH

	// X1H:X1L = a
	LXVD2X (R4)(R0), X1L
	XXPERMDI X1L, X1L, $2, X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H

	// Y1H:Y1L = b
	LXVD2X (R5)(R0), Y1L
	XXPERMDI Y1L, Y1L, $2, Y1L
	LXVD2X (R5)(R6), Y1H
	XXPERMDI Y1H, Y1H, $2, Y1H

	gfpSubInternal(T1, T0, X1H, X1L, Y1H, Y1L)

	// Swap the doublewords back into memory order and store T1:T0 at c.
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	XXPERMDI T1, T1, $2, T1
	STXVD2X T1, (R6+R3)
	RET

// gfpAddInternal(resHi, resLo, aHi, aLo, bHi, bLo)
//
// Computes the 256-bit sum a + b on two 128-bit halves, then a trial
// subtraction of PH:PL, and keeps the raw sum only when there was no carry out
// of the sum and the trial subtraction borrowed.
//
//   resHi:resLo  receives the result (may name the same registers as aHi:aLo;
//                each a-half is last read by the instruction that writes the
//                matching result half)
//   aHi:aLo      first addend
//   bHi:bLo      second addend
//
// Preconditions: PL/PH hold the modulus and ZERO holds 0; unlike
// gfpSubInternal, this macro does not clear ZERO itself.
// Clobbers CAR1, CAR2, T2, SEL1, TT0 and TT1.
// NOTE(review): the result is only fully reduced if both inputs are already
// below the modulus — confirm with callers.
#define gfpAddInternal(resHi, resLo, aHi, aLo, bHi, bLo) \
	VADDCUQ  aLo, bLo, CAR1           \ // carry out of the low half
	VADDUQM  aLo, bLo, resLo          \ // low half of a + b
	VADDECUQ aHi, bHi, CAR1, T2       \ // VACCCQ: carry out of the 256-bit sum
	VADDEUQM aHi, bHi, CAR1, resHi    \ // high half of a + b
	\
	VSUBCUQ  resLo, PL, CAR1          \ // VSCBIQ:  low-half carry of sum - p (1 = no borrow)
	VSUBUQM  resLo, PL, TT0           \ // low half of sum - p
	VSUBECUQ resHi, PH, CAR1, CAR2    \ // VSBCBIQ: high-half carry of sum - p (1 = no borrow)
	VSUBEUQM resHi, PH, CAR1, TT1     \ // VSBIQ:   high half of sum - p
	VSUBEUQM T2, ZERO, CAR2, SEL1     \ // SEL1 = T2 + CAR2 - 1 (mod 2^128)
	\
	VSEL     TT0, resLo, SEL1, resLo  \ // mask bits 1 keep the sum, bits 0 take sum - p
	VSEL     TT1, resHi, SEL1, resHi

// gfpAddAsm(c, a, b): all three arguments are pointers to 32-byte values.
// Stores a + b, corrected by gfpAddInternal with the modulus loaded from ·p2,
// at c. All loads happen before the stores.
TEXT ·gfpAddAsm(SB),0,$0-24
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD b+16(FP), R5
	MOVD $16, R6                 // byte offset of the upper 128-bit half

	VSPLTISB $0, ZERO            // gfpAddInternal expects ZERO = 0

	// PH:PL = 32 bytes at ·p2; XXPERMDI $2 swaps the two doublewords of each
	// half after LXVD2X.
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	XXPERMDI PL, PL, $2, PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH

	// X1H:X1L = a
	LXVD2X (R4)(R0), X1L
	XXPERMDI X1L, X1L, $2, X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H

	// Y1H:Y1L = b
	LXVD2X (R5)(R0), Y1L
	XXPERMDI Y1L, Y1L, $2, Y1L
	LXVD2X (R5)(R6), Y1H
	XXPERMDI Y1H, Y1H, $2, Y1H

	gfpAddInternal(T1, T0, X1H, X1L, Y1H, Y1L)

	// Swap the doublewords back into memory order and store T1:T0 at c.
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	XXPERMDI T1, T1, $2, T1
	STXVD2X T1, (R6+R3)
	RET

// gfpDoubleAsm(c, a): both arguments are pointers to 32-byte values.
// Stores a + a, corrected by gfpAddInternal with the modulus loaded from ·p2,
// at c.
TEXT ·gfpDoubleAsm(SB),0,$0-16
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD $16, R6                 // byte offset of the upper 128-bit half

	VSPLTISB $0, ZERO            // gfpAddInternal expects ZERO = 0

	// PH:PL = 32 bytes at ·p2; XXPERMDI $2 swaps the two doublewords of each
	// half after LXVD2X.
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	XXPERMDI PL, PL, $2, PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH

	// X1H:X1L = a
	LXVD2X (R4)(R0), X1L
	XXPERMDI X1L, X1L, $2, X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H

	// The same operand is passed as both addends.
	gfpAddInternal(T1, T0, X1H, X1L, X1H, X1L)

	// Swap the doublewords back into memory order and store T1:T0 at c.
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	XXPERMDI T1, T1, $2, T1
	STXVD2X T1, (R6+R3)
	RET

// gfpTripleAsm(c, a): both arguments are pointers to 32-byte values.
// Stores (a + a) + a at c, applying gfpAddInternal's correction with the
// modulus loaded from ·p2 after each of the two additions.
TEXT ·gfpTripleAsm(SB),0,$0-16
	MOVD c+0(FP), R3
	MOVD a+8(FP), R4
	MOVD $16, R6                 // byte offset of the upper 128-bit half

	VSPLTISB $0, ZERO            // gfpAddInternal expects ZERO = 0

	// PH:PL = 32 bytes at ·p2; XXPERMDI $2 swaps the two doublewords of each
	// half after LXVD2X.
	MOVD $·p2+0(SB), R7
	LXVD2X (R7)(R0), PL
	XXPERMDI PL, PL, $2, PL
	LXVD2X (R7)(R6), PH
	XXPERMDI PH, PH, $2, PH

	// X1H:X1L = a
	LXVD2X (R4)(R0), X1L
	XXPERMDI X1L, X1L, $2, X1L
	LXVD2X (R4)(R6), X1H
	XXPERMDI X1H, X1H, $2, X1H

	// T1:T0 = a + a, then T1:T0 = T1:T0 + a. The second call reuses T1:T0 as
	// both an addend and the destination.
	gfpAddInternal(T1, T0, X1H, X1L, X1H, X1L)
	gfpAddInternal(T1, T0, T1, T0, X1H, X1L)

	// Swap the doublewords back into memory order and store T1:T0 at c.
	XXPERMDI T0, T0, $2, T0
	STXVD2X T0, (R0+R3)
	XXPERMDI T1, T1, $2, T1
	STXVD2X T1, (R6+R3)
	RET