// gmsm/sm3/sm3blocks_ppc64x.s

// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

#include "textflag.h"

// For P9 instruction emulation
#define ESPERMW V21 // Endian swapping permute into BE
#define TMP2 V22 // Temporary for STOREWORDS

DATA ·mask+0x00(SB)/8, $0x0c0d0e0f08090a0b // Permute for vector doubleword endian swap
DATA ·mask+0x08(SB)/8, $0x0405060700010203
DATA ·mask+0x10(SB)/8, $0x0001020310111213 // Permute for matrix transpose
DATA ·mask+0x18(SB)/8, $0x0405060714151617
DATA ·mask+0x20(SB)/8, $0x08090a0b18191a1b
DATA ·mask+0x28(SB)/8, $0x0c0d0e0f1c1d1e1f
DATA ·mask+0x30(SB)/8, $0x0001020304050607
DATA ·mask+0x38(SB)/8, $0x1011121314151617
DATA ·mask+0x40(SB)/8, $0x08090a0b0c0d0e0f
DATA ·mask+0x48(SB)/8, $0x18191a1b1c1d1e1f
DATA ·mask+0x50(SB)/8, $0x0b0a09080f0e0d0c // Permute for vector word endian swap
DATA ·mask+0x58(SB)/8, $0x0302010007060504
GLOBL ·mask(SB), RODATA, $96
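
// How the transpose masks are read: VPERM VA, VB, VC, VT fills byte i of VT
// with byte VC[i] of the 32-byte concatenation VA||VB, so indices 0x00-0x0f
// select from VA and 0x10-0x1f from VB. Taking ·mask+0x10 at face value, its
// indices 00-03, 10-13, 04-07, 14-17 build [VA.w0, VB.w0, VA.w1, VB.w1].
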
#ifdef GOARCH_ppc64le
#define NEEDS_ESPERM

// On little-endian targets, go through ESPERMW so vectors hold
// big-endian word order in registers.
#define LOADWORDS(RA,RB,VT) \
LXVD2X (RA+RB), VT \
VPERM VT, VT, ESPERMW, VT

#define STOREWORDS(VS,RA,RB) \
VPERM VS, VS, ESPERMW, TMP2 \
STXVD2X TMP2, (RA+RB)
#else
#define LOADWORDS(RA,RB,VT) LXVD2X (RA+RB), VT
#define STOREWORDS(VS,RA,RB) STXVD2X VS, (RA+RB)
#endif // defined(GOARCH_ppc64le)

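// For illustration, on ppc64le LOADWORDS(R4, R0, V0) expands to
//
//	LXVD2X (R4+R0), V0
//	VPERM V0, V0, ESPERMW, V0
//
// while on big-endian ppc64 it is the bare LXVD2X, since memory already
// matches the word order the vector code expects.
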
#define TRANSPOSE_MATRIX(T0, T1, T2, T3, M0, M1, M2, M3, TMP0, TMP1, TMP2, TMP3) \
VPERM T0, T1, M0, TMP0; \
VPERM T2, T3, M0, TMP1; \
VPERM T0, T1, M1, TMP2; \
VPERM T2, T3, M1, TMP3; \
VPERM TMP0, TMP1, M2, T0; \
VPERM TMP0, TMP1, M3, T1; \
VPERM TMP2, TMP3, M2, T2; \
VPERM TMP2, TMP3, M3, T3
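
// A worked example of TRANSPOSE_MATRIX, with M0-M3 holding the byte indices
// of ·mask+0x10..0x40 in VPERM order and rows T0=[a0 a1 a2 a3] .. T3=[d0 d1 d2 d3]:
// the first four VPERMs build the half-interleaves [a0 b0 a1 b1], [c0 d0 c1 d1],
// [a2 b2 a3 b3], [c2 d2 c3 d3]; the last four splice their doubleword halves,
// leaving the columns [a0 b0 c0 d0] .. [a3 b3 c3 d3] in T0-T3.
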
// transposeMatrix(dig **[8]uint32)
TEXT ·transposeMatrix(SB),NOSPLIT,$0
MOVD dig+0(FP), R3
// R5-R9: constant byte offsets used for indexed loads/stores below.
MOVD $8, R5
MOVD $16, R6
MOVD $24, R7
MOVD $32, R8
MOVD $48, R9

#ifdef NEEDS_ESPERM
MOVD $·mask(SB), R4
LVX (R4), ESPERMW // load the endian-swap permute into ESPERMW
ADD $0x10, R4 // advance past it to the transpose masks
#else
MOVD $·mask+0x10(SB), R4
#endif
// Load the four transpose permute masks into V8-V11.
LXVD2X (R0)(R4), V8
LXVD2X (R6)(R4), V9
LXVD2X (R8)(R4), V10
LXVD2X (R9)(R4), V11

// dig points at four *[8]uint32; fetch each state pointer and load its
// eight words as a register pair (words 0-3, then words 4-7).
MOVD (R0)(R3), R4
LXVW4X (R0)(R4), V0
LXVW4X (R6)(R4), V4
MOVD (R5)(R3), R4
LXVW4X (R0)(R4), V1
LXVW4X (R6)(R4), V5
MOVD (R6)(R3), R4
LXVW4X (R0)(R4), V2
LXVW4X (R6)(R4), V6
MOVD (R7)(R3), R4
LXVW4X (R0)(R4), V3
LXVW4X (R6)(R4), V7

TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)

MOVD (R0)(R3), R4
VSPLTISW $4, TMP2 // splat the immediate 4 into each word of TMP2
VRLW V0, TMP2, V0 // rotate every word left by 4 bits
VRLW V4, TMP2, V4
STXVW4X V0, (R0)(R4)
STXVW4X V4, (R6)(R4)
MOVD (R5)(R3), R4
STXVW4X V1, (R0)(R4)
STXVW4X V5, (R6)(R4)
MOVD (R6)(R3), R4
STXVW4X V2, (R0)(R4)
STXVW4X V6, (R6)(R4)
MOVD (R7)(R3), R4
STXVW4X V3, (R0)(R4)
STXVW4X V7, (R6)(R4)
RET
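
// Go-side sketch (an assumption for illustration, not part of this file):
// the routine above would be declared in a stub like
//
//	//go:noescape
//	func transposeMatrix(dig **[8]uint32)
//
// with dig pointing at four consecutive *[8]uint32 state pointers, matching
// the pointer loads at offsets 0, 8, 16 and 24 above.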

#ifdef GOARCH_ppc64le
#define NEEDS_PERMW

// Store VS to (RA+RB) in big-endian byte order by permuting through ESPERMW.
#define PPC64X_STXVD2X(VS,RA,RB) \
VPERM VS, VS, ESPERMW, TMP2 \
STXVD2X TMP2, (RA+RB)
#else
#define PPC64X_STXVD2X(VS,RA,RB) STXVD2X VS, (RA+RB)
#endif // defined(GOARCH_ppc64le)

// func copyResultsBy4(dig *uint32, dst *byte)
TEXT ·copyResultsBy4(SB),NOSPLIT,$0
MOVD dig+0(FP), R3
MOVD dst+8(FP), R4

#ifdef NEEDS_PERMW
MOVD $·mask+0x50(SB), R5
LVX (R5), ESPERMW // permute used by PPC64X_STXVD2X
#endif

// Copy 8 x 16 = 128 bytes of digest words from dig to dst,
// byte-swapped to big-endian on little-endian targets.
LXVD2X (R0)(R3), V0
PPC64X_STXVD2X(V0, R0, R4)

MOVD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

ADD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

ADD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

ADD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

ADD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

ADD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

ADD $16, R5
LXVD2X (R5)(R3), V0
PPC64X_STXVD2X(V0, R5, R4)

RET
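
// Go-side sketch (an assumption for illustration, not part of this file):
//
//	//go:noescape
//	func copyResultsBy4(dig *uint32, dst *byte)
//
// dig points at 32 contiguous uint32 words (four 8-word SM3 states) and dst
// at a 128-byte buffer; the eight 16-byte copies above fill all of dst.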