mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
sm3: ppc64x, transpose matrix fix
This commit is contained in:
parent
bc4b8e691f
commit
379dbdd3fb
@ -6,26 +6,37 @@
|
|||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
|
|
||||||
DATA mask<>+0x00(SB)/8, $0x0001020310111213
|
// For P9 instruction emulation
|
||||||
DATA mask<>+0x08(SB)/8, $0x0405060714151617
|
#define ESPERM V21 // Endian swapping permute into BE
|
||||||
DATA mask<>+0x10(SB)/8, $0x08090a0b18191a1b
|
#define TMP2 V22 // Temporary for P8_STXVB16X
|
||||||
DATA mask<>+0x18(SB)/8, $0x0c0d0e0f1c1d1e1f
|
|
||||||
DATA mask<>+0x20(SB)/8, $0x0001020304050607
|
DATA ·mask+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // Permute for vector doubleword endian swap
|
||||||
DATA mask<>+0x28(SB)/8, $0x1011121314151617
|
DATA ·mask+0x08(SB)/8, $0x0706050403020100
|
||||||
DATA mask<>+0x30(SB)/8, $0x08090a0b0c0d0e0f
|
DATA ·mask+0x10(SB)/8, $0x0001020310111213 // Permute for transpose matrix
|
||||||
DATA mask<>+0x38(SB)/8, $0x18191a1b1c1d1e1f
|
DATA ·mask+0x18(SB)/8, $0x0405060714151617
|
||||||
GLOBL mask<>(SB), 8, $64
|
DATA ·mask+0x20(SB)/8, $0x08090a0b18191a1b
|
||||||
|
DATA ·mask+0x28(SB)/8, $0x0c0d0e0f1c1d1e1f
|
||||||
|
DATA ·mask+0x30(SB)/8, $0x0001020304050607
|
||||||
|
DATA ·mask+0x38(SB)/8, $0x1011121314151617
|
||||||
|
DATA ·mask+0x40(SB)/8, $0x08090a0b0c0d0e0f
|
||||||
|
DATA ·mask+0x48(SB)/8, $0x18191a1b1c1d1e1f
|
||||||
|
GLOBL ·mask(SB), RODATA, $80
|
||||||
|
|
||||||
#ifdef GOARCH_ppc64le
|
#ifdef GOARCH_ppc64le
|
||||||
# ifdef GOPPC64_power9
|
# ifdef GOPPC64_power9
|
||||||
#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT
|
#define P8_LXVB16X(RA,RB,VT) LXVB16X (RA+RB), VT
|
||||||
#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
|
#define P8_STXVB16X(VS,RA,RB) STXVB16X VS, (RA+RB)
|
||||||
# else
|
# else
|
||||||
|
// On POWER8/ppc64le, emulate the POWER9 instructions by loading unaligned
|
||||||
|
// doublewords and byte-swapping each doubleword to emulate BE load/stores.
|
||||||
|
#define NEEDS_ESPERM
|
||||||
#define P8_LXVB16X(RA,RB,VT) \
|
#define P8_LXVB16X(RA,RB,VT) \
|
||||||
LXVD2X (RA+RB), VT
|
LXVD2X (RA+RB), VT \
|
||||||
|
VPERM VT, VT, ESPERM, VT
|
||||||
|
|
||||||
#define P8_STXVB16X(VS,RA,RB) \
|
#define P8_STXVB16X(VS,RA,RB) \
|
||||||
STXVD2X VS, (RA+RB)
|
VPERM VS, VS, ESPERM, TMP2 \
|
||||||
|
STXVD2X TMP2, (RA+RB)
|
||||||
|
|
||||||
# endif // defined(GOPPC64_power9)
|
# endif // defined(GOPPC64_power9)
|
||||||
#else
|
#else
|
||||||
@ -52,6 +63,19 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0
|
|||||||
MOVD $32, R8
|
MOVD $32, R8
|
||||||
MOVD $48, R9
|
MOVD $48, R9
|
||||||
|
|
||||||
|
#ifdef NEEDS_ESPERM
|
||||||
|
MOVD $·mask(SB), R4
|
||||||
|
LVX (R4), ESPERM
|
||||||
|
ADD $0x10, R4
|
||||||
|
#else
|
||||||
|
MOVD $·mask+0x10(SB), R4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
LVX (R0)(R4), V8
|
||||||
|
LVX (R6)(R4), V9
|
||||||
|
LVX (R8)(R4), V10
|
||||||
|
LVX (R9)(R4), V11
|
||||||
|
|
||||||
MOVD (R0)(R3), R4
|
MOVD (R0)(R3), R4
|
||||||
P8_LXVB16X(R4, R0, V0)
|
P8_LXVB16X(R4, R0, V0)
|
||||||
P8_LXVB16X(R4, R6, V4)
|
P8_LXVB16X(R4, R6, V4)
|
||||||
@ -65,12 +89,6 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0
|
|||||||
P8_LXVB16X(R4, R0, V3)
|
P8_LXVB16X(R4, R0, V3)
|
||||||
P8_LXVB16X(R4, R6, V7)
|
P8_LXVB16X(R4, R6, V7)
|
||||||
|
|
||||||
MOVD $mask<>+0x00(SB), R4
|
|
||||||
|
|
||||||
LVX (R0)(R4), V8
|
|
||||||
LVX (R6)(R4), V9
|
|
||||||
LVX (R8)(R4), V10
|
|
||||||
LVX (R9)(R4), V11
|
|
||||||
|
|
||||||
TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
|
TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
|
||||||
TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)
|
TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user