sm4: ppc64x, cbc: fix the issue that occurs when src and dst are the same (#249)

This commit is contained in:
Sun Yimin 2024-09-13 10:34:07 +08:00 committed by GitHub
parent ee312709aa
commit fd9030c92d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 33 additions and 18 deletions

View File

@ -32,17 +32,9 @@
VPERM VS, VS, ESPERMW, VS \ VPERM VS, VS, ESPERMW, VS \
STXVW4X VS, (RA+RB) STXVW4X VS, (RA+RB)
#define CBC_STXVW4X(VS, VT, RA, RB) \
VPERM VS, VS, ESPERMW, VS \
VXOR VS, VT, VS \
STXVW4X VS, (RA+RB)
#else #else
#define PPC64X_LXVW4X(RA,RB,VT) LXVW4X (RA+RB), VT #define PPC64X_LXVW4X(RA,RB,VT) LXVW4X (RA+RB), VT
#define PPC64X_STXVW4X(VS, RA, RB) STXVW4X VS, (RA+RB) #define PPC64X_STXVW4X(VS, RA, RB) STXVW4X VS, (RA+RB)
#define CBC_STXVW4X(VS, VT, RA, RB) \
VXOR VS, VT, VS \
STXVW4X VS, (RA+RB)
#endif // defined(GOARCH_ppc64le) #endif // defined(GOARCH_ppc64le)
// r = s <<< n // r = s <<< n

View File

@ -29,6 +29,20 @@
#include "aesni_macros_ppc64x.s" #include "aesni_macros_ppc64x.s"
// REVERSE32LE_8BLOCKS rewrites each of the eight vector registers V0..V7 in
// place by running it through VPERM with ESPERMW as the permute control
// (VPERM Vn, Vn, ESPERMW, Vn).
//
// The permutes are only emitted when NEEDS_PERMW is defined; otherwise the
// macro expands to nothing, so it can be invoked unconditionally.
//
// NOTE(review): NEEDS_PERMW and ESPERMW are defined outside this view.
// Judging by the macro name, this presumably fixes up the byte order of each
// 32-bit word on little-endian targets -- confirm against their definitions.
#ifdef NEEDS_PERMW
#define REVERSE32LE_8BLOCKS \
VPERM V0, V0, ESPERMW, V0 \
VPERM V1, V1, ESPERMW, V1 \
VPERM V2, V2, ESPERMW, V2 \
VPERM V3, V3, ESPERMW, V3 \
VPERM V4, V4, ESPERMW, V4 \
VPERM V5, V5, ESPERMW, V5 \
VPERM V6, V6, ESPERMW, V6 \
VPERM V7, V7, ESPERMW, V7
#else
// No permute needed in this configuration: expand to an empty statement list.
#define REVERSE32LE_8BLOCKS
#endif
// func decryptBlocksChain(xk *uint32, dst, src []byte, iv *byte) // func decryptBlocksChain(xk *uint32, dst, src []byte, iv *byte)
TEXT ·decryptBlocksChain(SB),NOSPLIT,$0 TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
#define dstPtr R3 #define dstPtr R3
@ -105,23 +119,32 @@ loop8blocks:
TRANSPOSE_MATRIX(V0, V1, V2, V3) TRANSPOSE_MATRIX(V0, V1, V2, V3)
TRANSPOSE_MATRIX(V4, V5, V6, V7) TRANSPOSE_MATRIX(V4, V5, V6, V7)
REVERSE32LE_8BLOCKS // for ppc64le
LXVW4X (R16)(R0), TMP0 LXVW4X (R16)(R0), TMP0
LXVW4X (R16)(R7), TMP1 LXVW4X (R16)(R7), TMP1
LXVW4X (R16)(R8), TMP2 LXVW4X (R16)(R8), TMP2
LXVW4X (R16)(R9), TMP3 LXVW4X (R16)(R9), TMP3
CBC_STXVW4X(V0, TMP0, R17, R0) VXOR V0, TMP0, V0
CBC_STXVW4X(V1, TMP1, R17, R7) VXOR V1, TMP1, V1
CBC_STXVW4X(V2, TMP2, R17, R8) VXOR V2, TMP2, V2
CBC_STXVW4X(V3, TMP3, R17, R9) VXOR V3, TMP3, V3
LXVW4X (R16)(R10), TMP0 LXVW4X (R16)(R10), TMP0
LXVW4X (R16)(R11), TMP1 LXVW4X (R16)(R11), TMP1
LXVW4X (R16)(R12), TMP2 LXVW4X (R16)(R12), TMP2
LXVW4X (R16)(R14), TMP3 LXVW4X (R16)(R14), TMP3
CBC_STXVW4X(V4, TMP0, R17, R10) VXOR V4, TMP0, V4
CBC_STXVW4X(V5, TMP1, R17, R11) VXOR V5, TMP1, V5
CBC_STXVW4X(V6, TMP2, R17, R12) VXOR V6, TMP2, V6
CBC_STXVW4X(V7, TMP3, R17, R14) VXOR V7, TMP3, V7
STXVW4X V0, (R17)(R0)
STXVW4X V1, (R17)(R7)
STXVW4X V2, (R17)(R8)
STXVW4X V3, (R17)(R9)
STXVW4X V4, (R17)(R10)
STXVW4X V5, (R17)(R11)
STXVW4X V6, (R17)(R12)
STXVW4X V7, (R17)(R14)
CMP srcLen, $144 // 9 blocks CMP srcLen, $144 // 9 blocks
BGE loop8blocks BGE loop8blocks