sm4: ppc64x, cbc: fix the case where src and dst are the same (issue #249)

This commit is contained in:
Sun Yimin 2024-09-13 10:34:07 +08:00 committed by GitHub
parent ee312709aa
commit fd9030c92d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 33 additions and 18 deletions

View File

@ -32,17 +32,9 @@
VPERM VS, VS, ESPERMW, VS \
STXVW4X VS, (RA+RB)
// CBC_STXVW4X(VS, VT, RA, RB)
//   1. VPERM VS with itself through the ESPERMW mask, result back in VS.
//   2. XOR VS with VT, result back in VS (VT is only read).
//   3. Store VS to the address RA+RB.
// VS is clobbered. Because the store happens inside the macro, any load from
// the same address that is issued after the macro will see the stored value.
// NOTE(review): this is the variant on the first arm of the conditional closed
// by "#endif // defined(GOARCH_ppc64le)"; the opening #if is not visible here.
#define CBC_STXVW4X(VS, VT, RA, RB) \
VPERM VS, VS, ESPERMW, VS \
VXOR VS, VT, VS \
STXVW4X VS, (RA+RB)
#else
// Variants used on the #else arm: no VPERM/ESPERMW step is applied here.
//
// PPC64X_LXVW4X(RA, RB, VT): LXVW4X load from address RA+RB into VT.
#define PPC64X_LXVW4X(RA,RB,VT) LXVW4X (RA+RB), VT
// PPC64X_STXVW4X(VS, RA, RB): STXVW4X store of VS to address RA+RB.
#define PPC64X_STXVW4X(VS, RA, RB) STXVW4X VS, (RA+RB)
// CBC_STXVW4X(VS, VT, RA, RB): XOR VS with VT (result back in VS, VT only
// read), then store VS to address RA+RB. VS is clobbered.
#define CBC_STXVW4X(VS, VT, RA, RB) \
VXOR VS, VT, VS \
STXVW4X VS, (RA+RB)
#endif // defined(GOARCH_ppc64le)
// r = s <<< n

View File

@ -29,6 +29,20 @@
#include "aesni_macros_ppc64x.s"
#ifdef NEEDS_PERMW
// REVERSE32LE_1BLOCK(V): VPERM V with itself through the ESPERMW mask,
// writing the result back into V.
#define REVERSE32LE_1BLOCK(V) VPERM V, V, ESPERMW, V
// REVERSE32LE_8BLOCKS applies the ESPERMW permutation in place to each of the
// eight vector registers V0..V7, in ascending order.
#define REVERSE32LE_8BLOCKS \
	REVERSE32LE_1BLOCK(V0) \
	REVERSE32LE_1BLOCK(V1) \
	REVERSE32LE_1BLOCK(V2) \
	REVERSE32LE_1BLOCK(V3) \
	REVERSE32LE_1BLOCK(V4) \
	REVERSE32LE_1BLOCK(V5) \
	REVERSE32LE_1BLOCK(V6) \
	REVERSE32LE_1BLOCK(V7)
#else
// Without NEEDS_PERMW no permutation is required: expand to nothing.
#define REVERSE32LE_8BLOCKS
#endif
// func decryptBlocksChain(xk *uint32, dst, src []byte, iv *byte)
TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
#define dstPtr R3
@ -105,23 +119,32 @@ loop8blocks:
TRANSPOSE_MATRIX(V0, V1, V2, V3)
TRANSPOSE_MATRIX(V4, V5, V6, V7)
REVERSE32LE_8BLOCKS // for ppc64le
LXVW4X (R16)(R0), TMP0
LXVW4X (R16)(R7), TMP1
LXVW4X (R16)(R8), TMP2
LXVW4X (R16)(R9), TMP3
CBC_STXVW4X(V0, TMP0, R17, R0)
CBC_STXVW4X(V1, TMP1, R17, R7)
CBC_STXVW4X(V2, TMP2, R17, R8)
CBC_STXVW4X(V3, TMP3, R17, R9)
VXOR V0, TMP0, V0
VXOR V1, TMP1, V1
VXOR V2, TMP2, V2
VXOR V3, TMP3, V3
LXVW4X (R16)(R10), TMP0
LXVW4X (R16)(R11), TMP1
LXVW4X (R16)(R12), TMP2
LXVW4X (R16)(R14), TMP3
CBC_STXVW4X(V4, TMP0, R17, R10)
CBC_STXVW4X(V5, TMP1, R17, R11)
CBC_STXVW4X(V6, TMP2, R17, R12)
CBC_STXVW4X(V7, TMP3, R17, R14)
VXOR V4, TMP0, V4
VXOR V5, TMP1, V5
VXOR V6, TMP2, V6
VXOR V7, TMP3, V7
STXVW4X V0, (R17)(R0)
STXVW4X V1, (R17)(R7)
STXVW4X V2, (R17)(R8)
STXVW4X V3, (R17)(R9)
STXVW4X V4, (R17)(R10)
STXVW4X V5, (R17)(R11)
STXVW4X V6, (R17)(R12)
STXVW4X V7, (R17)(R14)
CMP srcLen, $144 // 9 blocks
BGE loop8blocks