From 16219eef8ab0fc7e6378113b3be0783ec3519893 Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Thu, 7 Nov 2024 17:19:53 +0800
Subject: [PATCH] ppc64x: sm4/zuc reduce VAND

VSRD shifts the whole doubleword, so after the right shift by four each
byte's high nibble contains bits from the byte above it, and a VAND with
NIBBLE_MASK is needed to isolate the low nibble before the VPERM lookup.
VSRB shifts every byte independently and zero-fills from the left, so the
shifted value is already confined to the low four bits and the VAND can
be dropped. The shift count must then be present in every byte of V_FOUR,
so the splat changes from VSPLTISW to VSPLTISB. Saves one vector
instruction per affine transform.

---
 sm4/aesni_macros_ppc64x.s | 6 ++----
 sm4/asm_ppc64x.s          | 6 +++---
 sm4/cbc_ppc64x.s          | 2 +-
 sm4/ecb_ppc64x.s          | 2 +-
 zuc/asm_ppc64x.s          | 5 ++---
 5 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/sm4/aesni_macros_ppc64x.s b/sm4/aesni_macros_ppc64x.s
index c2a37f8..3d7ab9b 100644
--- a/sm4/aesni_macros_ppc64x.s
+++ b/sm4/aesni_macros_ppc64x.s
@@ -86,8 +86,7 @@
 #define AFFINE_TRANSFORM(L, H, V_FOUR, x, y, z) \
 	VAND NIBBLE_MASK, x, z; \
 	VPERM L, L, z, y; \
-	VSRD x, V_FOUR, x; \
-	VAND NIBBLE_MASK, x, z; \
+	VSRB x, V_FOUR, z; \
 	VPERM H, H, z, x; \
 	VXOR y, x, x
 
@@ -102,8 +101,7 @@
 	VNOR x, x, z; \ // z = NOT(x)
 	VAND NIBBLE_MASK, z, z; \
 	VPERM L, L, z, y; \
-	VSRD x, V_FOUR, x; \
-	VAND NIBBLE_MASK, x, z; \
+	VSRB x, V_FOUR, z; \
 	VPERM H, H, z, x; \
 	VXOR y, x, x
 
diff --git a/sm4/asm_ppc64x.s b/sm4/asm_ppc64x.s
index 54f092b..85dc631 100644
--- a/sm4/asm_ppc64x.s
+++ b/sm4/asm_ppc64x.s
@@ -60,7 +60,7 @@ GLOBL ·rcon(SB), RODATA, $112
 // func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
 TEXT ·expandKeyAsm(SB),NOSPLIT,$0
 	// prepare/load constants
-	VSPLTISW $4, V_FOUR;
+	VSPLTISB $4, V_FOUR;
 #ifdef NEEDS_PERMW
 	MOVD $·rcon(SB), R4
 	LVX (R4), ESPERMW
@@ -115,7 +115,7 @@ ksLoop:
 // func encryptBlockAsm(xk *uint32, dst, src *byte, inst int)
 TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
 	// prepare/load constants
-	VSPLTISW $4, V_FOUR;
+	VSPLTISB $4, V_FOUR;
 #ifdef NEEDS_PERMW
 	MOVD $·rcon(SB), R4
 	LVX (R4), ESPERMW
@@ -156,7 +156,7 @@ encryptBlockLoop:
 // func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
 TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
 	// prepare/load constants
-	VSPLTISW $4, V_FOUR;
+	VSPLTISB $4, V_FOUR;
 #ifdef NEEDS_PERMW
 	MOVD $·rcon(SB), R4
 	LVX (R4), ESPERMW
diff --git a/sm4/cbc_ppc64x.s b/sm4/cbc_ppc64x.s
index 8371224..e1f9b0e 100644
--- a/sm4/cbc_ppc64x.s
+++ b/sm4/cbc_ppc64x.s
@@ -45,7 +45,7 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
 #define rk R5
 #define srcLen R6
 	// prepare/load constants
-	VSPLTISW $4, V_FOUR;
+	VSPLTISB $4, V_FOUR;
 #ifdef NEEDS_PERMW
 	MOVD $·rcon(SB), R4
 	LVX (R4), ESPERMW
diff --git a/sm4/ecb_ppc64x.s b/sm4/ecb_ppc64x.s
index 14b3316..8d61b07 100644
--- a/sm4/ecb_ppc64x.s
+++ b/sm4/ecb_ppc64x.s
@@ -30,7 +30,7 @@ TEXT ·encryptSm4Ecb(SB),NOSPLIT,$0
 #define rk R5
 #define srcLen R6
 	// prepare/load constants
-	VSPLTISW $4, V_FOUR;
+	VSPLTISB $4, V_FOUR;
 #ifdef NEEDS_PERMW
 	MOVD $·rcon(SB), R4
 	LVX (R4), ESPERMW
diff --git a/zuc/asm_ppc64x.s b/zuc/asm_ppc64x.s
index b32b64d..d8e6999 100644
--- a/zuc/asm_ppc64x.s
+++ b/zuc/asm_ppc64x.s
@@ -42,7 +42,7 @@ GLOBL rcon<>(SB), RODATA, $160
 #define P3 V30
 
 #define LOAD_CONSTS \
-	VSPLTISW $4, V_FOUR \
+	VSPLTISB $4, V_FOUR \
 	MOVD $rcon<>+0x00(SB), R4 \
 	LXVD2X (R4)(R0), NIBBLE_MASK \
 	MOVD $0x10, R5 \
@@ -89,8 +89,7 @@
 #define AFFINE_TRANSFORM(L, H, V_FOUR, x, y, z) \
 	VAND NIBBLE_MASK, x, z; \
 	VPERM L, L, z, y; \
-	VSRD x, V_FOUR, x; \
-	VAND NIBBLE_MASK, x, z; \
+	VSRB x, V_FOUR, z; \
 	VPERM H, H, z, x; \
 	VXOR y, x, x
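
Note for reviewers: the equivalence this patch relies on can be checked
with a scalar model. The sketch below is illustrative only and is not
code from the repository; the function names are hypothetical, and a
uint64 stands in for one 64-bit lane of a 128-bit vector register.

package main

import "fmt"

// oldSrdThenMask mirrors the removed VSRD + VAND pair: the shift crosses
// byte boundaries, so bits from each neighboring byte land in the high
// nibble of the byte below it and a per-byte 0x0f mask is still needed.
func oldSrdThenMask(x uint64) uint64 {
	return (x >> 4) & 0x0f0f0f0f0f0f0f0f
}

// newSrbOnly mirrors VSRB: each byte is shifted independently and
// zero-filled from the left, so every result byte is already <= 0x0f.
func newSrbOnly(x uint64) uint64 {
	var r uint64
	for i := uint(0); i < 64; i += 8 {
		r |= uint64(byte(x>>i)>>4) << i
	}
	return r
}

func main() {
	x := uint64(0x123456789abcdef0)
	fmt.Printf("%016x\n", oldSrdThenMask(x)) // 01030507090b0d0f
	fmt.Printf("%016x\n", newSrbOnly(x))     // 01030507090b0d0f
}

Both functions produce the same value for every input, which is why the
separate VAND with NIBBLE_MASK becomes redundant once the shift happens
per byte; VSPLTISB is then required so that every byte of V_FOUR carries
the shift count that VSRB reads.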