# Patch summary (commentary only; everything from "diff --git" onward is the unmodified patch text).
#
# Target file: sm4/aesni_macros_ppc64x.s
#
# Both hunks rewrite a 4x4 word-transpose macro. In each macro the eight VPERM
# instructions, which took M0..M3 as their third operand, are replaced by
#   - two VMRGEW and two VMRGOW (vector merge even / odd word) writing TMP0..TMP3, then
#   - four XXPERMDI with immediates $0 and $3 that combine pairs of temporaries
#     into the outputs, written in the order T0, T2, T1, T3.
# The replacement sequences do not reference M0..M3 inside these two macros.
#
# PRE_TRANSPOSE_MATRIX: merges use operand pairs (T0, T1) and (T2, T3);
#   XXPERMDI combines (TMP0, TMP1) and (TMP2, TMP3).
# TRANSPOSE_MATRIX: merges use operand pairs (T1, T0) and (T3, T2);
#   XXPERMDI combines (TMP1, TMP0) and (TMP3, TMP2). The old code paired
#   (TMP2, TMP0) and (TMP3, TMP1) because it filled the temporaries in a
#   different order. This hunk also drops the trailing "; \" continuation
#   that followed the macro's last instruction.
#
# NOTE(review): equivalence with the removed VPERM sequences depends on the
#   contents of M0..M3, which are not part of this patch - confirm against
#   the mask definitions and the lane-order comments above each macro.
# NOTE(review): this record appears to have lost its line breaks; the hunk
#   headers describe 14-line hunks, so the original line structure must be
#   restored before the patch can be applied.
diff --git a/sm4/aesni_macros_ppc64x.s b/sm4/aesni_macros_ppc64x.s index 9d6c64f..ac0809d 100644 --- a/sm4/aesni_macros_ppc64x.s +++ b/sm4/aesni_macros_ppc64x.s @@ -55,14 +55,14 @@ // t2 = t3.S2, t2.S2, t1.S2, t0.S2 // t3 = t3.S3, t2.S3, t1.S3, t0.S3 #define PRE_TRANSPOSE_MATRIX(T0, T1, T2, T3) \ - VPERM T0, T1, M0, TMP0; \ - VPERM T2, T3, M0, TMP1; \ - VPERM T0, T1, M1, TMP2; \ - VPERM T2, T3, M1, TMP3; \ - VPERM TMP0, TMP1, M2, T0; \ - VPERM TMP0, TMP1, M3, T1; \ - VPERM TMP2, TMP3, M2, T2; \ - VPERM TMP2, TMP3, M3, T3 + VMRGEW T0, T1, TMP0; \ + VMRGEW T2, T3, TMP1; \ + VMRGOW T0, T1, TMP2; \ + VMRGOW T2, T3, TMP3; \ + XXPERMDI TMP0, TMP1, $0, T0; \ + XXPERMDI TMP0, TMP1, $3, T2; \ + XXPERMDI TMP2, TMP3, $0, T1; \ + XXPERMDI TMP2, TMP3, $3, T3 // input: from high to low // t0 = t0.S3, t0.S2, t0.S1, t0.S0 @@ -75,14 +75,14 @@ // t2 = t0.S2, t1.S2, t2.S2, t3.S2 // t3 = t0.S3, t1.S3, t2.S3, t3.S3 #define TRANSPOSE_MATRIX(T0, T1, T2, T3) \ - VPERM T1, T0, M0, TMP0; \ - VPERM T1, T0, M1, TMP1; \ - VPERM T3, T2, M0, TMP2; \ - VPERM T3, T2, M1, TMP3; \ - VPERM TMP2, TMP0, M2, T0; \ - VPERM TMP2, TMP0, M3, T1; \ - VPERM TMP3, TMP1, M2, T2; \ - VPERM TMP3, TMP1, M3, T3; \ + VMRGEW T1, T0, TMP0; \ + VMRGEW T3, T2, TMP1; \ + VMRGOW T1, T0, TMP2; \ + VMRGOW T3, T2, TMP3; \ + XXPERMDI TMP1, TMP0, $0, T0; \ + XXPERMDI TMP1, TMP0, $3, T2; \ + XXPERMDI TMP3, TMP2, $0, T1; \ + XXPERMDI TMP3, TMP2, $3, T3 // Affine Transform // parameters: