diff --git a/sm3/sm3blocks_ppc64x.s b/sm3/sm3blocks_ppc64x.s index 47ef3cb..b0854cc 100644 --- a/sm3/sm3blocks_ppc64x.s +++ b/sm3/sm3blocks_ppc64x.s @@ -70,34 +70,50 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0 LXVD2X (R9)(R4), V11 MOVD (R0)(R3), R4 - LOADWORDS(R4, R0, V0) - LOADWORDS(R4, R6, V4) + LXVW4X (R0)(R4), V0 + LXVW4X (R6)(R4), V4 + //LOADWORDS(R4, R0, V0) + //LOADWORDS(R4, R6, V4) MOVD (R5)(R3), R4 - LOADWORDS(R4, R0, V1) - LOADWORDS(R4, R6, V5) + LXVW4X (R0)(R4), V1 + LXVW4X (R6)(R4), V5 + //LOADWORDS(R4, R0, V1) + //LOADWORDS(R4, R6, V5) MOVD (R6)(R3), R4 - LOADWORDS(R4, R0, V2) - LOADWORDS(R4, R6, V6) + LXVW4X (R0)(R4), V2 + LXVW4X (R6)(R4), V6 + //LOADWORDS(R4, R0, V2) + //LOADWORDS(R4, R6, V6) MOVD (R7)(R3), R4 - LOADWORDS(R4, R0, V3) - LOADWORDS(R4, R6, V7) + LXVW4X (R0)(R4), V3 + LXVW4X (R6)(R4), V7 + //LOADWORDS(R4, R0, V3) + //LOADWORDS(R4, R6, V7) TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15) TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15) MOVD (R0)(R3), R4 - STOREWORDS(V0, R4, R0) - STOREWORDS(V4, R4, R6) + STXVW4X V0, (R0)(R4) + STXVW4X V4, (R6)(R4) + //STOREWORDS(V0, R4, R0) + //STOREWORDS(V4, R4, R6) MOVD (R5)(R3), R4 - STOREWORDS(V1, R4, R0) - STOREWORDS(V5, R4, R6) + STXVW4X V1, (R0)(R4) + STXVW4X V5, (R6)(R4) + //STOREWORDS(V1, R4, R0) + //STOREWORDS(V5, R4, R6) MOVD (R6)(R3), R4 - STOREWORDS(V2, R4, R0) - STOREWORDS(V6, R4, R6) + STXVW4X V2, (R0)(R4) + STXVW4X V6, (R6)(R4) + //STOREWORDS(V2, R4, R0) + //STOREWORDS(V6, R4, R6) MOVD (R7)(R3), R4 - STOREWORDS(V3, R4, R0) - STOREWORDS(V7, R4, R6) + STXVW4X V3, (R0)(R4) + STXVW4X V7, (R6)(R4) + //STOREWORDS(V3, R4, R0) + //STOREWORDS(V7, R4, R6) RET