diff --git a/sm3/sm3blocks_s390x.s b/sm3/sm3blocks_s390x.s index 40245a2..994bfdb 100644 --- a/sm3/sm3blocks_s390x.s +++ b/sm3/sm3blocks_s390x.s @@ -32,13 +32,17 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0 MOVD dig+0(FP), R1 MOVD (R1), R2 - VLM (R2), V0, V1 + VL (R2), V0 + VL 16(R2), V1 MOVD 8(R1), R2 - VLM (R2), V2, V3 + VL (R2), V2 + VL 16(R2), V3 MOVD 16(R1), R2 - VLM (R2), V4, V5 + VL (R2), V4 + VL 16(R2), V5 MOVD 24(R1), R2 - VLM (R2), V6, V7 + VL (R2), V6 + VL 16(R2), V7 MOVD $mask<>+0x00(SB), R2 VLM (R2), V8, V11 @@ -47,12 +51,16 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0 // TRANSPOSE_MATRIX(V1, V3, V5, V7, V8, V9, V10, V11, V12, V13, V14, V15) MOVD (R1), R2 - VSTM V0, V1, (R2) + VST V0, (R2) + VST V1, 16(R2) MOVD 8(R1), R2 - VSTM V2, V3, (R2) + VST V2, (R2) + VST V3, 16(R2) MOVD 16(R1), R2 - VSTM V4, V5, (R2) + VST V4, (R2) + VST V5, 16(R2) MOVD 24(R1), R2 - VSTM V6, V7, (R2) + VST V6, (R2) + VST V7, 16(R2) RET