From 40bc2f5f84f9fcd882995168c988b7d18809ddcb Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Tue, 3 Sep 2024 15:20:10 +0800 Subject: [PATCH] debug 1 --- sm3/sm3blocks_s390x.s | 32 +++++++++++++------------------- sm3/sm3blocks_s390x_test.go | 5 ++++- 2 files changed, 17 insertions(+), 20 deletions(-) diff --git a/sm3/sm3blocks_s390x.s b/sm3/sm3blocks_s390x.s index 114d798..b9a532b 100644 --- a/sm3/sm3blocks_s390x.s +++ b/sm3/sm3blocks_s390x.s @@ -5,6 +5,7 @@ //go:build !purego #include "textflag.h" +#include "go_asm.h" DATA mask<>+0x00(SB)/8, $0x0001020310111213 DATA mask<>+0x08(SB)/8, $0x0405060714151617 @@ -31,34 +32,27 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0 MOVD dig+0(FP), R1 MOVD (R1), R2 - VL 0(R2), V0 - VL 16(R2), V4 + VLM (R2), V0, V1 MOVD 8(R1), R2 - VL 0(R2), V1 - VL 16(R2), V5 + VLM (R2), V2, V3 MOVD 16(R1), R2 - VL 0(R2), V2 - VL 16(R2), V6 + VLM (R2), V4, V5 MOVD 24(R1), R2 - VL 0(R2), V3 - VL 16(R2), V7 + VLM (R2), V6, V7 MOVD $mask<>+0x00(SB), R2 - VLM 0(R2), V8, V11 + VLM (R2), V8, V11 - TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15) - TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15) + TRANSPOSE_MATRIX(V0, V2, V4, V6, V8, V9, V10, V11, V12, V13, V14, V15) + TRANSPOSE_MATRIX(V1, V3, V5, V7, V8, V9, V10, V11, V12, V13, V14, V15) MOVD (R1), R2 - VST V0, 0(R2) - VST V4, 16(R2) + VSTM V0, V1, (R2) MOVD 8(R1), R2 - VST V1, 0(R2) - VST V5, 16(R2) + VSTM V2, V3, (R2) MOVD 16(R1), R2 - VST V2, 0(R2) - VST V6, 16(R2) + VSTM V4, V5, (R2) MOVD 24(R1), R2 - VST V3, 0(R2) - VST V7, 16(R2) + VSTM V6, V7, (R2) + RET diff --git a/sm3/sm3blocks_s390x_test.go b/sm3/sm3blocks_s390x_test.go index eda869e..08a29ff 100644 --- a/sm3/sm3blocks_s390x_test.go +++ b/sm3/sm3blocks_s390x_test.go @@ -9,15 +9,18 @@ import ( func TestTransposeMatrix(t *testing.T) { var m [4][8]uint32 + var k uint32 = 0 for i := 0; i < 4; i++ { for j := 0; j < 8; j++ { - m[i][j] = uint32(i*4 + j) + m[i][j] = k + k++ fmt.Printf("%04x ", m[i][j]) } fmt.Println() } input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]} transposeMatrix(&input[0]) + fmt.Println() for i := 0; i < 4; i++ { for j := 0; j < 8; j++ { m[i][j] = uint32(i*4 + j)