This commit is contained in:
Sun Yimin 2024-09-03 15:20:10 +08:00 committed by GitHub
parent fda15351c7
commit 40bc2f5f84
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 20 deletions

View File

@ -5,6 +5,7 @@
//go:build !purego //go:build !purego
#include "textflag.h" #include "textflag.h"
#include "go_asm.h"
DATA mask<>+0x00(SB)/8, $0x0001020310111213 DATA mask<>+0x00(SB)/8, $0x0001020310111213
DATA mask<>+0x08(SB)/8, $0x0405060714151617 DATA mask<>+0x08(SB)/8, $0x0405060714151617
@ -31,34 +32,27 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0
MOVD dig+0(FP), R1 MOVD dig+0(FP), R1
MOVD (R1), R2 MOVD (R1), R2
VL 0(R2), V0 VLM (R2), V0, V1
VL 16(R2), V4
MOVD 8(R1), R2 MOVD 8(R1), R2
VL 0(R2), V1 VLM (R2), V2, V3
VL 16(R2), V5
MOVD 16(R1), R2 MOVD 16(R1), R2
VL 0(R2), V2 VLM (R2), V4, V5
VL 16(R2), V6
MOVD 24(R1), R2 MOVD 24(R1), R2
VL 0(R2), V3 VLM (R2), V6, V7
VL 16(R2), V7
MOVD $mask<>+0x00(SB), R2 MOVD $mask<>+0x00(SB), R2
VLM 0(R2), V8, V11 VLM (R2), V8, V11
TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15) TRANSPOSE_MATRIX(V0, V2, V4, V6, V8, V9, V10, V11, V12, V13, V14, V15)
TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15) TRANSPOSE_MATRIX(V1, V3, V5, V7, V8, V9, V10, V11, V12, V13, V14, V15)
MOVD (R1), R2 MOVD (R1), R2
VST V0, 0(R2) VSTM V0, V1, (R2)
VST V4, 16(R2)
MOVD 8(R1), R2 MOVD 8(R1), R2
VST V1, 0(R2) VSTM V2, V3, (R2)
VST V5, 16(R2)
MOVD 16(R1), R2 MOVD 16(R1), R2
VST V2, 0(R2) VSTM V4, V5, (R2)
VST V6, 16(R2)
MOVD 24(R1), R2 MOVD 24(R1), R2
VST V3, 0(R2) VSTM V6, V7, (R2)
VST V7, 16(R2)
RET RET

View File

@ -9,15 +9,18 @@ import (
func TestTransposeMatrix(t *testing.T) { func TestTransposeMatrix(t *testing.T) {
var m [4][8]uint32 var m [4][8]uint32
var k uint32 = 0
for i := 0; i < 4; i++ { for i := 0; i < 4; i++ {
for j := 0; j < 8; j++ { for j := 0; j < 8; j++ {
m[i][j] = uint32(i*4 + j) m[i][j] = k
k++
fmt.Printf("%04x ", m[i][j]) fmt.Printf("%04x ", m[i][j])
} }
fmt.Println() fmt.Println()
} }
input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]} input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]}
transposeMatrix(&input[0]) transposeMatrix(&input[0])
fmt.Println()
for i := 0; i < 4; i++ { for i := 0; i < 4; i++ {
for j := 0; j < 8; j++ { for j := 0; j < 8; j++ {
m[i][j] = uint32(i*4 + j) m[i][j] = uint32(i*4 + j)