This commit is contained in:
Sun Yimin 2024-09-03 15:20:10 +08:00 committed by GitHub
parent fda15351c7
commit 40bc2f5f84
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 20 deletions

View File

@ -5,6 +5,7 @@
//go:build !purego
#include "textflag.h"
#include "go_asm.h"
DATA mask<>+0x00(SB)/8, $0x0001020310111213
DATA mask<>+0x08(SB)/8, $0x0405060714151617
@ -31,34 +32,27 @@ TEXT ·transposeMatrix(SB),NOSPLIT,$0
MOVD dig+0(FP), R1
MOVD (R1), R2
VL 0(R2), V0
VL 16(R2), V4
VLM (R2), V0, V1
MOVD 8(R1), R2
VL 0(R2), V1
VL 16(R2), V5
VLM (R2), V2, V3
MOVD 16(R1), R2
VL 0(R2), V2
VL 16(R2), V6
VLM (R2), V4, V5
MOVD 24(R1), R2
VL 0(R2), V3
VL 16(R2), V7
VLM (R2), V6, V7
MOVD $mask<>+0x00(SB), R2
VLM 0(R2), V8, V11
VLM (R2), V8, V11
TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)
TRANSPOSE_MATRIX(V0, V2, V4, V6, V8, V9, V10, V11, V12, V13, V14, V15)
TRANSPOSE_MATRIX(V1, V3, V5, V7, V8, V9, V10, V11, V12, V13, V14, V15)
MOVD (R1), R2
VST V0, 0(R2)
VST V4, 16(R2)
VSTM V0, V1, (R2)
MOVD 8(R1), R2
VST V1, 0(R2)
VST V5, 16(R2)
VSTM V2, V3, (R2)
MOVD 16(R1), R2
VST V2, 0(R2)
VST V6, 16(R2)
VSTM V4, V5, (R2)
MOVD 24(R1), R2
VST V3, 0(R2)
VST V7, 16(R2)
VSTM V6, V7, (R2)
RET

View File

@ -9,15 +9,18 @@ import (
func TestTransposeMatrix(t *testing.T) {
var m [4][8]uint32
var k uint32 = 0
for i := 0; i < 4; i++ {
for j := 0; j < 8; j++ {
m[i][j] = uint32(i*4 + j)
m[i][j] = k
k++
fmt.Printf("%04x ", m[i][j])
}
fmt.Println()
}
input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]}
transposeMatrix(&input[0])
fmt.Println()
for i := 0; i < 4; i++ {
for j := 0; j < 8; j++ {
m[i][j] = uint32(i*4 + j)