sm3: s390x transpose matrix poc

This commit is contained in:
Sun Yimin 2024-09-03 14:28:14 +08:00 committed by GitHub
parent c7a68d9246
commit e626a00982
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 102 additions and 0 deletions

10
sm3/sm3blocks_s390x.go Normal file
View File

@ -0,0 +1,10 @@
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build !purego
package sm3
//go:noescape
func transposeMatrix(dig **[8]uint32)

64
sm3/sm3blocks_s390x.s Normal file
View File

@ -0,0 +1,64 @@
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build !purego
#include "textflag.h"
DATA mask<>+0x00(SB)/8, $0x0001020310111213
DATA mask<>+0x08(SB)/8, $0x0405060714151617
DATA mask<>+0x10(SB)/8, $0x08090a0b18191a1b
DATA mask<>+0x18(SB)/8, $0x0c0d0e0f1c1d1e1f
DATA mask<>+0x20(SB)/8, $0x0001020304050607
DATA mask<>+0x28(SB)/8, $0x1011121314151617
DATA mask<>+0x30(SB)/8, $0x08090a0b0c0d0e0f
DATA mask<>+0x38(SB)/8, $0x18191a1b1c1d1e1f
GLOBL mask<>(SB), 8, $64
#define TRANSPOSE_MATRIX(T0, T1, T2, T3, M1, M2, M3, M4, TMP0, TMP1, TMP2, TMP3) \
VPERM T0, T1, M0, TMP0; \
VPERM T2, T3, M0, TMP1; \
VPERM T0, T1, M1, TMP2; \
VPERM T2, T3, M1, TMP3; \
VPERM TMP0, TMP1, M2, T0; \
VPERM TMP0, TMP1, M3, T1; \
VPERM TMP2, TMP3, M2, T2; \
VPERM TMP2, TMP3, M3, T3
// transposeMatrix(dig **[8]uint32)
TEXT ·transposeMatrix(SB),NOSPLIT,$0
MOVD dig+0(FP), R1
MOVD (R1), R2
VL 0(R2), V0
VL 16(R2), V4
MOVD 8(R1), R2
VL 0(R2), V1
VL 16(R2), V5
MOVD 16(R1), R2
VL 0(R2), V2
VL 16(R2), V6
MOVD 24(R1), R2
VL 0(R2), V3
VL 16(R2), V7
MOVD $mask<>+0x00(SB), R2
VLM 0(R2), V8, V11
TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)
MOVD (R1), R2
VST V0, 0(R2)
VST V4, 16(R2)
MOVD 8(R1), R2
VST V1, 0(R2)
VST V5, 16(R2)
MOVD 16(R1), R2
VST V2, 0(R2)
VST V6, 16(R2)
MOVD 24(R1), R2
VST V3, 0(R2)
VST V7, 16(R2)
RET

View File

@ -0,0 +1,28 @@
//go:build s390x && !purego
package sm3
import (
"fmt"
"testing"
)
func TestTransposeMatrix(t *testing.T) {
var m [4][8]uint32
for i := 0; i < 4; i++ {
for j := 0; j < 8; j++ {
m[i][j] = uint32(i*4 + j)
fmt.Printf("%04x ", m[i][j])
}
fmt.Println()
}
input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]}
transposeMatrix(&input[0])
for i := 0; i < 4; i++ {
for j := 0; j < 8; j++ {
m[i][j] = uint32(i*4 + j)
fmt.Printf("%04x ", m[i][j])
}
fmt.Println()
}
}