mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-22 10:16:18 +08:00
sm3: s390x transpose matrix poc
This commit is contained in:
parent
c7a68d9246
commit
e626a00982
10
sm3/sm3blocks_s390x.go
Normal file
10
sm3/sm3blocks_s390x.go
Normal file
@ -0,0 +1,10 @@
|
||||
// Copyright 2024 Sun Yimin. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !purego
|
||||
|
||||
package sm3
|
||||
|
||||
// transposeMatrix is implemented in sm3blocks_s390x.s.
//
// dig points to the first of four consecutive *[8]uint32 row pointers. The
// assembly loads all 8 words of each of the four rows, treats words 0-3 of the
// rows as one 4x4 uint32 matrix and words 4-7 as a second one, transposes both,
// and stores the result back through the same four pointers (in place).
//
//go:noescape
func transposeMatrix(dig **[8]uint32)
|
64
sm3/sm3blocks_s390x.s
Normal file
64
sm3/sm3blocks_s390x.s
Normal file
@ -0,0 +1,64 @@
|
||||
// Copyright 2024 Sun Yimin. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// mask<> holds four 16-byte VPERM byte selectors used by TRANSPOSE_MATRIX.
// Selector bytes 0x00-0x0f pick from the first VPERM source operand and
// 0x10-0x1f from the second.

// +0x00: words 0 and 1 of both sources, interleaved (a0 b0 a1 b1).
DATA mask<>+0x00(SB)/8, $0x0001020310111213
DATA mask<>+0x08(SB)/8, $0x0405060714151617
// +0x10: words 2 and 3 of both sources, interleaved (a2 b2 a3 b3).
DATA mask<>+0x10(SB)/8, $0x08090a0b18191a1b
DATA mask<>+0x18(SB)/8, $0x0c0d0e0f1c1d1e1f
// +0x20: first doubleword of each source (a0 a1 b0 b1).
DATA mask<>+0x20(SB)/8, $0x0001020304050607
DATA mask<>+0x28(SB)/8, $0x1011121314151617
// +0x30: second doubleword of each source (a2 a3 b2 b3).
DATA mask<>+0x30(SB)/8, $0x08090a0b0c0d0e0f
DATA mask<>+0x38(SB)/8, $0x18191a1b1c1d1e1f
GLOBL mask<>(SB), RODATA, $64
|
||||
|
||||
// TRANSPOSE_MATRIX transposes the 4x4 matrix of 32-bit words whose rows are
// held in T0..T3, leaving the transposed rows in T0..T3.
//
//   T0..T3     - in/out: matrix rows, one vector register per row
//   M0..M3     - in: the four VPERM selectors (mask<> at +0x00, +0x10, +0x20, +0x30)
//   TMP0..TMP3 - scratch vector registers, clobbered
//
// Step 1 interleaves words of row pairs (T0,T1) and (T2,T3) using M0/M1;
// step 2 combines doublewords of those intermediates using M2/M3.
//
// The selector parameters are named M0..M3 so that they match the names used
// in the body; a parameter list of M1..M4 would leave M0 unsubstituted and
// bind the remaining selectors to the wrong arguments.
#define TRANSPOSE_MATRIX(T0, T1, T2, T3, M0, M1, M2, M3, TMP0, TMP1, TMP2, TMP3) \
	VPERM T0, T1, M0, TMP0; \
	VPERM T2, T3, M0, TMP1; \
	VPERM T0, T1, M1, TMP2; \
	VPERM T2, T3, M1, TMP3; \
	VPERM TMP0, TMP1, M2, T0; \
	VPERM TMP0, TMP1, M3, T1; \
	VPERM TMP2, TMP3, M2, T2; \
	VPERM TMP2, TMP3, M3, T3
|
||||
|
||||
// transposeMatrix(dig **[8]uint32)
//
// dig addresses four consecutive row pointers. Each row is 32 bytes (8 words):
// words 0-3 of the four rows are loaded into V0-V3 and words 4-7 into V4-V7.
// Both 4x4 word matrices are run through TRANSPOSE_MATRIX and written back
// through the same row pointers.
//
// Registers used: R1 (dig), R2-R5 (row pointers), R6 (mask address),
// V0-V7 (data), V8-V11 (selectors), V12-V15 (scratch).
//
// The frame is declared as $0-8: no locals, one 8-byte pointer argument.
TEXT ·transposeMatrix(SB),NOSPLIT,$0-8
	MOVD	dig+0(FP), R1

	// Fetch the four row pointers once; they are reused for the stores below.
	MOVD	0(R1), R2
	MOVD	8(R1), R3
	MOVD	16(R1), R4
	MOVD	24(R1), R5

	// Left halves (words 0-3) go to V0-V3, right halves (words 4-7) to V4-V7.
	VL	0(R2), V0
	VL	16(R2), V4
	VL	0(R3), V1
	VL	16(R3), V5
	VL	0(R4), V2
	VL	16(R4), V6
	VL	0(R5), V3
	VL	16(R5), V7

	// Load the four 16-byte permute selectors into V8-V11.
	MOVD	$mask<>+0x00(SB), R6
	VLM	0(R6), V8, V11

	TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
	TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)

	// Write both transposed matrices back in place.
	VST	V0, 0(R2)
	VST	V4, 16(R2)
	VST	V1, 0(R3)
	VST	V5, 16(R3)
	VST	V2, 0(R4)
	VST	V6, 16(R4)
	VST	V3, 0(R5)
	VST	V7, 16(R5)
	RET
|
28
sm3/sm3blocks_s390x_test.go
Normal file
28
sm3/sm3blocks_s390x_test.go
Normal file
@ -0,0 +1,28 @@
|
||||
//go:build s390x && !purego
|
||||
|
||||
package sm3
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestTransposeMatrix(t *testing.T) {
|
||||
var m [4][8]uint32
|
||||
for i := 0; i < 4; i++ {
|
||||
for j := 0; j < 8; j++ {
|
||||
m[i][j] = uint32(i*4 + j)
|
||||
fmt.Printf("%04x ", m[i][j])
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
input := [4]*[8]uint32{&m[0], &m[1], &m[2], &m[3]}
|
||||
transposeMatrix(&input[0])
|
||||
for i := 0; i < 4; i++ {
|
||||
for j := 0; j < 8; j++ {
|
||||
m[i][j] = uint32(i*4 + j)
|
||||
fmt.Printf("%04x ", m[i][j])
|
||||
}
|
||||
fmt.Println()
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user