gmsm/sm3/sm3blocks_s390x.s
2024-09-03 14:28:14 +08:00

65 lines
1.5 KiB
ArmAsm

// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build !purego
#include "textflag.h"
// mask<> holds four 16-byte VPERM selector vectors (64 bytes in total).
// In a selector, byte values 0x00-0x0f pick a byte of the first VPERM source
// operand and 0x10-0x1f pick a byte of the second one. s390x is big-endian,
// so every 8-byte constant lands in memory in the byte order written here.

// +0x00: 32-bit words 0 and 1 of both sources, interleaved (a0 b0 a1 b1).
DATA	mask<>+0x00(SB)/8, $0x0001020310111213
DATA	mask<>+0x08(SB)/8, $0x0405060714151617
// +0x10: 32-bit words 2 and 3 of both sources, interleaved (a2 b2 a3 b3).
DATA	mask<>+0x10(SB)/8, $0x08090a0b18191a1b
DATA	mask<>+0x18(SB)/8, $0x0c0d0e0f1c1d1e1f
// +0x20: first doubleword of each source (a.d0 b.d0).
DATA	mask<>+0x20(SB)/8, $0x0001020304050607
DATA	mask<>+0x28(SB)/8, $0x1011121314151617
// +0x30: second doubleword of each source (a.d1 b.d1).
DATA	mask<>+0x30(SB)/8, $0x08090a0b0c0d0e0f
DATA	mask<>+0x38(SB)/8, $0x18191a1b1c1d1e1f
// RODATA is the textflag.h name for flag value 8.
GLOBL	mask<>(SB), RODATA, $64
// TRANSPOSE_MATRIX transposes, in place, the 4x4 matrix of 32-bit words whose
// rows are the vector registers T0, T1, T2, T3.
//
//   T0..T3     - rows on input, transposed rows on output.
//   M0         - VPERM selector producing (a0 b0 a1 b1) from sources a, b.
//   M1         - VPERM selector producing (a2 b2 a3 b3) from sources a, b.
//   M2         - VPERM selector joining the first doublewords of a and b.
//   M3         - VPERM selector joining the second doublewords of a and b.
//   TMP0..TMP3 - scratch vector registers, clobbered.
//
// M0..M3 correspond, in order, to the four 16-byte selectors stored in mask<>.
//
// Step 1 interleaves row pairs word by word:
//   TMP0 = T0[0] T1[0] T0[1] T1[1]    TMP1 = T2[0] T3[0] T2[1] T3[1]
//   TMP2 = T0[2] T1[2] T0[3] T1[3]    TMP3 = T2[2] T3[2] T2[3] T3[3]
// Step 2 recombines the doubleword halves, so that Ti ends up holding
// word i of each of the original four rows.
//
// The selector parameters are named M0..M3 to match the names used in the
// body; naming them M1..M4 would leave M0 unsubstituted and shift the others.
#define TRANSPOSE_MATRIX(T0, T1, T2, T3, M0, M1, M2, M3, TMP0, TMP1, TMP2, TMP3) \
	VPERM T0, T1, M0, TMP0; \
	VPERM T2, T3, M0, TMP1; \
	VPERM T0, T1, M1, TMP2; \
	VPERM T2, T3, M1, TMP3; \
	VPERM TMP0, TMP1, M2, T0; \
	VPERM TMP0, TMP1, M3, T1; \
	VPERM TMP2, TMP3, M2, T2; \
	VPERM TMP2, TMP3, M3, T3
// transposeMatrix(dig **[8]uint32)
//
// dig addresses four consecutive 8-byte pointers, each pointing at a 32-byte
// row (eight 32-bit words). The rows are loaded as two 4x4 word matrices
// (words 0-3 and words 4-7 of every row), TRANSPOSE_MATRIX is applied to each
// matrix, and the results are written back over the same rows. With
// TRANSPOSE_MATRIX performing a 4x4 word transpose, row i ends up holding
// word i of rows 0..3 followed by word i+4 of rows 0..3.
//
// Register use:
//   R1       - dig (address of the pointer table)
//   R2..R5   - pointers to rows 0..3
//   R6       - address of mask<>
//   V0..V3   - words 0-3 of rows 0..3
//   V4..V7   - words 4-7 of rows 0..3
//   V8..V11  - the four VPERM selectors from mask<>
//   V12..V15 - scratch for TRANSPOSE_MATRIX
TEXT ·transposeMatrix(SB),NOSPLIT,$0
	MOVD	dig+0(FP), R1

	// Fetch the four row pointers once; they are reused for the stores.
	MOVD	0(R1), R2
	MOVD	8(R1), R3
	MOVD	16(R1), R4
	MOVD	24(R1), R5

	// First matrix: bytes 0-15 of every row.
	VL	0(R2), V0
	VL	0(R3), V1
	VL	0(R4), V2
	VL	0(R5), V3

	// Second matrix: bytes 16-31 of every row.
	VL	16(R2), V4
	VL	16(R3), V5
	VL	16(R4), V6
	VL	16(R5), V7

	// Load all 64 bytes of mask<> into V8..V11 with one multi-register load.
	MOVD	$mask<>+0x00(SB), R6
	VLM	0(R6), V8, V11

	TRANSPOSE_MATRIX(V0, V1, V2, V3, V8, V9, V10, V11, V12, V13, V14, V15)
	TRANSPOSE_MATRIX(V4, V5, V6, V7, V8, V9, V10, V11, V12, V13, V14, V15)

	// Write both matrices back over the original rows.
	VST	V0, 0(R2)
	VST	V1, 0(R3)
	VST	V2, 0(R4)
	VST	V3, 0(R5)

	VST	V4, 16(R2)
	VST	V5, 16(R3)
	VST	V6, 16(R4)
	VST	V7, 16(R5)
	RET