gmsm/cipher/xts_s390x.s
2024-09-21 11:42:13 +08:00

122 lines
2.2 KiB
ArmAsm

// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build !purego
#include "textflag.h"
// xtsMask is a 16-byte, read-only VPERM selector holding the byte indices
// 15, 14, ..., 1, 0. Permuting a vector with itself through this selector
// reverses the order of its 16 bytes.
DATA xtsMask<>+0x00(SB)/8, $0x0f0e0d0c0b0a0908 // byte swap BE -> LE
DATA xtsMask<>+0x08(SB)/8, $0x0706050403020100
GLOBL xtsMask<>(SB), (NOPTR+RODATA), $16
// Register aliases used by the macros and functions in this file:
//   BSWAP - vector register loaded with the xtsMask byte-reversal selector
//   POLY  - vector register holding the reduction constant
//   B0    - vector register holding the tweak being doubled
//   T0/T1 - vector scratch registers
//   CPOOL - general register used to hold the address of xtsMask
#define BSWAP V0
#define POLY V1
#define B0 V2
#define T0 V3
#define T1 V4
#define CPOOL R3
// doubleTweak multiplies the 128-bit tweak held in B0 by 2, in place.
//
// Inputs:
//   B0    - tweak, in the byte order it was loaded from memory
//   BSWAP - byte-reversal selector (see xtsMask)
//   POLY  - reduction constant; callers in this file set byte 15 to 0x87
// Clobbers: T0, T1.
//
// The tweak is byte-reversed, shifted left by one bit as a single 128-bit
// value, conditionally XORed with POLY when the bit shifted out was set, and
// then byte-reversed again with the same selector to restore the original
// byte order.
#define doubleTweak(B0, BSWAP, POLY, T0, T1) \
VPERM B0, B0, BSWAP, B0 \// BE -> LE
\ // Multiply by 2
VESRAF $31, B0, T0 \ // per word: arithmetic shift right by 31 smears the word's top bit
VREPF $0, T0, T0 \ // replicate word 0 into every word
VN POLY, T0, T0 \ // T0 for reduction
VREPIB $1, T1 \ // T1 = shift count of 1 in every byte
VSL T1, B0, T1 \ // T1 = B0 shifted left by one bit
VX T1, T0, B0 \ // B0 = shifted value XOR reduction term
\
VPERM B0, B0, BSWAP, B0 // same selector again: restore original byte order
// gbDoubleTweak is the variant of tweak doubling selected when isGB is set.
// It works on B0 directly (no byte reversal) and shifts right instead of left.
//
// Inputs:
//   B0   - tweak, in the byte order it was loaded from memory
//   POLY - reduction constant; callers in this file set byte 0 to 0xe1
// Clobbers: T0, T1.
#define gbDoubleTweak(B0, POLY, T0, T1) \
VESLF $31, B0, T0 \ // per word: move the lowest bit up to the top bit
VESRAF $31, T0, T0 \ // per word: arithmetic shift right by 31 smears that bit
VREPF $3, T0, T0 \ // replicate word 3 into every word
VN POLY, T0, T0 \ // T0 for reduction
\
VREPIB $1, T1 \ // T1 = shift count of 1 in every byte
VSRL T1, B0, T1 \ // T1 = B0 shifted right by one bit
VX T1, T0, B0 // B0 = shifted value XOR reduction term
// func mul2(tweak *[blockSize]byte, isGB bool)
//
// mul2 doubles the 16-byte tweak at *tweak in place.
// When isGB is 1 the gbDoubleTweak variant is used (reduction byte 0xe1 in
// byte 0 of POLY); otherwise doubleTweak is used (reduction byte 0x87 in
// byte 15 of POLY, with the xtsMask byte reversal).
//
// Registers: R1 = tweak pointer, R2 = isGB, CPOOL = address of xtsMask.
// Vector registers BSWAP, POLY, B0, T0 and T1 are overwritten.
TEXT ·mul2(SB),NOSPLIT,$0
	MOVD tweak+0(FP), R1
	MOVB isGB+8(FP), R2

	// Work common to both variants: fetch the tweak and clear POLY.
	VL 0(R1), B0
	VZERO POLY
	CMPBEQ R2, $1, gb_mode

	// Default variant: reduction constant in the last byte, plus the
	// byte-reversal selector needed by doubleTweak.
	VLEIB $15, $0x87, POLY
	MOVD $xtsMask<>+0x00(SB), CPOOL
	VL (CPOOL), BSWAP
	doubleTweak(B0, BSWAP, POLY, T0, T1)
	VST B0, 0(R1)
	RET

gb_mode:
	// GB variant: reduction constant in the first byte, no byte reversal.
	VLEIB $0, $0xe1, POLY
	gbDoubleTweak(B0, POLY, T0, T1)
	VST B0, 0(R1)
	RET
// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// doubleTweaks fills tweaks with successive tweak values, 16 bytes each:
// the first block receives the current *tweak, and every following block
// receives the previous one doubled. After the last block is written, the
// next (not yet stored) value is written back to *tweak.
//
// Only whole 16-byte blocks of tweaks are written; the length is rounded
// down to a multiple of 16. If tweaks holds no whole block, the function
// returns without writing anything and *tweak is left unchanged.
//
// When isGB is 1 the gbDoubleTweak variant is used, otherwise doubleTweak.
//
// Registers:
//   R1 = tweak pointer
//   R2 = write cursor into tweaks
//   R3 = length rounded down to whole blocks (later reused as CPOOL)
//   R4 = isGB
//   R5 = end pointer, one past the last whole block
// Vector registers BSWAP, POLY, B0, T0 and T1 are overwritten.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVD tweak+0(FP), R1
	MOVD tweaks+8(FP), R2
	MOVD tweaks_len+16(FP), R3
	MOVB isGB+32(FP), R4

	AND $-16, R3 // drop any trailing partial block
	// The loops below store a block before testing the end pointer, so an
	// output with no whole block must be rejected here to avoid writing
	// 16 bytes past the end of tweaks.
	CMPBEQ R3, $0, done
	LAY (R2)(R3*1), R5 // R5 = tweaks + rounded length

	VL 0(R1), B0
	CMPBEQ R4, $1, gb_alg

	// R3 is no longer needed as a length; it is reused as CPOOL from here on.
	MOVD $xtsMask<>+0x00(SB), CPOOL
	VL (CPOOL), BSWAP
	// Load polynomial for reduction
	VZERO POLY
	VLEIB $15, $0x87, POLY

loop:
	VST B0, 0(R2) // emit the current tweak
	doubleTweak(B0, BSWAP, POLY, T0, T1)
	LA 16(R2), R2
	CMPBLT R2, R5, loop

	VST B0, 0(R1) // hand the next tweak back to the caller
	RET

gb_alg:
	// Load polynomial for reduction
	VZERO POLY
	VLEIB $0, $0xe1, POLY

gb_alg_loop:
	VST B0, 0(R2) // emit the current tweak
	gbDoubleTweak(B0, POLY, T0, T1)
	LA 16(R2), R2
	CMPBLT R2, R5, gb_alg_loop

	VST B0, 0(R1) // hand the next tweak back to the caller
	RET

done:
	RET