gmsm/cipher/xts_amd64.s

146 lines
2.2 KiB
ArmAsm
Raw Normal View History

//go:build !purego
#include "textflag.h"
// bswapMask is a PSHUFB control vector. DATA values are stored little-endian,
// so its bytes in memory are 0x0f, 0x0e, ..., 0x01, 0x00: destination byte i
// takes source byte 15-i, i.e. the 16 bytes of the register are reversed.
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
// gcmPoly holds 0x87 in its lowest byte and zero elsewhere. The non-GB paths
// AND it with an all-ones/all-zeros carry mask and XOR the result into the
// left-shifted tweak.
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000087
DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000
// gbGcmPoly holds 0xe1 in its highest byte and zero elsewhere. The GB paths
// AND it with a carry mask and XOR the result into the byte-reversed,
// right-shifted tweak.
DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000
DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000
// All three tables are 16 bytes, read-only, and contain no pointers.
GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16
// Register aliases shared by mul2 and doubleTweaks.
// POLY: reduction constant (gcmPoly or gbGcmPoly, depending on the path).
#define POLY X0
// BSWAP: byte-reversal shuffle mask (bswapMask); only loaded on the GB paths.
#define BSWAP X1
// B0: the 128-bit tweak value being doubled.
#define B0 X2
// T0, T1: scratch registers for carry propagation and the reduction mask.
#define T0 X3
#define T1 X4
// func mul2(tweak *[blockSize]byte, isGB bool)
//
// mul2 doubles the 16-byte value at *tweak in place.
//
//   isGB == false: the value is shifted left by one bit as a 128-bit
//                  little-endian integer; if the top bit was set, 0x87 is
//                  XORed into byte 0.
//   isGB == true:  the bytes are reversed, the 128-bit value is shifted right
//                  by one bit, 0xe1 is XORed into the top byte if the bit
//                  shifted out was set, and the bytes are reversed back.
//                  (Presumably the GB/T variant of XTS - confirm with caller.)
//
// Registers: DI = tweak pointer, AX = isGB, X0-X4 via the POLY/BSWAP/B0/T0/T1
// aliases. No stack frame is used.
TEXT ·mul2(SB),NOSPLIT,$0
	MOVQ	tweak+0(FP), DI
	MOVOU	(0*16)(DI), B0
	MOVB	isGB+8(FP), AX
	CMPB	AX, $1
	JE	mul2_gb

	// Standard path. Take both copies of the tweak before shifting it.
	MOVOU	B0, T1			// T1: source of the inter-dword carries
	PSHUFD	$0xff, B0, T0		// T0: highest dword broadcast to all lanes
	PSLLL	$1, B0			// shift every dword left by one bit
	PSRLL	$31, T1			// keep only the bit each dword shifted out
	PSRAL	$31, T0			// all-ones if bit 127 was set, else zero
	PSLLDQ	$4, T1			// move each carry up into the next dword
	MOVOU	gcmPoly<>(SB), POLY
	PAND	POLY, T0		// 0x87 in byte 0 when bit 127 was set
	PXOR	T1, B0			// apply inter-dword carries
	PXOR	T0, B0			// apply reduction
	MOVOU	B0, (0*16)(DI)
	RET

mul2_gb:
	MOVOU	bswapMask<>(SB), BSWAP
	PSHUFB	BSWAP, B0		// reverse the 16 bytes
	PSHUFD	$0, B0, T1		// T1: lowest dword broadcast to all lanes
	MOVOU	B0, T0			// T0: source of the inter-qword carry
	PSLLL	$31, T1
	PSLLQ	$63, T0			// bit 0 of each qword moved to its bit 63
	PSRAL	$31, T1			// all-ones if bit 0 was set, else zero
	PSRLDQ	$8, T0			// high qword's carry lands in the low qword
	PSRLQ	$1, B0			// shift every qword right by one bit
	MOVOU	gbGcmPoly<>(SB), POLY
	PAND	POLY, T1		// 0xe1 in the top byte when bit 0 was set
	POR	T0, B0			// apply inter-qword carry
	PXOR	T1, B0			// apply reduction
	PSHUFB	BSWAP, B0		// restore the original byte order
	MOVOU	B0, (0*16)(DI)
	RET
// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// doubleTweaks writes n = len(tweaks)/16 consecutive tweak values into tweaks
// and leaves the next (n-th doubled) value in *tweak. The first 16-byte slot
// receives the current *tweak unchanged; each following slot receives the
// previous value doubled. Any trailing len(tweaks)%16 bytes are not written.
//
//   isGB == false: doubling shifts the 128-bit little-endian value left by
//                  one bit and XORs 0x87 into byte 0 if the top bit was set.
//   isGB == true:  doubling reverses the bytes, shifts the 128-bit value
//                  right by one bit, XORs 0xe1 into the top byte if the bit
//                  shifted out was set, and reverses the bytes back.
//
// When len(tweaks) < 16 the function returns without touching tweaks or
// *tweak. Both loops below are bottom-tested, so without this guard a single
// 16-byte store would run past the end of a short (or nil) slice.
//
// Registers: DI = tweak pointer, AX = write cursor into tweaks, BX = slots
// remaining, CX = isGB, X0-X4 via the POLY/BSWAP/B0/T0/T1 aliases.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVQ	tweak+0(FP), DI
	MOVQ	tweaks+8(FP), AX
	MOVQ	tweaks_len+16(FP), BX
	MOVB	isGB+32(FP), CX
	SHRQ	$4, BX			// BX = number of whole 16-byte slots
	TESTQ	BX, BX
	JZ	dt_done			// nothing to fill: leave everything untouched
	MOVOU	(0*16)(DI), B0
	CMPB	CX, $1
	JE	dt_gb

	MOVOU	gcmPoly<>(SB), POLY
dt_std_loop:
	MOVOU	B0, (0*16)(AX)		// emit the current tweak
	LEAQ	16(AX), AX
	// B0 * 2
	PSHUFD	$0xff, B0, T0		// highest dword broadcast to all lanes
	MOVOU	B0, T1
	PSRAL	$31, T0			// all-ones if bit 127 was set, else zero
	PAND	POLY, T0		// 0x87 in byte 0 when reduction is needed
	PSRLL	$31, T1			// bit shifted out of each dword
	PSLLDQ	$4, T1			// carry it into the next higher dword
	PSLLL	$1, B0
	PXOR	T0, B0
	PXOR	T1, B0
	DECQ	BX
	JNZ	dt_std_loop
	MOVOU	B0, (0*16)(DI)		// hand the next tweak back to the caller
	RET

dt_gb:
	MOVOU	bswapMask<>(SB), BSWAP
	MOVOU	gbGcmPoly<>(SB), POLY
dt_gb_loop:
	MOVOU	B0, (0*16)(AX)		// emit the current tweak
	LEAQ	16(AX), AX
	PSHUFB	BSWAP, B0		// reverse the 16 bytes
	// B0 * 2
	MOVOU	B0, T0
	PSHUFD	$0, B0, T1		// lowest dword broadcast to all lanes
	PSRLQ	$1, B0
	PSLLQ	$63, T0			// bit 0 of each qword moved to its bit 63
	PSRLDQ	$8, T0			// high qword's carry lands in the low qword
	POR	T0, B0
	// reduction
	PSLLL	$31, T1
	PSRAL	$31, T1			// all-ones if bit 0 was set, else zero
	PAND	POLY, T1		// 0xe1 in the top byte when reduction is needed
	PXOR	T1, B0
	PSHUFB	BSWAP, B0		// restore the original byte order
	DECQ	BX
	JNZ	dt_gb_loop
	MOVOU	B0, (0*16)(DI)		// hand the next tweak back to the caller
	RET

dt_done:
	RET