//go:build !purego

#include "textflag.h"

// PSHUFB control mask that reverses the order of all 16 bytes of an XMM
// register (destination byte i takes source byte 15-i).
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16

// Reduction constant used by doubleTweak: 0x87 in the least significant byte.
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000087
DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16

// Reduction constant used by gbDoubleTweak: 0xe1 in the most significant byte.
// It is applied while the tweak is held in byte-reversed form.
DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000
DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000
GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16

// XMM register roles shared by both functions below.
#define POLY X0
#define BSWAP X1
#define B0 X2
#define T0 X3
#define T1 X4

// doubleTweak shifts the 128-bit value in B0 left by one bit and, when the
// bit shifted out of the top was set, XORs POLY into the result.
// The shift is done per 32-bit lane; the bit leaving each lane is carried
// into the next higher lane via T1.
// Clobbers T0 and T1. POLY is read only.
#define doubleTweak(B0, POLY, T0, T1) \
	\ // B0 * 2
	PSHUFD $0xff, B0, T0 \ // broadcast the top dword of B0 to every lane
	MOVOU B0, T1         \
	PSRAL $31, T0        \ // T0 for reduction: all ones iff the top bit of B0 is set
	PAND POLY, T0        \
	PSRLL $31, T1        \ // top bit of every dword moved down to bit 0
	PSLLDQ $4, T1        \ // ...and handed to the next higher dword
	PSLLL $1, B0         \
	PXOR T0, B0          \
	PXOR T1, B0

// gbDoubleTweak byte-reverses B0, shifts the resulting 128-bit value right by
// one bit, XORs POLY into it when the bit shifted out at the bottom was set,
// and byte-reverses the result back.
// Clobbers T0 and T1. BSWAP and POLY are read only.
#define gbDoubleTweak(B0, BSWAP, POLY, T0, T1) \
	PSHUFB BSWAP, B0     \
	\ // B0 * 2
	MOVOU B0, T0         \
	PSHUFD $0, B0, T1    \ // broadcast the lowest dword (it holds the bit shifted out)
	PSRLQ $1, B0         \
	PSLLQ $63, T0        \ // bit 0 of each qword moved up to bit 63
	PSRLDQ $8, T0        \ // high qword's bit moves into the low qword
	POR T0, B0           \
	\ // reduction
	PSLLL $31, T1        \
	PSRAL $31, T1        \ // all ones iff bit 0 of the reversed value was set
	PAND POLY, T1        \
	PXOR T1, B0          \
	PSHUFB BSWAP, B0

// func mul2(tweak *[blockSize]byte, isGB bool)
//
// Doubles the 16-byte tweak in place. When isGB is 1 the gbDoubleTweak
// variant is used, otherwise doubleTweak.
//
// Registers: DI = tweak pointer, AX = isGB, X0-X4 scratch.
TEXT ·mul2(SB),NOSPLIT,$0
	MOVQ tweak+0(FP), DI
	MOVB isGB+8(FP), AX

	MOVOU (DI), B0

	CMPB AX, $1
	JE   gb_alg

	// Default variant.
	MOVOU gcmPoly<>(SB), POLY
	doubleTweak(B0, POLY, T0, T1)
	MOVOU B0, (DI)
	RET

gb_alg:
	// GB variant: operates on the byte-reversed tweak.
	MOVOU bswapMask<>(SB), BSWAP
	MOVOU gbGcmPoly<>(SB), POLY
	gbDoubleTweak(B0, BSWAP, POLY, T0, T1)
	MOVOU B0, (DI)
	RET

// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// Fills tweaks with consecutive tweak values: for every complete 16-byte
// block of tweaks, the current tweak is stored into that block and then
// doubled. The tweak following the last stored one is written back to *tweak.
// Trailing bytes of tweaks beyond the last complete block are not touched.
// When tweaks holds no complete block the function returns without writing
// to either tweaks or *tweak.
//
// Registers: DI = tweak pointer, AX = write cursor into tweaks,
// BX = number of 16-byte blocks, DX = blocks written so far, CX = isGB,
// X0-X4 scratch.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVQ tweak+0(FP), DI
	MOVQ tweaks+8(FP), AX
	MOVQ tweaks_len+16(FP), BX
	MOVB isGB+32(FP), CX

	// BX = len(tweaks) / 16
	SHRQ $4, BX

	// The loops below store a block before checking the count, so a zero
	// block count must be rejected up front to avoid writing past tweaks.
	TESTQ BX, BX
	JZ    dt_done

	MOVOU (DI), B0
	XORQ  DX, DX

	CMPB CX, $1
	JE   dt_gb_alg

	MOVOU gcmPoly<>(SB), POLY

loop:
	// Emit the current tweak, then advance it.
	MOVOU B0, (AX)
	LEAQ  16(AX), AX

	doubleTweak(B0, POLY, T0, T1)

	ADDQ $1, DX
	CMPQ DX, BX
	JB   loop

	MOVOU B0, (DI)
	RET

dt_gb_alg:
	MOVOU bswapMask<>(SB), BSWAP
	MOVOU gbGcmPoly<>(SB), POLY

gb_loop:
	// Emit the current tweak, then advance it.
	MOVOU B0, (AX)
	LEAQ  16(AX), AX

	gbDoubleTweak(B0, BSWAP, POLY, T0, T1)

	ADDQ $1, DX
	CMPQ DX, BX
	JB   gb_loop

	MOVOU B0, (DI)

dt_done:
	RET