From ddea8332fefdd8181decdfac793b4d48f67dca66 Mon Sep 17 00:00:00 2001 From: emmansun Date: Sun, 20 Aug 2023 12:05:48 +0800 Subject: [PATCH] cipher: optimize gb gf(2^128) reduction --- README.md | 2 +- cipher/xts_amd64.s | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 66803e0..009290c 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ * **CFCA** - some cfca specific implementations. -* **CIPHER** - ECB/CCM/XTS cipher modes. +* **CIPHER** - ECB/CCM/XTS cipher modes, XTS mode also supports **GB/T 17964-2021**. * **SMX509** - a fork of golang X509 that supports ShangMi. diff --git a/cipher/xts_amd64.s b/cipher/xts_amd64.s index 8ad4a10..9384314 100644 --- a/cipher/xts_amd64.s +++ b/cipher/xts_amd64.s @@ -12,21 +12,16 @@ DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000 DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000 DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000 -DATA one<>+0x00(SB)/8, $0x0000000000000001 -DATA one<>+0x08(SB)/8, $0x0000000000000000 - GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16 GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16 GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16 -GLOBL one<>(SB), (NOPTR+RODATA), $16 #define POLY X0 #define BSWAP X1 -#define ONE X2 -#define B0 X3 -#define T0 X4 -#define T1 X5 +#define B0 X2 +#define T0 X3 +#define T1 X4 // func mul2(tweak *[blockSize]byte, isGB bool) TEXT ·mul2(SB),NOSPLIT,$0 @@ -58,8 +53,6 @@ TEXT ·mul2(SB),NOSPLIT,$0 gb_alg: MOVOU bswapMask<>(SB), BSWAP MOVOU gbGcmPoly<>(SB), POLY - MOVOU one<>(SB), ONE - PXOR X6, X6 PSHUFB BSWAP, B0 @@ -72,10 +65,10 @@ gb_alg: POR T0, B0 // reduction - PAND ONE, T1 PSHUFD $0, T1, T1 - PCMPEQL X6, T1 - PANDN POLY, T1 + PSLLL $31, T1 + PSRAL $31, T1 + PAND POLY, T1 PXOR T1, B0 PSHUFB BSWAP, B0 @@ -124,8 +117,6 @@ loop: dt_gb_alg: MOVOU bswapMask<>(SB), BSWAP MOVOU gbGcmPoly<>(SB), POLY - MOVOU one<>(SB), ONE - PXOR X6, X6 gb_loop: MOVOU B0, (0*16)(AX) @@ -142,10 +133,10 @@ gb_loop: POR T0, B0 // reduction - PAND ONE, T1 PSHUFD $0, T1, T1 - PCMPEQL X6, T1 - PANDN POLY, T1 + PSLLL $31, T1 + PSRAL $31, T1 + PAND POLY, T1 PXOR T1, B0 PSHUFB BSWAP, B0