sm4: optimize amd64 sse, cbc decrypter

This commit is contained in:
Sun Yimin 2023-07-04 08:38:53 +08:00 committed by GitHub
parent a53659eb5b
commit 12ef9e0ef9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 24 additions and 39 deletions

View File

@ -83,7 +83,7 @@ GLOBL fk_mask<>(SB), 8, $16
// PUNPCKHQDQ r2, r3;
// PUNPCKLQDQ r2, tmp2;
// MOVOU tmp2, r2
#define SSE_TRANSPOSE_MATRIX(r, r0, r1, r2, r3, tmp1, tmp2) \
#define SSE_TRANSPOSE_MATRIX(r0, r1, r2, r3, tmp1, tmp2) \
MOVOU r0, tmp2; \
PUNPCKHLQ r1, tmp2; \
PUNPCKLLQ r1, r0; \

View File

@ -177,7 +177,7 @@ non_avx2_start:
PSHUFB flip_mask<>(SB), t1
PSHUFB flip_mask<>(SB), t2
PSHUFB flip_mask<>(SB), t3
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y);
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y)
XORL CX, CX
@ -191,7 +191,7 @@ loop:
CMPL CX, $4*32
JB loop
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y);
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y);
PSHUFB bswap_mask<>(SB), t3
PSHUFB bswap_mask<>(SB), t2
PSHUFB bswap_mask<>(SB), t1

View File

@ -156,29 +156,15 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
JE avx
non_avx2_start:
PINSRD $0, 0(DX), t0
PINSRD $1, 16(DX), t0
PINSRD $2, 32(DX), t0
PINSRD $3, 48(DX), t0
MOVOU 0(DX), t0
MOVOU 16(DX), t1
MOVOU 32(DX), t2
MOVOU 48(DX), t3
PSHUFB flip_mask<>(SB), t0
PINSRD $0, 4(DX), t1
PINSRD $1, 20(DX), t1
PINSRD $2, 36(DX), t1
PINSRD $3, 52(DX), t1
PSHUFB flip_mask<>(SB), t1
PINSRD $0, 8(DX), t2
PINSRD $1, 24(DX), t2
PINSRD $2, 40(DX), t2
PINSRD $3, 56(DX), t2
PSHUFB flip_mask<>(SB), t2
PINSRD $0, 12(DX), t3
PINSRD $1, 28(DX), t3
PINSRD $2, 44(DX), t3
PINSRD $3, 60(DX), t3
PSHUFB flip_mask<>(SB), t3
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y)
XORL CX, CX
@ -192,22 +178,21 @@ loop:
CMPL CX, $4*32
JB loop
PSHUFB flip_mask<>(SB), t3
PSHUFB flip_mask<>(SB), t2
PSHUFB flip_mask<>(SB), t1
PSHUFB flip_mask<>(SB), t0
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y);
PSHUFB bswap_mask<>(SB), t3
PSHUFB bswap_mask<>(SB), t2
PSHUFB bswap_mask<>(SB), t1
PSHUFB bswap_mask<>(SB), t0
SSE_TRANSPOSE_MATRIX(CX, t3, t2, t1, t0, XWORD, YWORD)
PXOR 0(SI), t0
PXOR 16(SI), t1
PXOR 32(SI), t2
PXOR 48(SI), t3
PXOR 0(SI), t3
PXOR 16(SI), t2
PXOR 32(SI), t1
PXOR 48(SI), t0
MOVUPS t3, 0(BX)
MOVUPS t2, 16(BX)
MOVUPS t1, 32(BX)
MOVUPS t0, 48(BX)
MOVUPS t0, 0(BX)
MOVUPS t1, 16(BX)
MOVUPS t2, 32(BX)
MOVUPS t3, 48(BX)
done_sm4:
RET

View File

@ -216,7 +216,7 @@ TEXT ·gcmSm4Finish(SB),NOSPLIT,$0
PSHUFB flip_mask<>(SB), t1; \
PSHUFB flip_mask<>(SB), t2; \
PSHUFB flip_mask<>(SB), t3; \
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y); \
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y); \
XORL IND, IND; \
SM4_ROUND(0, RK, IND, x, y, z, t0, t1, t2, t3); \
SM4_ROUND(1, RK, IND, x, y, z, t1, t2, t3, t0); \
@ -257,7 +257,7 @@ TEXT ·gcmSm4Finish(SB),NOSPLIT,$0
SM4_ROUND(1, RK, IND, x, y, z, t1, t2, t3, t0); \
SM4_ROUND(2, RK, IND, x, y, z, t2, t3, t0, t1); \
SM4_ROUND(3, RK, IND, x, y, z, t3, t0, t1, t2); \
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y); \
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y); \
PSHUFB BSWAP, t3; \
PSHUFB BSWAP, t2; \
PSHUFB BSWAP, t1; \