mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 12:46:18 +08:00
sm4: optimize amd64 sse, cbc decrypter
This commit is contained in:
parent
a53659eb5b
commit
12ef9e0ef9
@ -83,7 +83,7 @@ GLOBL fk_mask<>(SB), 8, $16
|
|||||||
// PUNPCKHQDQ r2, r3;
|
// PUNPCKHQDQ r2, r3;
|
||||||
// PUNPCKLQDQ r2, tmp2;
|
// PUNPCKLQDQ r2, tmp2;
|
||||||
// MOVOU tmp2, r2
|
// MOVOU tmp2, r2
|
||||||
#define SSE_TRANSPOSE_MATRIX(r, r0, r1, r2, r3, tmp1, tmp2) \
|
#define SSE_TRANSPOSE_MATRIX(r0, r1, r2, r3, tmp1, tmp2) \
|
||||||
MOVOU r0, tmp2; \
|
MOVOU r0, tmp2; \
|
||||||
PUNPCKHLQ r1, tmp2; \
|
PUNPCKHLQ r1, tmp2; \
|
||||||
PUNPCKLLQ r1, r0; \
|
PUNPCKLLQ r1, r0; \
|
||||||
|
@ -177,7 +177,7 @@ non_avx2_start:
|
|||||||
PSHUFB flip_mask<>(SB), t1
|
PSHUFB flip_mask<>(SB), t1
|
||||||
PSHUFB flip_mask<>(SB), t2
|
PSHUFB flip_mask<>(SB), t2
|
||||||
PSHUFB flip_mask<>(SB), t3
|
PSHUFB flip_mask<>(SB), t3
|
||||||
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y);
|
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y)
|
||||||
|
|
||||||
XORL CX, CX
|
XORL CX, CX
|
||||||
|
|
||||||
@ -191,7 +191,7 @@ loop:
|
|||||||
CMPL CX, $4*32
|
CMPL CX, $4*32
|
||||||
JB loop
|
JB loop
|
||||||
|
|
||||||
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y);
|
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y);
|
||||||
PSHUFB bswap_mask<>(SB), t3
|
PSHUFB bswap_mask<>(SB), t3
|
||||||
PSHUFB bswap_mask<>(SB), t2
|
PSHUFB bswap_mask<>(SB), t2
|
||||||
PSHUFB bswap_mask<>(SB), t1
|
PSHUFB bswap_mask<>(SB), t1
|
||||||
|
@ -156,29 +156,15 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
|
|||||||
JE avx
|
JE avx
|
||||||
|
|
||||||
non_avx2_start:
|
non_avx2_start:
|
||||||
PINSRD $0, 0(DX), t0
|
MOVOU 0(DX), t0
|
||||||
PINSRD $1, 16(DX), t0
|
MOVOU 16(DX), t1
|
||||||
PINSRD $2, 32(DX), t0
|
MOVOU 32(DX), t2
|
||||||
PINSRD $3, 48(DX), t0
|
MOVOU 48(DX), t3
|
||||||
PSHUFB flip_mask<>(SB), t0
|
PSHUFB flip_mask<>(SB), t0
|
||||||
|
|
||||||
PINSRD $0, 4(DX), t1
|
|
||||||
PINSRD $1, 20(DX), t1
|
|
||||||
PINSRD $2, 36(DX), t1
|
|
||||||
PINSRD $3, 52(DX), t1
|
|
||||||
PSHUFB flip_mask<>(SB), t1
|
PSHUFB flip_mask<>(SB), t1
|
||||||
|
|
||||||
PINSRD $0, 8(DX), t2
|
|
||||||
PINSRD $1, 24(DX), t2
|
|
||||||
PINSRD $2, 40(DX), t2
|
|
||||||
PINSRD $3, 56(DX), t2
|
|
||||||
PSHUFB flip_mask<>(SB), t2
|
PSHUFB flip_mask<>(SB), t2
|
||||||
|
|
||||||
PINSRD $0, 12(DX), t3
|
|
||||||
PINSRD $1, 28(DX), t3
|
|
||||||
PINSRD $2, 44(DX), t3
|
|
||||||
PINSRD $3, 60(DX), t3
|
|
||||||
PSHUFB flip_mask<>(SB), t3
|
PSHUFB flip_mask<>(SB), t3
|
||||||
|
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y)
|
||||||
|
|
||||||
XORL CX, CX
|
XORL CX, CX
|
||||||
|
|
||||||
@ -192,22 +178,21 @@ loop:
|
|||||||
CMPL CX, $4*32
|
CMPL CX, $4*32
|
||||||
JB loop
|
JB loop
|
||||||
|
|
||||||
PSHUFB flip_mask<>(SB), t3
|
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y);
|
||||||
PSHUFB flip_mask<>(SB), t2
|
PSHUFB bswap_mask<>(SB), t3
|
||||||
PSHUFB flip_mask<>(SB), t1
|
PSHUFB bswap_mask<>(SB), t2
|
||||||
PSHUFB flip_mask<>(SB), t0
|
PSHUFB bswap_mask<>(SB), t1
|
||||||
|
PSHUFB bswap_mask<>(SB), t0
|
||||||
|
|
||||||
SSE_TRANSPOSE_MATRIX(CX, t3, t2, t1, t0, XWORD, YWORD)
|
PXOR 0(SI), t0
|
||||||
|
PXOR 16(SI), t1
|
||||||
|
PXOR 32(SI), t2
|
||||||
|
PXOR 48(SI), t3
|
||||||
|
|
||||||
PXOR 0(SI), t3
|
MOVUPS t0, 0(BX)
|
||||||
PXOR 16(SI), t2
|
MOVUPS t1, 16(BX)
|
||||||
PXOR 32(SI), t1
|
MOVUPS t2, 32(BX)
|
||||||
PXOR 48(SI), t0
|
MOVUPS t3, 48(BX)
|
||||||
|
|
||||||
MOVUPS t3, 0(BX)
|
|
||||||
MOVUPS t2, 16(BX)
|
|
||||||
MOVUPS t1, 32(BX)
|
|
||||||
MOVUPS t0, 48(BX)
|
|
||||||
|
|
||||||
done_sm4:
|
done_sm4:
|
||||||
RET
|
RET
|
||||||
|
@ -216,7 +216,7 @@ TEXT ·gcmSm4Finish(SB),NOSPLIT,$0
|
|||||||
PSHUFB flip_mask<>(SB), t1; \
|
PSHUFB flip_mask<>(SB), t1; \
|
||||||
PSHUFB flip_mask<>(SB), t2; \
|
PSHUFB flip_mask<>(SB), t2; \
|
||||||
PSHUFB flip_mask<>(SB), t3; \
|
PSHUFB flip_mask<>(SB), t3; \
|
||||||
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y); \
|
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y); \
|
||||||
XORL IND, IND; \
|
XORL IND, IND; \
|
||||||
SM4_ROUND(0, RK, IND, x, y, z, t0, t1, t2, t3); \
|
SM4_ROUND(0, RK, IND, x, y, z, t0, t1, t2, t3); \
|
||||||
SM4_ROUND(1, RK, IND, x, y, z, t1, t2, t3, t0); \
|
SM4_ROUND(1, RK, IND, x, y, z, t1, t2, t3, t0); \
|
||||||
@ -257,7 +257,7 @@ TEXT ·gcmSm4Finish(SB),NOSPLIT,$0
|
|||||||
SM4_ROUND(1, RK, IND, x, y, z, t1, t2, t3, t0); \
|
SM4_ROUND(1, RK, IND, x, y, z, t1, t2, t3, t0); \
|
||||||
SM4_ROUND(2, RK, IND, x, y, z, t2, t3, t0, t1); \
|
SM4_ROUND(2, RK, IND, x, y, z, t2, t3, t0, t1); \
|
||||||
SM4_ROUND(3, RK, IND, x, y, z, t3, t0, t1, t2); \
|
SM4_ROUND(3, RK, IND, x, y, z, t3, t0, t1, t2); \
|
||||||
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y); \
|
SSE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y); \
|
||||||
PSHUFB BSWAP, t3; \
|
PSHUFB BSWAP, t3; \
|
||||||
PSHUFB BSWAP, t2; \
|
PSHUFB BSWAP, t2; \
|
||||||
PSHUFB BSWAP, t1; \
|
PSHUFB BSWAP, t1; \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user