mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
sm4: optimize sse version
This commit is contained in:
parent
f10b09f5a2
commit
a53659eb5b
@ -84,60 +84,19 @@ GLOBL fk_mask<>(SB), 8, $16
|
|||||||
// PUNPCKLQDQ r2, tmp2;
|
// PUNPCKLQDQ r2, tmp2;
|
||||||
// MOVOU tmp2, r2
|
// MOVOU tmp2, r2
|
||||||
#define SSE_TRANSPOSE_MATRIX(r, r0, r1, r2, r3, tmp1, tmp2) \
|
#define SSE_TRANSPOSE_MATRIX(r, r0, r1, r2, r3, tmp1, tmp2) \
|
||||||
PEXTRD $2, r0, r; \
|
MOVOU r0, tmp2; \
|
||||||
PINSRD $0, r, tmp2; \
|
PUNPCKHLQ r1, tmp2; \
|
||||||
PEXTRD $2, r1, r; \
|
PUNPCKLLQ r1, r0; \
|
||||||
PINSRD $1, r, tmp2; \
|
MOVOU r2, tmp1; \
|
||||||
; \
|
PUNPCKLLQ r3, tmp1; \
|
||||||
PEXTRD $3, r0, r; \
|
PUNPCKHLQ r3, r2; \
|
||||||
PINSRD $2, r, tmp2; \
|
MOVOU r0, r1; \
|
||||||
PEXTRD $3, r1, r; \
|
PUNPCKHQDQ tmp1, r1; \
|
||||||
PINSRD $3, r, tmp2; \ // tmp2 = [w7, w3, w6, w2]
|
PUNPCKLQDQ tmp1, r0; \
|
||||||
; \
|
MOVOU tmp2, r3; \
|
||||||
PEXTRD $1, r0, r; \
|
PUNPCKHQDQ r2, r3; \
|
||||||
PINSRD $2, r, r0; \
|
PUNPCKLQDQ r2, tmp2; \
|
||||||
PEXTRD $0, r1, r; \
|
MOVOU tmp2, r2
|
||||||
PINSRD $1, r, r0; \
|
|
||||||
PEXTRD $1, r1, r; \
|
|
||||||
PINSRD $3, r, r0; \ // r0 = [w5, w1, w4, w0]
|
|
||||||
; \
|
|
||||||
PEXTRD $0, r2, r; \
|
|
||||||
PINSRD $0, r, tmp1; \
|
|
||||||
PEXTRD $0, r3, r; \
|
|
||||||
PINSRD $1, r, tmp1; \
|
|
||||||
PEXTRD $1, r2, r; \
|
|
||||||
PINSRD $2, r, tmp1; \
|
|
||||||
PEXTRD $1, r3, r; \
|
|
||||||
PINSRD $3, r, tmp1; \ // tmp1 = [w13, w9, w12, w8]
|
|
||||||
; \
|
|
||||||
PEXTRD $2, r2, r; \
|
|
||||||
PINSRD $0, r, r2; \
|
|
||||||
PEXTRD $2, r3, r; \
|
|
||||||
PINSRD $1, r, r2; \
|
|
||||||
PEXTRD $3, r2, r; \
|
|
||||||
PINSRD $2, r, r2; \
|
|
||||||
PEXTRD $3, r3, r; \
|
|
||||||
PINSRD $3, r, r2; \ // r2 = [w15, w11, w14, w10]
|
|
||||||
; \
|
|
||||||
MOVOU r0, r1; \
|
|
||||||
PEXTRQ $1, r1, r; \
|
|
||||||
PINSRQ $0, r, r1; \
|
|
||||||
PEXTRQ $1, tmp1, r; \
|
|
||||||
PINSRQ $1, r, r1; \ // r1 = [w13, w9, w5, w1]
|
|
||||||
; \
|
|
||||||
PEXTRQ $0, tmp1, r; \
|
|
||||||
PINSRQ $1, r, r0; \ // r0 = [w12, w8, w4, w0]
|
|
||||||
; \
|
|
||||||
MOVOU tmp2, r3; \
|
|
||||||
PEXTRQ $1, r3, r; \
|
|
||||||
PINSRQ $0, r, r3; \
|
|
||||||
PEXTRQ $1, r2, r; \
|
|
||||||
PINSRQ $1, r, r3; \ // r3 = [w15, w11, w7, w3]
|
|
||||||
; \
|
|
||||||
PEXTRQ $0, r2, r; \
|
|
||||||
PINSRQ $1, r, r2; \
|
|
||||||
PEXTRQ $0, tmp2, r; \
|
|
||||||
PINSRQ $0, r, r2
|
|
||||||
|
|
||||||
// SM4 sbox function
|
// SM4 sbox function
|
||||||
// parameters:
|
// parameters:
|
||||||
|
@ -169,29 +169,15 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
|
|||||||
JE avx
|
JE avx
|
||||||
|
|
||||||
non_avx2_start:
|
non_avx2_start:
|
||||||
PINSRD $0, 0(DX), t0
|
MOVOU 0(DX), t0
|
||||||
PINSRD $1, 16(DX), t0
|
MOVOU 16(DX), t1
|
||||||
PINSRD $2, 32(DX), t0
|
MOVOU 32(DX), t2
|
||||||
PINSRD $3, 48(DX), t0
|
MOVOU 48(DX), t3
|
||||||
PSHUFB flip_mask<>(SB), t0
|
PSHUFB flip_mask<>(SB), t0
|
||||||
|
|
||||||
PINSRD $0, 4(DX), t1
|
|
||||||
PINSRD $1, 20(DX), t1
|
|
||||||
PINSRD $2, 36(DX), t1
|
|
||||||
PINSRD $3, 52(DX), t1
|
|
||||||
PSHUFB flip_mask<>(SB), t1
|
PSHUFB flip_mask<>(SB), t1
|
||||||
|
|
||||||
PINSRD $0, 8(DX), t2
|
|
||||||
PINSRD $1, 24(DX), t2
|
|
||||||
PINSRD $2, 40(DX), t2
|
|
||||||
PINSRD $3, 56(DX), t2
|
|
||||||
PSHUFB flip_mask<>(SB), t2
|
PSHUFB flip_mask<>(SB), t2
|
||||||
|
|
||||||
PINSRD $0, 12(DX), t3
|
|
||||||
PINSRD $1, 28(DX), t3
|
|
||||||
PINSRD $2, 44(DX), t3
|
|
||||||
PINSRD $3, 60(DX), t3
|
|
||||||
PSHUFB flip_mask<>(SB), t3
|
PSHUFB flip_mask<>(SB), t3
|
||||||
|
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y);
|
||||||
|
|
||||||
XORL CX, CX
|
XORL CX, CX
|
||||||
|
|
||||||
@ -205,38 +191,16 @@ loop:
|
|||||||
CMPL CX, $4*32
|
CMPL CX, $4*32
|
||||||
JB loop
|
JB loop
|
||||||
|
|
||||||
PSHUFB flip_mask<>(SB), t3
|
SSE_TRANSPOSE_MATRIX(R12, t0, t1, t2, t3, x, y);
|
||||||
PSHUFB flip_mask<>(SB), t2
|
PSHUFB bswap_mask<>(SB), t3
|
||||||
PSHUFB flip_mask<>(SB), t1
|
PSHUFB bswap_mask<>(SB), t2
|
||||||
PSHUFB flip_mask<>(SB), t0
|
PSHUFB bswap_mask<>(SB), t1
|
||||||
MOVUPS t3, 0(BX)
|
PSHUFB bswap_mask<>(SB), t0
|
||||||
MOVUPS t2, 16(BX)
|
|
||||||
MOVUPS t1, 32(BX)
|
MOVOU t0, 0(BX)
|
||||||
MOVUPS t0, 48(BX)
|
MOVOU t1, 16(BX)
|
||||||
MOVL 4(BX), R8
|
MOVOU t2, 32(BX)
|
||||||
MOVL 8(BX), R9
|
MOVOU t3, 48(BX)
|
||||||
MOVL 12(BX), R10
|
|
||||||
MOVL 16(BX), R11
|
|
||||||
MOVL 32(BX), R12
|
|
||||||
MOVL 48(BX), R13
|
|
||||||
MOVL R11, 4(BX)
|
|
||||||
MOVL R12, 8(BX)
|
|
||||||
MOVL R13, 12(BX)
|
|
||||||
MOVL R8, 16(BX)
|
|
||||||
MOVL R9, 32(BX)
|
|
||||||
MOVL R10, 48(BX)
|
|
||||||
MOVL 24(BX), R8
|
|
||||||
MOVL 28(BX), R9
|
|
||||||
MOVL 36(BX), R10
|
|
||||||
MOVL 52(BX), R11
|
|
||||||
MOVL R10, 24(BX)
|
|
||||||
MOVL R11, 28(BX)
|
|
||||||
MOVL R8, 36(BX)
|
|
||||||
MOVL R9, 52(BX)
|
|
||||||
MOVL 44(BX), R8
|
|
||||||
MOVL 56(BX), R9
|
|
||||||
MOVL R9, 44(BX)
|
|
||||||
MOVL R8, 56(BX)
|
|
||||||
|
|
||||||
done_sm4:
|
done_sm4:
|
||||||
RET
|
RET
|
||||||
|
Loading…
x
Reference in New Issue
Block a user