mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
zuc: amd64 eliminate the usage of R15
This commit is contained in:
parent
17e66c88d8
commit
df14178097
@ -211,12 +211,12 @@ GLOBL flip_mask<>(SB), RODATA, $16
|
|||||||
VMOVDQU Comb_matrix_mul_high_nibble<>(SB), XIN_OUT \
|
VMOVDQU Comb_matrix_mul_high_nibble<>(SB), XIN_OUT \
|
||||||
MUL_PSHUFB_AVX(XTMP2, XTMP1, XIN_OUT, XTMP3)
|
MUL_PSHUFB_AVX(XTMP2, XTMP1, XIN_OUT, XTMP3)
|
||||||
|
|
||||||
#define F_R1 R10
|
#define F_R1 R9
|
||||||
#define F_R2 R11
|
#define F_R2 R10
|
||||||
#define BRC_X0 R12
|
#define BRC_X0 R11
|
||||||
#define BRC_X1 R13
|
#define BRC_X1 R12
|
||||||
#define BRC_X2 R14
|
#define BRC_X2 R13
|
||||||
#define BRC_X3 R15
|
#define BRC_X3 R14
|
||||||
|
|
||||||
// BITS_REORG(idx)
|
// BITS_REORG(idx)
|
||||||
//
|
//
|
||||||
@ -225,7 +225,7 @@ GLOBL flip_mask<>(SB), RODATA, $16
|
|||||||
// uses
|
// uses
|
||||||
// AX, BX, CX, DX
|
// AX, BX, CX, DX
|
||||||
// return
|
// return
|
||||||
// updates R12, R13, R14, R15
|
// updates R11, R12, R13, R14
|
||||||
//
|
//
|
||||||
#define BITS_REORG(idx) \
|
#define BITS_REORG(idx) \
|
||||||
MOVL (((15 + idx) % 16)*4)(SI), BRC_X0 \
|
MOVL (((15 + idx) % 16)*4)(SI), BRC_X0 \
|
||||||
@ -251,25 +251,25 @@ GLOBL flip_mask<>(SB), RODATA, $16
|
|||||||
// params
|
// params
|
||||||
// %1 - round number
|
// %1 - round number
|
||||||
// uses
|
// uses
|
||||||
// AX as input (ZERO or W), BX, CX, DX, R8, R9
|
// AX as input (ZERO or W), BX, CX, DX, R8
|
||||||
#define LFSR_UPDT(idx) \
|
#define LFSR_UPDT(idx) \
|
||||||
MOVL (((0 + idx) % 16)*4)(SI), BX \
|
MOVL (((0 + idx) % 16)*4)(SI), BX \
|
||||||
MOVL (((4 + idx) % 16)*4)(SI), CX \
|
MOVL (((4 + idx) % 16)*4)(SI), CX \
|
||||||
MOVL (((10 + idx) % 16)*4)(SI), DX \
|
MOVL (((10 + idx) % 16)*4)(SI), DX \
|
||||||
MOVL (((13 + idx) % 16)*4)(SI), R8 \
|
MOVL (((13 + idx) % 16)*4)(SI), R8 \
|
||||||
MOVL (((15 + idx) % 16)*4)(SI), R9 \
|
|
||||||
\ // Calculate 64-bit LFSR feedback
|
\ // Calculate 64-bit LFSR feedback
|
||||||
ADDQ BX, AX \
|
ADDQ BX, AX \
|
||||||
SHLQ $8, BX \
|
SHLQ $8, BX \
|
||||||
SHLQ $20, CX \
|
SHLQ $20, CX \
|
||||||
SHLQ $21, DX \
|
SHLQ $21, DX \
|
||||||
SHLQ $17, R8 \
|
SHLQ $17, R8 \
|
||||||
SHLQ $15, R9 \
|
|
||||||
ADDQ BX, AX \
|
ADDQ BX, AX \
|
||||||
ADDQ CX, AX \
|
ADDQ CX, AX \
|
||||||
ADDQ DX, AX \
|
ADDQ DX, AX \
|
||||||
ADDQ R8, AX \
|
ADDQ R8, AX \
|
||||||
ADDQ R9, AX \
|
MOVL (((15 + idx) % 16)*4)(SI), R8 \
|
||||||
|
SHLQ $15, R8 \
|
||||||
|
ADDQ R8, AX \
|
||||||
\ // Reduce it to 31-bit value
|
\ // Reduce it to 31-bit value
|
||||||
MOVQ AX, BX \
|
MOVQ AX, BX \
|
||||||
ANDQ $0x7FFFFFFF, AX \
|
ANDQ $0x7FFFFFFF, AX \
|
||||||
@ -295,34 +295,30 @@ GLOBL flip_mask<>(SB), RODATA, $16
|
|||||||
SHLDL(F_R2, F_R1, $16) \ // Q = (W2 << 16) | (W1 >> 16)
|
SHLDL(F_R2, F_R1, $16) \ // Q = (W2 << 16) | (W1 >> 16)
|
||||||
MOVL DX, BX \ // start L1
|
MOVL DX, BX \ // start L1
|
||||||
MOVL DX, CX \
|
MOVL DX, CX \
|
||||||
MOVL DX, R8 \
|
|
||||||
MOVL DX, R9 \
|
|
||||||
ROLL $2, BX \
|
ROLL $2, BX \
|
||||||
ROLL $10, CX \
|
ROLL $24, CX \
|
||||||
ROLL $18, R8 \
|
|
||||||
ROLL $24, R9 \
|
|
||||||
XORL BX, DX \
|
|
||||||
XORL CX, DX \
|
XORL CX, DX \
|
||||||
XORL R8, DX \
|
XORL BX, DX \
|
||||||
XORL R9, DX \ // U = L1(P) = EDX, hi(RDX)=0
|
ROLL $8, BX \
|
||||||
|
XORL BX, DX \
|
||||||
|
ROLL $8, BX \
|
||||||
|
XORL BX, DX \ // U = L1(P) = EDX, hi(RDX)=0
|
||||||
MOVL F_R2, BX \
|
MOVL F_R2, BX \
|
||||||
MOVL F_R2, CX \
|
MOVL F_R2, CX \
|
||||||
MOVL F_R2, R8 \
|
|
||||||
MOVL F_R2, R9 \
|
|
||||||
ROLL $8, BX \
|
ROLL $8, BX \
|
||||||
ROLL $14, CX \
|
|
||||||
ROLL $22, R8 \
|
|
||||||
ROLL $30, R9 \
|
|
||||||
XORL BX, F_R2 \
|
XORL BX, F_R2 \
|
||||||
|
ROLL $14, CX \
|
||||||
XORL CX, F_R2 \
|
XORL CX, F_R2 \
|
||||||
XORL R8, F_R2 \
|
ROLL $8, CX \
|
||||||
XORL R9, F_R2 \ // V = L2(Q) = R11D, hi(R11)=0
|
XORL CX, F_R2 \
|
||||||
|
ROLL $8, CX \
|
||||||
|
XORL CX, F_R2 \ // V = L2(Q) = R11D, hi(R11)=0
|
||||||
SHLQ $32, F_R2 \ // DX = V || U
|
SHLQ $32, F_R2 \ // DX = V || U
|
||||||
XORQ F_R2, DX
|
XORQ F_R2, DX
|
||||||
|
|
||||||
// Non-Linear function F, SSE version.
|
// Non-Linear function F, SSE version.
|
||||||
// uses
|
// uses
|
||||||
// AX, BX, CX, DX, R8, R9
|
// AX, BX, CX, DX, R8
|
||||||
// X0, X1, X2, X3, X4
|
// X0, X1, X2, X3, X4
|
||||||
// return
|
// return
|
||||||
// W in AX
|
// W in AX
|
||||||
@ -397,7 +393,7 @@ GLOBL flip_mask<>(SB), RODATA, $16
|
|||||||
|
|
||||||
// Non-Linear function F, AVX version.
|
// Non-Linear function F, AVX version.
|
||||||
// uses
|
// uses
|
||||||
// AX, BX, CX, DX, R8, R9
|
// AX, BX, CX, DX, R8
|
||||||
// X0, X1, X2, X3, X4
|
// X0, X1, X2, X3, X4
|
||||||
// return
|
// return
|
||||||
// W in AX
|
// W in AX
|
||||||
@ -413,8 +409,8 @@ GLOBL flip_mask<>(SB), RODATA, $16
|
|||||||
VPAND mask_S0<>(SB), X1, X1 \
|
VPAND mask_S0<>(SB), X1, X1 \
|
||||||
VPXOR X1, X0, X0 \
|
VPXOR X1, X0, X0 \
|
||||||
\
|
\
|
||||||
MOVL X0, R10 \ // F_R1
|
MOVL X0, F_R1 \ // F_R1
|
||||||
VPEXTRD $1, X0, R11
|
VPEXTRD $1, X0, F_R2
|
||||||
|
|
||||||
#define LOAD_STATE \
|
#define LOAD_STATE \
|
||||||
MOVL OFFSET_FR1(SI), F_R1 \
|
MOVL OFFSET_FR1(SI), F_R1 \
|
||||||
@ -477,7 +473,7 @@ avx:
|
|||||||
#define ROUND_SSE(idx) \
|
#define ROUND_SSE(idx) \
|
||||||
BITS_REORG(idx) \
|
BITS_REORG(idx) \
|
||||||
NONLIN_FUN_SSE \
|
NONLIN_FUN_SSE \
|
||||||
XORL R15, AX \
|
XORL BRC_X3, AX \
|
||||||
MOVL AX, (idx*4)(DI) \
|
MOVL AX, (idx*4)(DI) \
|
||||||
XORQ AX, AX \
|
XORQ AX, AX \
|
||||||
LFSR_UPDT(idx)
|
LFSR_UPDT(idx)
|
||||||
@ -485,7 +481,7 @@ avx:
|
|||||||
#define ROUND_AVX(idx) \
|
#define ROUND_AVX(idx) \
|
||||||
BITS_REORG(idx) \
|
BITS_REORG(idx) \
|
||||||
NONLIN_FUN_AVX \
|
NONLIN_FUN_AVX \
|
||||||
XORL R15, AX \
|
XORL BRC_X3, AX \
|
||||||
MOVL AX, (idx*4)(DI) \
|
MOVL AX, (idx*4)(DI) \
|
||||||
XORQ AX, AX \
|
XORQ AX, AX \
|
||||||
LFSR_UPDT(idx)
|
LFSR_UPDT(idx)
|
||||||
@ -493,7 +489,7 @@ avx:
|
|||||||
#define ROUND_REV32_SSE(idx) \
|
#define ROUND_REV32_SSE(idx) \
|
||||||
BITS_REORG(idx) \
|
BITS_REORG(idx) \
|
||||||
NONLIN_FUN_SSE \
|
NONLIN_FUN_SSE \
|
||||||
XORL R15, AX \
|
XORL BRC_X3, AX \
|
||||||
BSWAPL AX \
|
BSWAPL AX \
|
||||||
MOVL AX, (idx*4)(DI) \
|
MOVL AX, (idx*4)(DI) \
|
||||||
XORQ AX, AX \
|
XORQ AX, AX \
|
||||||
@ -502,7 +498,7 @@ avx:
|
|||||||
#define ROUND_REV32_AVX(idx) \
|
#define ROUND_REV32_AVX(idx) \
|
||||||
BITS_REORG(idx) \
|
BITS_REORG(idx) \
|
||||||
NONLIN_FUN_AVX \
|
NONLIN_FUN_AVX \
|
||||||
XORL R15, AX \
|
XORL BRC_X3, AX \
|
||||||
BSWAPL AX \
|
BSWAPL AX \
|
||||||
MOVL AX, (idx*4)(DI) \
|
MOVL AX, (idx*4)(DI) \
|
||||||
XORQ AX, AX \
|
XORQ AX, AX \
|
||||||
|
Loading…
x
Reference in New Issue
Block a user