mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 12:16:20 +08:00
sm4: stop using PXOR with an m128 memory operand directly
This commit is contained in:
parent
2f163662b5
commit
8f5e603f94
@ -151,15 +151,23 @@ cbcSm4Octets:
|
||||
MOVOU 112(DX), XWORD7
|
||||
|
||||
SM4_8BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3, XWORD4, XWORD5, XWORD6, XWORD7)
|
||||
|
||||
PXOR -16(DX), XWORD0
|
||||
PXOR 0(DX), XWORD1
|
||||
PXOR 16(DX), XWORD2
|
||||
PXOR 32(DX), XWORD3
|
||||
PXOR 48(DX), XWORD4
|
||||
PXOR 64(DX), XWORD5
|
||||
PXOR 80(DX), XWORD6
|
||||
PXOR 96(DX), XWORD7
|
||||
|
||||
MOVOU -16(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD0
|
||||
MOVOU 0(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD1
|
||||
MOVOU 16(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD2
|
||||
MOVOU 32(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD3
|
||||
MOVOU 48(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD4
|
||||
MOVOU 64(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD5
|
||||
MOVOU 80(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD6
|
||||
MOVOU 96(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD7
|
||||
|
||||
MOVOU XWORD0, 0(BX)
|
||||
MOVOU XWORD1, 16(BX)
|
||||
@ -186,10 +194,14 @@ cbcSm4Nibbles:
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR -16(DX), XWORD0
|
||||
PXOR 0(DX), XWORD1
|
||||
PXOR 16(DX), XWORD2
|
||||
PXOR 32(DX), XWORD3
|
||||
MOVUPS -16(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD0
|
||||
MOVUPS 0(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD1
|
||||
MOVUPS 16(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD2
|
||||
MOVUPS 32(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD3
|
||||
|
||||
MOVUPS XWORD0, 0(BX)
|
||||
MOVUPS XWORD1, 16(BX)
|
||||
@ -213,10 +225,14 @@ cbCSm4Single:
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
PXOR -64(DX), XWORD1
|
||||
PXOR -48(DX), XWORD2
|
||||
PXOR -32(DX), XWORD3
|
||||
MOVUPS 0(SI), XWTMP0
|
||||
PXOR XWTMP0, XWORD0
|
||||
MOVUPS -64(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD1
|
||||
MOVUPS -48(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD2
|
||||
MOVUPS -32(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD3
|
||||
|
||||
MOVUPS XWORD0, -64(BX)
|
||||
MOVUPS XWORD1, -48(BX)
|
||||
@ -230,7 +246,8 @@ cbcSm4Single16:
|
||||
|
||||
SM4_SINGLE_BLOCK(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
MOVUPS 0(SI), XWTMP0
|
||||
PXOR XWTMP0, XWORD0
|
||||
|
||||
MOVUPS XWORD0, -16(BX)
|
||||
|
||||
@ -242,8 +259,10 @@ cbcSm4Single32:
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
PXOR -32(DX), XWORD1
|
||||
MOVUPS 0(SI), XWTMP0
|
||||
PXOR XWTMP0, XWORD0
|
||||
MOVUPS -32(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD1
|
||||
|
||||
MOVUPS XWORD0, -32(BX)
|
||||
MOVUPS XWORD1, -16(BX)
|
||||
@ -257,9 +276,12 @@ cbcSm4Single48:
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
PXOR -48(DX), XWORD1
|
||||
PXOR -32(DX), XWORD2
|
||||
MOVUPS 0(SI), XWTMP0
|
||||
PXOR XWTMP0, XWORD0
|
||||
MOVUPS -48(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD1
|
||||
MOVUPS -32(DX), XWTMP0
|
||||
PXOR XWTMP0, XWORD2
|
||||
|
||||
MOVUPS XWORD0, -48(BX)
|
||||
MOVUPS XWORD1, -32(BX)
|
||||
|
@ -677,14 +677,22 @@ gcmSm4EncOctetsLoop:
|
||||
|
||||
SM4_8BLOCKS_WO_BS(rk, ACC1, T0, T1, T2, B0, B1, B2, B3, B4, B5, B6, B7)
|
||||
|
||||
PXOR (16*0)(ptx), B0
|
||||
PXOR (16*1)(ptx), B1
|
||||
PXOR (16*2)(ptx), B2
|
||||
PXOR (16*3)(ptx), B3
|
||||
PXOR (16*4)(ptx), B4
|
||||
PXOR (16*5)(ptx), B5
|
||||
PXOR (16*6)(ptx), B6
|
||||
PXOR (16*7)(ptx), B7
|
||||
MOVOU (16*0)(ptx), T0
|
||||
PXOR T0, B0
|
||||
MOVOU (16*1)(ptx), T0
|
||||
PXOR T0, B1
|
||||
MOVOU (16*2)(ptx), T0
|
||||
PXOR T0, B2
|
||||
MOVOU (16*3)(ptx), T0
|
||||
PXOR T0, B3
|
||||
MOVOU (16*4)(ptx), T0
|
||||
PXOR T0, B4
|
||||
MOVOU (16*5)(ptx), T0
|
||||
PXOR T0, B5
|
||||
MOVOU (16*6)(ptx), T0
|
||||
PXOR T0, B6
|
||||
MOVOU (16*7)(ptx), T0
|
||||
PXOR T0, B7
|
||||
|
||||
MOVOU B0, (16*0)(ctx)
|
||||
PSHUFB BSWAP, B0
|
||||
@ -765,10 +773,14 @@ gcmSm4EncNibbles:
|
||||
MOVOU (8*16 + 3*16)(SP), B3
|
||||
|
||||
SM4_4BLOCKS_WO_BS(AX, B4, T0, T1, T2, B0, B1, B2, B3)
|
||||
PXOR (16*0)(ptx), B0
|
||||
PXOR (16*1)(ptx), B1
|
||||
PXOR (16*2)(ptx), B2
|
||||
PXOR (16*3)(ptx), B3
|
||||
MOVOU (16*0)(ptx), T0
|
||||
PXOR T0, B0
|
||||
MOVOU (16*1)(ptx), T0
|
||||
PXOR T0, B1
|
||||
MOVOU (16*2)(ptx), T0
|
||||
PXOR T0, B2
|
||||
MOVOU (16*3)(ptx), T0
|
||||
PXOR T0, B3
|
||||
|
||||
MOVOU B0, (16*0)(ctx)
|
||||
MOVOU B1, (16*1)(ctx)
|
||||
@ -1683,14 +1695,22 @@ gcmSm4DecOctetsLoop:
|
||||
|
||||
SM4_8BLOCKS_WO_BS(rk, ACC1, T0, T1, T2, B0, B1, B2, B3, B4, B5, B6, B7)
|
||||
|
||||
PXOR (16*0)(ctx), B0
|
||||
PXOR (16*1)(ctx), B1
|
||||
PXOR (16*2)(ctx), B2
|
||||
PXOR (16*3)(ctx), B3
|
||||
PXOR (16*4)(ctx), B4
|
||||
PXOR (16*5)(ctx), B5
|
||||
PXOR (16*6)(ctx), B6
|
||||
PXOR (16*7)(ctx), B7
|
||||
MOVOU (16*0)(ctx), T0
|
||||
PXOR T0, B0
|
||||
MOVOU (16*1)(ctx), T0
|
||||
PXOR T0, B1
|
||||
MOVOU (16*2)(ctx), T0
|
||||
PXOR T0, B2
|
||||
MOVOU (16*3)(ctx), T0
|
||||
PXOR T0, B3
|
||||
MOVOU (16*4)(ctx), T0
|
||||
PXOR T0, B4
|
||||
MOVOU (16*5)(ctx), T0
|
||||
PXOR T0, B5
|
||||
MOVOU (16*6)(ctx), T0
|
||||
PXOR T0, B6
|
||||
MOVOU (16*7)(ctx), T0
|
||||
PXOR T0, B7
|
||||
|
||||
MOVOU B0, (16*0)(ptx)
|
||||
MOVOU B1, (16*1)(ptx)
|
||||
|
@ -210,44 +210,60 @@ GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16
|
||||
|
||||
// sseLoad4Blocks loads the four 16-byte blocks at (16*0)(DX)..(16*3)(DX)
// into B0..B3 and XORs each one with the 16 bytes at the same offset
// from SP.
//
// Each SP operand is first copied into T0 with MOVOU (an unaligned load)
// and then XORed register-to-register. Legacy-SSE PXOR with an m128
// memory operand requires that operand to be 16-byte aligned, so the
// memory form is deliberately not used here.
//
// Clobbers: T0.
// NOTE(review): what the values at (SP) represent is not visible from
// this macro alone - confirm against the caller.
#define sseLoad4Blocks \
	MOVOU (16*0)(DX), B0; \
	MOVOU (16*0)(SP), T0; \
	PXOR T0, B0; \
	MOVOU (16*1)(DX), B1; \
	MOVOU (16*1)(SP), T0; \
	PXOR T0, B1; \
	MOVOU (16*2)(DX), B2; \
	MOVOU (16*2)(SP), T0; \
	PXOR T0, B2; \
	MOVOU (16*3)(DX), B3; \
	MOVOU (16*3)(SP), T0; \
	PXOR T0, B3
|
||||
|
||||
// sseStore4Blocks XORs B0..B3 with the 16-byte values at
// (16*0)(SP)..(16*3)(SP) and stores the results to
// (16*0)(CX)..(16*3)(CX).
//
// Each SP operand is first copied into T0 with MOVOU (an unaligned load)
// and then XORed register-to-register. Legacy-SSE PXOR with an m128
// memory operand requires that operand to be 16-byte aligned, so the
// memory form is deliberately not used here.
//
// Clobbers: T0. B0..B3 hold the stored values on exit.
#define sseStore4Blocks \
	MOVOU (16*0)(SP), T0; \
	PXOR T0, B0; \
	MOVOU B0, (16*0)(CX); \
	MOVOU (16*1)(SP), T0; \
	PXOR T0, B1; \
	MOVOU B1, (16*1)(CX); \
	MOVOU (16*2)(SP), T0; \
	PXOR T0, B2; \
	MOVOU B2, (16*2)(CX); \
	MOVOU (16*3)(SP), T0; \
	PXOR T0, B3; \
	MOVOU B3, (16*3)(CX)
|
||||
|
||||
// sseLoad8Blocks expands sseLoad4Blocks for blocks 0..3, then does the
// same for blocks 4..7: it loads (16*4)(DX)..(16*7)(DX) into B4..B7 and
// XORs each one with the 16 bytes at the same offset from SP.
//
// Each SP operand is first copied into T0 with MOVOU (an unaligned load)
// and then XORed register-to-register. Legacy-SSE PXOR with an m128
// memory operand requires that operand to be 16-byte aligned, so the
// memory form is deliberately not used here.
//
// Clobbers: T0.
#define sseLoad8Blocks \
	sseLoad4Blocks; \
	MOVOU (16*4)(DX), B4; \
	MOVOU (16*4)(SP), T0; \
	PXOR T0, B4; \
	MOVOU (16*5)(DX), B5; \
	MOVOU (16*5)(SP), T0; \
	PXOR T0, B5; \
	MOVOU (16*6)(DX), B6; \
	MOVOU (16*6)(SP), T0; \
	PXOR T0, B6; \
	MOVOU (16*7)(DX), B7; \
	MOVOU (16*7)(SP), T0; \
	PXOR T0, B7
|
||||
|
||||
// sseStore8Blocks expands sseStore4Blocks for blocks 0..3, then does the
// same for blocks 4..7: it XORs B4..B7 with the 16-byte values at
// (16*4)(SP)..(16*7)(SP) and stores the results to
// (16*4)(CX)..(16*7)(CX).
//
// Each SP operand is first copied into T0 with MOVOU (an unaligned load)
// and then XORed register-to-register. Legacy-SSE PXOR with an m128
// memory operand requires that operand to be 16-byte aligned, so the
// memory form is deliberately not used here.
//
// Clobbers: T0. B4..B7 hold the stored values on exit.
#define sseStore8Blocks \
	sseStore4Blocks; \
	MOVOU (16*4)(SP), T0; \
	PXOR T0, B4; \
	MOVOU B4, (16*4)(CX); \
	MOVOU (16*5)(SP), T0; \
	PXOR T0, B5; \
	MOVOU B5, (16*5)(CX); \
	MOVOU (16*6)(SP), T0; \
	PXOR T0, B6; \
	MOVOU B6, (16*6)(CX); \
	MOVOU (16*7)(SP), T0; \
	PXOR T0, B7; \
	MOVOU B7, (16*7)(CX)
|
||||
|
||||
#define avxLoad4Blocks \
|
||||
|
Loading…
x
Reference in New Issue
Block a user