mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-22 10:16:18 +08:00
sm4: fix cbc iv issue
This commit is contained in:
parent
0fbc30f868
commit
feb76edda8
@ -132,7 +132,9 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
|
||||
JE avx2Start
|
||||
|
||||
CMPB ·useAVX(SB), $1
|
||||
JE avxCbcSm4Octets
|
||||
JE avxStart
|
||||
|
||||
MOVOU -16(DX), X15
|
||||
|
||||
cbcSm4Octets:
|
||||
CMPQ DI, $128
|
||||
@ -155,7 +157,7 @@ cbcSm4Octets:
|
||||
PXOR -16(DX), XWORD0
|
||||
PXOR 0(DX), XWORD1
|
||||
PXOR 16(DX), XWORD2
|
||||
PXOR 32(DX), XWORD3
|
||||
PXOR 32(DX), XWORD3
|
||||
PXOR 48(DX), XWORD4
|
||||
PXOR 64(DX), XWORD5
|
||||
PXOR 80(DX), XWORD6
|
||||
@ -211,8 +213,6 @@ cbCSm4Single:
|
||||
MOVOU -32(DX), XWORD2
|
||||
MOVOU -16(DX), XWORD3
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
@ -224,28 +224,24 @@ cbCSm4Single:
|
||||
MOVUPS XWORD1, -48(BX)
|
||||
MOVUPS XWORD2, -32(BX)
|
||||
MOVUPS XWORD3, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
|
||||
JMP cbcSm4Done
|
||||
|
||||
cbcSm4Single16:
|
||||
MOVOU -16(DX), XWORD0
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
|
||||
MOVUPS XWORD0, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
|
||||
JMP cbcSm4Done
|
||||
|
||||
cbcSm4Single32:
|
||||
MOVOU -32(DX), XWORD0
|
||||
MOVOU -16(DX), XWORD1
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
@ -253,7 +249,7 @@ cbcSm4Single32:
|
||||
|
||||
MOVUPS XWORD0, -32(BX)
|
||||
MOVUPS XWORD1, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
|
||||
JMP cbcSm4Done
|
||||
|
||||
cbcSm4Single48:
|
||||
@ -261,8 +257,6 @@ cbcSm4Single48:
|
||||
MOVOU -32(DX), XWORD1
|
||||
MOVOU -16(DX), XWORD2
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
@ -272,11 +266,14 @@ cbcSm4Single48:
|
||||
MOVUPS XWORD0, -48(BX)
|
||||
MOVUPS XWORD1, -32(BX)
|
||||
MOVUPS XWORD2, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
|
||||
cbcSm4Done:
|
||||
MOVUPS X15, (SI)
|
||||
RET
|
||||
|
||||
avxStart:
|
||||
VMOVDQU -16(DX), X15
|
||||
|
||||
avxCbcSm4Octets:
|
||||
CMPQ DI, $128
|
||||
JLE avxCbcSm4Nibbles
|
||||
@ -354,8 +351,6 @@ avxCbCSm4Single:
|
||||
VMOVDQU -32(DX), XWORD2
|
||||
VMOVDQU -16(DX), XWORD3
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -367,28 +362,24 @@ avxCbCSm4Single:
|
||||
VMOVDQU XWORD1, -48(BX)
|
||||
VMOVDQU XWORD2, -32(BX)
|
||||
VMOVDQU XWORD3, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avxCbcSm4Done
|
||||
|
||||
avxCbcSm4Single16:
|
||||
VMOVDQU -16(DX), XWORD0
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
|
||||
VMOVDQU XWORD0, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avxCbcSm4Done
|
||||
|
||||
avxCbcSm4Single32:
|
||||
VMOVDQU -32(DX), XWORD0
|
||||
VMOVDQU -16(DX), XWORD1
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -396,7 +387,7 @@ avxCbcSm4Single32:
|
||||
|
||||
VMOVDQU XWORD0, -32(BX)
|
||||
VMOVDQU XWORD1, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avxCbcSm4Done
|
||||
|
||||
avxCbcSm4Single48:
|
||||
@ -404,8 +395,6 @@ avxCbcSm4Single48:
|
||||
VMOVDQU -32(DX), XWORD1
|
||||
VMOVDQU -16(DX), XWORD2
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -415,9 +404,9 @@ avxCbcSm4Single48:
|
||||
VMOVDQU XWORD0, -48(BX)
|
||||
VMOVDQU XWORD1, -32(BX)
|
||||
VMOVDQU XWORD2, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
avxCbcSm4Done:
|
||||
VMOVDQU X15, (SI)
|
||||
RET
|
||||
|
||||
avx2Start:
|
||||
@ -425,6 +414,8 @@ avx2Start:
|
||||
VBROADCASTI128 flip_mask<>(SB), BYTE_FLIP_MASK
|
||||
VBROADCASTI128 bswap_mask<>(SB), BSWAP_MASK
|
||||
|
||||
VMOVDQU -16(DX), X15
|
||||
|
||||
avx2_16blocks:
|
||||
CMPQ DI, $256
|
||||
JLE avx2CbcSm4Octets
|
||||
@ -572,8 +563,6 @@ avx2CbCSm4Single:
|
||||
VMOVDQU -32(DX), XWORD2
|
||||
VMOVDQU -16(DX), XWORD3
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -585,20 +574,17 @@ avx2CbCSm4Single:
|
||||
VMOVDQU XWORD1, -48(BX)
|
||||
VMOVDQU XWORD2, -32(BX)
|
||||
VMOVDQU XWORD3, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avx2CbcSm4Done
|
||||
|
||||
avx2CbcSm4Single16:
|
||||
VMOVDQU -16(DX), XWORD0
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
|
||||
VMOVDQU XWORD0, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avx2CbcSm4Done
|
||||
|
||||
@ -606,8 +592,6 @@ avx2CbcSm4Single32:
|
||||
VMOVDQU -32(DX), XWORD0
|
||||
VMOVDQU -16(DX), XWORD1
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -615,7 +599,6 @@ avx2CbcSm4Single32:
|
||||
|
||||
VMOVDQU XWORD0, -32(BX)
|
||||
VMOVDQU XWORD1, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avx2CbcSm4Done
|
||||
|
||||
@ -624,8 +607,6 @@ avx2CbcSm4Single48:
|
||||
VMOVDQU -32(DX), XWORD1
|
||||
VMOVDQU -16(DX), XWORD2
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -635,8 +616,8 @@ avx2CbcSm4Single48:
|
||||
VMOVDQU XWORD0, -48(BX)
|
||||
VMOVDQU XWORD1, -32(BX)
|
||||
VMOVDQU XWORD2, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
avx2CbcSm4Done:
|
||||
VMOVDQU X15, (SI)
|
||||
VZEROUPPER
|
||||
RET
|
||||
|
@ -114,6 +114,10 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
|
||||
MOVD rk, rkSave
|
||||
VLD1 (R6), [IV.B16]
|
||||
|
||||
ADD srcPtr, srcPtrLen, R10
|
||||
SUB $16, R10, R10
|
||||
VLD1 (R10), [V15.S4]
|
||||
|
||||
cbcSm4Octets:
|
||||
CMP $128, srcPtrLen
|
||||
BLE cbcSm4Nibbles
|
||||
@ -233,7 +237,6 @@ cbcSm4Single:
|
||||
|
||||
// 4 blocks
|
||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VREV32 t1.B16, t1.B16
|
||||
VREV32 t2.B16, t2.B16
|
||||
@ -263,13 +266,11 @@ cbc4BlocksLoop64:
|
||||
VEOR V8.B16, t3.B16, t3.B16
|
||||
|
||||
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
|
||||
B cbcSm4Done
|
||||
|
||||
cbcSm4Single16:
|
||||
VLD1 (srcPtr), [t0.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VMOV t0.S[1], t1.S[0]
|
||||
VMOV t0.S[2], t2.S[0]
|
||||
@ -293,13 +294,11 @@ cbc4BlocksLoop16:
|
||||
VEOR IV.B16, t3.B16, t3.B16
|
||||
|
||||
VST1 [t3.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
|
||||
B cbcSm4Done
|
||||
|
||||
cbcSm4Single32:
|
||||
VLD1 (srcPtr), [t0.S4, t1.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VREV32 t1.B16, t1.B16
|
||||
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
|
||||
@ -323,12 +322,10 @@ cbc4BlocksLoop32:
|
||||
VEOR V6.B16, t1.B16, t1.B16
|
||||
|
||||
VST1 [t0.S4, t1.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
B cbcSm4Done
|
||||
|
||||
cbcSm4Single48:
|
||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VREV32 t1.B16, t1.B16
|
||||
VREV32 t2.B16, t2.B16
|
||||
@ -355,7 +352,7 @@ cbc4BlocksLoop48:
|
||||
VEOR V7.B16, t2.B16, t2.B16
|
||||
|
||||
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
|
||||
cbcSm4Done:
|
||||
VST1 [V15.S4], (R6)
|
||||
RET
|
||||
|
Loading…
x
Reference in New Issue
Block a user