mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
sm4: fix cbc iv issue
This commit is contained in:
parent
0fbc30f868
commit
feb76edda8
@ -132,7 +132,9 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
|
|||||||
JE avx2Start
|
JE avx2Start
|
||||||
|
|
||||||
CMPB ·useAVX(SB), $1
|
CMPB ·useAVX(SB), $1
|
||||||
JE avxCbcSm4Octets
|
JE avxStart
|
||||||
|
|
||||||
|
MOVOU -16(DX), X15
|
||||||
|
|
||||||
cbcSm4Octets:
|
cbcSm4Octets:
|
||||||
CMPQ DI, $128
|
CMPQ DI, $128
|
||||||
@ -211,8 +213,6 @@ cbCSm4Single:
|
|||||||
MOVOU -32(DX), XWORD2
|
MOVOU -32(DX), XWORD2
|
||||||
MOVOU -16(DX), XWORD3
|
MOVOU -16(DX), XWORD3
|
||||||
|
|
||||||
MOVOU XWORD0, XWORD4
|
|
||||||
|
|
||||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
PXOR 0(SI), XWORD0
|
PXOR 0(SI), XWORD0
|
||||||
@ -224,28 +224,24 @@ cbCSm4Single:
|
|||||||
MOVUPS XWORD1, -48(BX)
|
MOVUPS XWORD1, -48(BX)
|
||||||
MOVUPS XWORD2, -32(BX)
|
MOVUPS XWORD2, -32(BX)
|
||||||
MOVUPS XWORD3, -16(BX)
|
MOVUPS XWORD3, -16(BX)
|
||||||
MOVUPS XWORD4, (SI)
|
|
||||||
JMP cbcSm4Done
|
JMP cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single16:
|
cbcSm4Single16:
|
||||||
MOVOU -16(DX), XWORD0
|
MOVOU -16(DX), XWORD0
|
||||||
|
|
||||||
MOVOU XWORD0, XWORD4
|
|
||||||
|
|
||||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
PXOR 0(SI), XWORD0
|
PXOR 0(SI), XWORD0
|
||||||
|
|
||||||
MOVUPS XWORD0, -16(BX)
|
MOVUPS XWORD0, -16(BX)
|
||||||
MOVUPS XWORD4, (SI)
|
|
||||||
JMP cbcSm4Done
|
JMP cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single32:
|
cbcSm4Single32:
|
||||||
MOVOU -32(DX), XWORD0
|
MOVOU -32(DX), XWORD0
|
||||||
MOVOU -16(DX), XWORD1
|
MOVOU -16(DX), XWORD1
|
||||||
|
|
||||||
MOVOU XWORD0, XWORD4
|
|
||||||
|
|
||||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
PXOR 0(SI), XWORD0
|
PXOR 0(SI), XWORD0
|
||||||
@ -253,7 +249,7 @@ cbcSm4Single32:
|
|||||||
|
|
||||||
MOVUPS XWORD0, -32(BX)
|
MOVUPS XWORD0, -32(BX)
|
||||||
MOVUPS XWORD1, -16(BX)
|
MOVUPS XWORD1, -16(BX)
|
||||||
MOVUPS XWORD4, (SI)
|
|
||||||
JMP cbcSm4Done
|
JMP cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single48:
|
cbcSm4Single48:
|
||||||
@ -261,8 +257,6 @@ cbcSm4Single48:
|
|||||||
MOVOU -32(DX), XWORD1
|
MOVOU -32(DX), XWORD1
|
||||||
MOVOU -16(DX), XWORD2
|
MOVOU -16(DX), XWORD2
|
||||||
|
|
||||||
MOVOU XWORD0, XWORD4
|
|
||||||
|
|
||||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
PXOR 0(SI), XWORD0
|
PXOR 0(SI), XWORD0
|
||||||
@ -272,11 +266,14 @@ cbcSm4Single48:
|
|||||||
MOVUPS XWORD0, -48(BX)
|
MOVUPS XWORD0, -48(BX)
|
||||||
MOVUPS XWORD1, -32(BX)
|
MOVUPS XWORD1, -32(BX)
|
||||||
MOVUPS XWORD2, -16(BX)
|
MOVUPS XWORD2, -16(BX)
|
||||||
MOVUPS XWORD4, (SI)
|
|
||||||
|
|
||||||
cbcSm4Done:
|
cbcSm4Done:
|
||||||
|
MOVUPS X15, (SI)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
avxStart:
|
||||||
|
VMOVDQU -16(DX), X15
|
||||||
|
|
||||||
avxCbcSm4Octets:
|
avxCbcSm4Octets:
|
||||||
CMPQ DI, $128
|
CMPQ DI, $128
|
||||||
JLE avxCbcSm4Nibbles
|
JLE avxCbcSm4Nibbles
|
||||||
@ -354,8 +351,6 @@ avxCbCSm4Single:
|
|||||||
VMOVDQU -32(DX), XWORD2
|
VMOVDQU -32(DX), XWORD2
|
||||||
VMOVDQU -16(DX), XWORD3
|
VMOVDQU -16(DX), XWORD3
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
@ -367,28 +362,24 @@ avxCbCSm4Single:
|
|||||||
VMOVDQU XWORD1, -48(BX)
|
VMOVDQU XWORD1, -48(BX)
|
||||||
VMOVDQU XWORD2, -32(BX)
|
VMOVDQU XWORD2, -32(BX)
|
||||||
VMOVDQU XWORD3, -16(BX)
|
VMOVDQU XWORD3, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
JMP avxCbcSm4Done
|
JMP avxCbcSm4Done
|
||||||
|
|
||||||
avxCbcSm4Single16:
|
avxCbcSm4Single16:
|
||||||
VMOVDQU -16(DX), XWORD0
|
VMOVDQU -16(DX), XWORD0
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
|
|
||||||
VMOVDQU XWORD0, -16(BX)
|
VMOVDQU XWORD0, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
JMP avxCbcSm4Done
|
JMP avxCbcSm4Done
|
||||||
|
|
||||||
avxCbcSm4Single32:
|
avxCbcSm4Single32:
|
||||||
VMOVDQU -32(DX), XWORD0
|
VMOVDQU -32(DX), XWORD0
|
||||||
VMOVDQU -16(DX), XWORD1
|
VMOVDQU -16(DX), XWORD1
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
@ -396,7 +387,7 @@ avxCbcSm4Single32:
|
|||||||
|
|
||||||
VMOVDQU XWORD0, -32(BX)
|
VMOVDQU XWORD0, -32(BX)
|
||||||
VMOVDQU XWORD1, -16(BX)
|
VMOVDQU XWORD1, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
JMP avxCbcSm4Done
|
JMP avxCbcSm4Done
|
||||||
|
|
||||||
avxCbcSm4Single48:
|
avxCbcSm4Single48:
|
||||||
@ -404,8 +395,6 @@ avxCbcSm4Single48:
|
|||||||
VMOVDQU -32(DX), XWORD1
|
VMOVDQU -32(DX), XWORD1
|
||||||
VMOVDQU -16(DX), XWORD2
|
VMOVDQU -16(DX), XWORD2
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
@ -415,9 +404,9 @@ avxCbcSm4Single48:
|
|||||||
VMOVDQU XWORD0, -48(BX)
|
VMOVDQU XWORD0, -48(BX)
|
||||||
VMOVDQU XWORD1, -32(BX)
|
VMOVDQU XWORD1, -32(BX)
|
||||||
VMOVDQU XWORD2, -16(BX)
|
VMOVDQU XWORD2, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
|
|
||||||
avxCbcSm4Done:
|
avxCbcSm4Done:
|
||||||
|
VMOVDQU X15, (SI)
|
||||||
RET
|
RET
|
||||||
|
|
||||||
avx2Start:
|
avx2Start:
|
||||||
@ -425,6 +414,8 @@ avx2Start:
|
|||||||
VBROADCASTI128 flip_mask<>(SB), BYTE_FLIP_MASK
|
VBROADCASTI128 flip_mask<>(SB), BYTE_FLIP_MASK
|
||||||
VBROADCASTI128 bswap_mask<>(SB), BSWAP_MASK
|
VBROADCASTI128 bswap_mask<>(SB), BSWAP_MASK
|
||||||
|
|
||||||
|
VMOVDQU -16(DX), X15
|
||||||
|
|
||||||
avx2_16blocks:
|
avx2_16blocks:
|
||||||
CMPQ DI, $256
|
CMPQ DI, $256
|
||||||
JLE avx2CbcSm4Octets
|
JLE avx2CbcSm4Octets
|
||||||
@ -572,8 +563,6 @@ avx2CbCSm4Single:
|
|||||||
VMOVDQU -32(DX), XWORD2
|
VMOVDQU -32(DX), XWORD2
|
||||||
VMOVDQU -16(DX), XWORD3
|
VMOVDQU -16(DX), XWORD3
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
@ -585,20 +574,17 @@ avx2CbCSm4Single:
|
|||||||
VMOVDQU XWORD1, -48(BX)
|
VMOVDQU XWORD1, -48(BX)
|
||||||
VMOVDQU XWORD2, -32(BX)
|
VMOVDQU XWORD2, -32(BX)
|
||||||
VMOVDQU XWORD3, -16(BX)
|
VMOVDQU XWORD3, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
JMP avx2CbcSm4Done
|
JMP avx2CbcSm4Done
|
||||||
|
|
||||||
avx2CbcSm4Single16:
|
avx2CbcSm4Single16:
|
||||||
VMOVDQU -16(DX), XWORD0
|
VMOVDQU -16(DX), XWORD0
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
|
|
||||||
VMOVDQU XWORD0, -16(BX)
|
VMOVDQU XWORD0, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
|
|
||||||
JMP avx2CbcSm4Done
|
JMP avx2CbcSm4Done
|
||||||
|
|
||||||
@ -606,8 +592,6 @@ avx2CbcSm4Single32:
|
|||||||
VMOVDQU -32(DX), XWORD0
|
VMOVDQU -32(DX), XWORD0
|
||||||
VMOVDQU -16(DX), XWORD1
|
VMOVDQU -16(DX), XWORD1
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
@ -615,7 +599,6 @@ avx2CbcSm4Single32:
|
|||||||
|
|
||||||
VMOVDQU XWORD0, -32(BX)
|
VMOVDQU XWORD0, -32(BX)
|
||||||
VMOVDQU XWORD1, -16(BX)
|
VMOVDQU XWORD1, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
|
|
||||||
JMP avx2CbcSm4Done
|
JMP avx2CbcSm4Done
|
||||||
|
|
||||||
@ -624,8 +607,6 @@ avx2CbcSm4Single48:
|
|||||||
VMOVDQU -32(DX), XWORD1
|
VMOVDQU -32(DX), XWORD1
|
||||||
VMOVDQU -16(DX), XWORD2
|
VMOVDQU -16(DX), XWORD2
|
||||||
|
|
||||||
VMOVDQU XWORD0, XWORD4
|
|
||||||
|
|
||||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||||
|
|
||||||
VPXOR 0(SI), XWORD0, XWORD0
|
VPXOR 0(SI), XWORD0, XWORD0
|
||||||
@ -635,8 +616,8 @@ avx2CbcSm4Single48:
|
|||||||
VMOVDQU XWORD0, -48(BX)
|
VMOVDQU XWORD0, -48(BX)
|
||||||
VMOVDQU XWORD1, -32(BX)
|
VMOVDQU XWORD1, -32(BX)
|
||||||
VMOVDQU XWORD2, -16(BX)
|
VMOVDQU XWORD2, -16(BX)
|
||||||
VMOVDQU XWORD4, (SI)
|
|
||||||
|
|
||||||
avx2CbcSm4Done:
|
avx2CbcSm4Done:
|
||||||
|
VMOVDQU X15, (SI)
|
||||||
VZEROUPPER
|
VZEROUPPER
|
||||||
RET
|
RET
|
||||||
|
@ -114,6 +114,10 @@ TEXT ·decryptBlocksChain(SB),NOSPLIT,$0
|
|||||||
MOVD rk, rkSave
|
MOVD rk, rkSave
|
||||||
VLD1 (R6), [IV.B16]
|
VLD1 (R6), [IV.B16]
|
||||||
|
|
||||||
|
ADD srcPtr, srcPtrLen, R10
|
||||||
|
SUB $16, R10, R10
|
||||||
|
VLD1 (R10), [V15.S4]
|
||||||
|
|
||||||
cbcSm4Octets:
|
cbcSm4Octets:
|
||||||
CMP $128, srcPtrLen
|
CMP $128, srcPtrLen
|
||||||
BLE cbcSm4Nibbles
|
BLE cbcSm4Nibbles
|
||||||
@ -233,7 +237,6 @@ cbcSm4Single:
|
|||||||
|
|
||||||
// 4 blocks
|
// 4 blocks
|
||||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
|
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
|
||||||
VMOV t0.B16, t4.B16
|
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
@ -263,13 +266,11 @@ cbc4BlocksLoop64:
|
|||||||
VEOR V8.B16, t3.B16, t3.B16
|
VEOR V8.B16, t3.B16, t3.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
|
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
|
||||||
VST1 [t4.S4], (R6)
|
|
||||||
|
|
||||||
B cbcSm4Done
|
B cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single16:
|
cbcSm4Single16:
|
||||||
VLD1 (srcPtr), [t0.S4]
|
VLD1 (srcPtr), [t0.S4]
|
||||||
VMOV t0.B16, t4.B16
|
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VMOV t0.S[1], t1.S[0]
|
VMOV t0.S[1], t1.S[0]
|
||||||
VMOV t0.S[2], t2.S[0]
|
VMOV t0.S[2], t2.S[0]
|
||||||
@ -293,13 +294,11 @@ cbc4BlocksLoop16:
|
|||||||
VEOR IV.B16, t3.B16, t3.B16
|
VEOR IV.B16, t3.B16, t3.B16
|
||||||
|
|
||||||
VST1 [t3.S4], (dstPtr)
|
VST1 [t3.S4], (dstPtr)
|
||||||
VST1 [t4.S4], (R6)
|
|
||||||
|
|
||||||
B cbcSm4Done
|
B cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single32:
|
cbcSm4Single32:
|
||||||
VLD1 (srcPtr), [t0.S4, t1.S4]
|
VLD1 (srcPtr), [t0.S4, t1.S4]
|
||||||
VMOV t0.B16, t4.B16
|
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
|
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
|
||||||
@ -323,12 +322,10 @@ cbc4BlocksLoop32:
|
|||||||
VEOR V6.B16, t1.B16, t1.B16
|
VEOR V6.B16, t1.B16, t1.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4], (dstPtr)
|
VST1 [t0.S4, t1.S4], (dstPtr)
|
||||||
VST1 [t4.S4], (R6)
|
|
||||||
B cbcSm4Done
|
B cbcSm4Done
|
||||||
|
|
||||||
cbcSm4Single48:
|
cbcSm4Single48:
|
||||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
|
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
|
||||||
VMOV t0.B16, t4.B16
|
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
VREV32 t1.B16, t1.B16
|
VREV32 t1.B16, t1.B16
|
||||||
VREV32 t2.B16, t2.B16
|
VREV32 t2.B16, t2.B16
|
||||||
@ -355,7 +352,7 @@ cbc4BlocksLoop48:
|
|||||||
VEOR V7.B16, t2.B16, t2.B16
|
VEOR V7.B16, t2.B16, t2.B16
|
||||||
|
|
||||||
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
|
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
|
||||||
VST1 [t4.S4], (R6)
|
|
||||||
|
|
||||||
cbcSm4Done:
|
cbcSm4Done:
|
||||||
|
VST1 [V15.S4], (R6)
|
||||||
RET
|
RET
|
||||||
|
Loading…
x
Reference in New Issue
Block a user