mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-22 10:16:18 +08:00
sm4: optimize cbc iv handling
This commit is contained in:
parent
cb47e82478
commit
0fbc30f868
@ -211,6 +211,8 @@ cbCSm4Single:
|
||||
MOVOU -32(DX), XWORD2
|
||||
MOVOU -16(DX), XWORD3
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
@ -222,22 +224,28 @@ cbCSm4Single:
|
||||
MOVUPS XWORD1, -48(BX)
|
||||
MOVUPS XWORD2, -32(BX)
|
||||
MOVUPS XWORD3, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
JMP cbcSm4Done
|
||||
|
||||
cbcSm4Single16:
|
||||
MOVOU -16(DX), XWORD0
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
|
||||
MOVUPS XWORD0, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
JMP cbcSm4Done
|
||||
|
||||
cbcSm4Single32:
|
||||
MOVOU -32(DX), XWORD0
|
||||
MOVOU -16(DX), XWORD1
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
@ -245,6 +253,7 @@ cbcSm4Single32:
|
||||
|
||||
MOVUPS XWORD0, -32(BX)
|
||||
MOVUPS XWORD1, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
JMP cbcSm4Done
|
||||
|
||||
cbcSm4Single48:
|
||||
@ -252,6 +261,8 @@ cbcSm4Single48:
|
||||
MOVOU -32(DX), XWORD1
|
||||
MOVOU -16(DX), XWORD2
|
||||
|
||||
MOVOU XWORD0, XWORD4
|
||||
|
||||
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
PXOR 0(SI), XWORD0
|
||||
@ -261,6 +272,7 @@ cbcSm4Single48:
|
||||
MOVUPS XWORD0, -48(BX)
|
||||
MOVUPS XWORD1, -32(BX)
|
||||
MOVUPS XWORD2, -16(BX)
|
||||
MOVUPS XWORD4, (SI)
|
||||
|
||||
cbcSm4Done:
|
||||
RET
|
||||
@ -342,6 +354,8 @@ avxCbCSm4Single:
|
||||
VMOVDQU -32(DX), XWORD2
|
||||
VMOVDQU -16(DX), XWORD3
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -353,22 +367,28 @@ avxCbCSm4Single:
|
||||
VMOVDQU XWORD1, -48(BX)
|
||||
VMOVDQU XWORD2, -32(BX)
|
||||
VMOVDQU XWORD3, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
JMP avxCbcSm4Done
|
||||
|
||||
avxCbcSm4Single16:
|
||||
VMOVDQU -16(DX), XWORD0
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
|
||||
VMOVDQU XWORD0, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
JMP avxCbcSm4Done
|
||||
|
||||
avxCbcSm4Single32:
|
||||
VMOVDQU -32(DX), XWORD0
|
||||
VMOVDQU -16(DX), XWORD1
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -376,6 +396,7 @@ avxCbcSm4Single32:
|
||||
|
||||
VMOVDQU XWORD0, -32(BX)
|
||||
VMOVDQU XWORD1, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
JMP avxCbcSm4Done
|
||||
|
||||
avxCbcSm4Single48:
|
||||
@ -383,6 +404,8 @@ avxCbcSm4Single48:
|
||||
VMOVDQU -32(DX), XWORD1
|
||||
VMOVDQU -16(DX), XWORD2
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -392,6 +415,7 @@ avxCbcSm4Single48:
|
||||
VMOVDQU XWORD0, -48(BX)
|
||||
VMOVDQU XWORD1, -32(BX)
|
||||
VMOVDQU XWORD2, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
avxCbcSm4Done:
|
||||
RET
|
||||
@ -548,6 +572,8 @@ avx2CbCSm4Single:
|
||||
VMOVDQU -32(DX), XWORD2
|
||||
VMOVDQU -16(DX), XWORD3
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -559,22 +585,29 @@ avx2CbCSm4Single:
|
||||
VMOVDQU XWORD1, -48(BX)
|
||||
VMOVDQU XWORD2, -32(BX)
|
||||
VMOVDQU XWORD3, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
JMP avx2CbcSm4Done
|
||||
|
||||
avx2CbcSm4Single16:
|
||||
VMOVDQU -16(DX), XWORD0
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
|
||||
VMOVDQU XWORD0, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avx2CbcSm4Done
|
||||
|
||||
avx2CbcSm4Single32:
|
||||
VMOVDQU -32(DX), XWORD0
|
||||
VMOVDQU -16(DX), XWORD1
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -582,6 +615,8 @@ avx2CbcSm4Single32:
|
||||
|
||||
VMOVDQU XWORD0, -32(BX)
|
||||
VMOVDQU XWORD1, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
JMP avx2CbcSm4Done
|
||||
|
||||
avx2CbcSm4Single48:
|
||||
@ -589,6 +624,8 @@ avx2CbcSm4Single48:
|
||||
VMOVDQU -32(DX), XWORD1
|
||||
VMOVDQU -16(DX), XWORD2
|
||||
|
||||
VMOVDQU XWORD0, XWORD4
|
||||
|
||||
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
|
||||
|
||||
VPXOR 0(SI), XWORD0, XWORD0
|
||||
@ -598,6 +635,7 @@ avx2CbcSm4Single48:
|
||||
VMOVDQU XWORD0, -48(BX)
|
||||
VMOVDQU XWORD1, -32(BX)
|
||||
VMOVDQU XWORD2, -16(BX)
|
||||
VMOVDQU XWORD4, (SI)
|
||||
|
||||
avx2CbcSm4Done:
|
||||
VZEROUPPER
|
||||
|
@ -233,6 +233,7 @@ cbcSm4Single:
|
||||
|
||||
// 4 blocks
|
||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VREV32 t1.B16, t1.B16
|
||||
VREV32 t2.B16, t2.B16
|
||||
@ -262,11 +263,13 @@ cbc4BlocksLoop64:
|
||||
VEOR V8.B16, t3.B16, t3.B16
|
||||
|
||||
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
|
||||
B cbcSm4Done
|
||||
|
||||
cbcSm4Single16:
|
||||
VLD1 (srcPtr), [t0.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VMOV t0.S[1], t1.S[0]
|
||||
VMOV t0.S[2], t2.S[0]
|
||||
@ -290,11 +293,13 @@ cbc4BlocksLoop16:
|
||||
VEOR IV.B16, t3.B16, t3.B16
|
||||
|
||||
VST1 [t3.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
|
||||
B cbcSm4Done
|
||||
|
||||
cbcSm4Single32:
|
||||
VLD1 (srcPtr), [t0.S4, t1.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VREV32 t1.B16, t1.B16
|
||||
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
|
||||
@ -318,10 +323,12 @@ cbc4BlocksLoop32:
|
||||
VEOR V6.B16, t1.B16, t1.B16
|
||||
|
||||
VST1 [t0.S4, t1.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
B cbcSm4Done
|
||||
|
||||
cbcSm4Single48:
|
||||
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
|
||||
VMOV t0.B16, t4.B16
|
||||
VREV32 t0.B16, t0.B16
|
||||
VREV32 t1.B16, t1.B16
|
||||
VREV32 t2.B16, t2.B16
|
||||
@ -348,6 +355,7 @@ cbc4BlocksLoop48:
|
||||
VEOR V7.B16, t2.B16, t2.B16
|
||||
|
||||
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
|
||||
VST1 [t4.S4], (R6)
|
||||
|
||||
cbcSm4Done:
|
||||
RET
|
||||
|
@ -19,7 +19,6 @@ const cbcDecrypt = 0
|
||||
// cbc holds the state for SM4 CBC processing that is delegated to the
// encryptBlocksChain / decryptBlocksChain routines.
// NOTE(review): this text is a rendered diff without +/- markers, so fields
// the commit removes and fields it keeps appear side by side — confirm the
// final field set against the real file.
type cbc struct {
|
||||
// b is the SM4 cipher; its enc or dec data is what gets passed to the chain routines.
b *sm4CipherAsm
|
||||
// iv is the current chaining value; both constructors allocate BlockSize
// bytes and copy the caller's IV into it.
iv []byte
|
||||
// tmp is a BlockSize scratch buffer: in the CryptBlocks lines visible here it
// receives the last ciphertext block before decryption and is then swapped with iv.
tmp []byte
|
||||
// enc is the direction flag, set to cbcEncrypt or cbcDecrypt by the matching constructor.
enc int
|
||||
}
|
||||
|
||||
@ -28,7 +27,6 @@ func (b *sm4CipherAsm) NewCBCEncrypter(iv []byte) cipher.BlockMode {
|
||||
c.b = b
|
||||
c.enc = cbcEncrypt
|
||||
c.iv = make([]byte, BlockSize)
|
||||
c.tmp = make([]byte, BlockSize)
|
||||
copy(c.iv, iv)
|
||||
return &c
|
||||
}
|
||||
@ -38,7 +36,6 @@ func (b *sm4CipherAsm) NewCBCDecrypter(iv []byte) cipher.BlockMode {
|
||||
c.b = b
|
||||
c.enc = cbcDecrypt
|
||||
c.iv = make([]byte, BlockSize)
|
||||
c.tmp = make([]byte, BlockSize)
|
||||
copy(c.iv, iv)
|
||||
return &c
|
||||
}
|
||||
@ -68,16 +65,8 @@ func (x *cbc) CryptBlocks(dst, src []byte) {
|
||||
encryptBlocksChain(&x.b.enc[0], dst, src, &x.iv[0])
|
||||
return
|
||||
}
|
||||
// For each block, we need to xor the decrypted data with the previous block's ciphertext (the iv).
|
||||
// To avoid making a copy each time, we loop over the blocks BACKWARDS.
|
||||
end := len(src)
|
||||
// Copy the last block of ciphertext in preparation as the new iv.
|
||||
copy(x.tmp, src[end-BlockSize:end])
|
||||
|
||||
decryptBlocksChain(&x.b.dec[0], dst, src, &x.iv[0])
|
||||
|
||||
// Set the new iv to the last ciphertext block we saved in x.tmp earlier.
|
||||
x.iv, x.tmp = x.tmp, x.iv
|
||||
}
|
||||
|
||||
func (x *cbc) SetIV(iv []byte) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user