sm4: optimize cbc iv handling

This commit is contained in:
Sun Yimin 2023-08-08 12:35:42 +08:00 committed by GitHub
parent cb47e82478
commit 0fbc30f868
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 46 additions and 11 deletions

View File

@ -211,6 +211,8 @@ cbCSm4Single:
MOVOU -32(DX), XWORD2
MOVOU -16(DX), XWORD3
MOVOU XWORD0, XWORD4
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
PXOR 0(SI), XWORD0
@ -222,22 +224,28 @@ cbCSm4Single:
MOVUPS XWORD1, -48(BX)
MOVUPS XWORD2, -32(BX)
MOVUPS XWORD3, -16(BX)
MOVUPS XWORD4, (SI)
JMP cbcSm4Done
cbcSm4Single16:
// SSE tail path that handles exactly one 16-byte block.
// NOTE(review): register roles are inferred from how they are used in this
// fragment - DX/BX look like end-of-src/end-of-dst pointers and SI like the
// iv pointer; confirm against the function prologue, which is not visible here.
//
// Load the 16 bytes at DX-16 into XWORD0 (unaligned load).
MOVOU -16(DX), XWORD0
// Keep an untouched copy of the loaded block in XWORD4; it is written to (SI) below.
MOVOU XWORD0, XWORD4
// Only XWORD0 was loaded on this path; XWORD1-XWORD3 are still passed to the
// macro, but their results are never stored here.
// NOTE(review): SM4_4BLOCKS is defined elsewhere - presumably it runs the SM4
// rounds on four blocks with the round keys at AX; confirm.
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
// XOR the macro's result with the 16 bytes currently stored at (SI).
PXOR 0(SI), XWORD0
// Write the XORed block to BX-16.
MOVUPS XWORD0, -16(BX)
// Overwrite (SI) with the saved input block. This happens after the PXOR above
// has consumed the old contents of (SI).
// NOTE(review): the saved block is the one at DX-16 *at this point*; it is the
// last block of the whole input only if no earlier loop moved DX - verify
// against the preceding (not visible) loops.
MOVUPS XWORD4, (SI)
JMP cbcSm4Done
cbcSm4Single32:
MOVOU -32(DX), XWORD0
MOVOU -16(DX), XWORD1
MOVOU XWORD0, XWORD4
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
PXOR 0(SI), XWORD0
@ -245,6 +253,7 @@ cbcSm4Single32:
MOVUPS XWORD0, -32(BX)
MOVUPS XWORD1, -16(BX)
MOVUPS XWORD4, (SI)
JMP cbcSm4Done
cbcSm4Single48:
@ -252,6 +261,8 @@ cbcSm4Single48:
MOVOU -32(DX), XWORD1
MOVOU -16(DX), XWORD2
MOVOU XWORD0, XWORD4
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
PXOR 0(SI), XWORD0
@ -261,6 +272,7 @@ cbcSm4Single48:
MOVUPS XWORD0, -48(BX)
MOVUPS XWORD1, -32(BX)
MOVUPS XWORD2, -16(BX)
MOVUPS XWORD4, (SI)
cbcSm4Done:
RET
@ -342,6 +354,8 @@ avxCbCSm4Single:
VMOVDQU -32(DX), XWORD2
VMOVDQU -16(DX), XWORD3
VMOVDQU XWORD0, XWORD4
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
VPXOR 0(SI), XWORD0, XWORD0
@ -353,22 +367,28 @@ avxCbCSm4Single:
VMOVDQU XWORD1, -48(BX)
VMOVDQU XWORD2, -32(BX)
VMOVDQU XWORD3, -16(BX)
VMOVDQU XWORD4, (SI)
JMP avxCbcSm4Done
avxCbcSm4Single16:
// AVX tail path that handles exactly one 16-byte block.
// NOTE(review): register roles are inferred from how they are used in this
// fragment - DX/BX look like end-of-src/end-of-dst pointers and SI like the
// iv pointer; confirm against the function prologue, which is not visible here.
//
// Load the 16 bytes at DX-16 into XWORD0 (unaligned load).
VMOVDQU -16(DX), XWORD0
// Keep an untouched copy of the loaded block in XWORD4; it is written to (SI) below.
VMOVDQU XWORD0, XWORD4
// Only XWORD0 was loaded on this path; XWORD1-XWORD3 are still passed to the
// macro, but their results are never stored here.
// NOTE(review): AVX_SM4_4BLOCKS is defined elsewhere - presumably it runs the
// SM4 rounds on four blocks with the round keys at AX; confirm.
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
// XOR the macro's result with the 16 bytes currently stored at (SI).
VPXOR 0(SI), XWORD0, XWORD0
// Write the XORed block to BX-16.
VMOVDQU XWORD0, -16(BX)
// Overwrite (SI) with the saved input block. This happens after the VPXOR above
// has consumed the old contents of (SI).
// NOTE(review): the saved block is the one at DX-16 *at this point*; it is the
// last block of the whole input only if no earlier loop moved DX - verify
// against the preceding (not visible) loops.
VMOVDQU XWORD4, (SI)
JMP avxCbcSm4Done
avxCbcSm4Single32:
VMOVDQU -32(DX), XWORD0
VMOVDQU -16(DX), XWORD1
VMOVDQU XWORD0, XWORD4
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
VPXOR 0(SI), XWORD0, XWORD0
@ -376,6 +396,7 @@ avxCbcSm4Single32:
VMOVDQU XWORD0, -32(BX)
VMOVDQU XWORD1, -16(BX)
VMOVDQU XWORD4, (SI)
JMP avxCbcSm4Done
avxCbcSm4Single48:
@ -383,6 +404,8 @@ avxCbcSm4Single48:
VMOVDQU -32(DX), XWORD1
VMOVDQU -16(DX), XWORD2
VMOVDQU XWORD0, XWORD4
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
VPXOR 0(SI), XWORD0, XWORD0
@ -392,6 +415,7 @@ avxCbcSm4Single48:
VMOVDQU XWORD0, -48(BX)
VMOVDQU XWORD1, -32(BX)
VMOVDQU XWORD2, -16(BX)
VMOVDQU XWORD4, (SI)
avxCbcSm4Done:
RET
@ -548,6 +572,8 @@ avx2CbCSm4Single:
VMOVDQU -32(DX), XWORD2
VMOVDQU -16(DX), XWORD3
VMOVDQU XWORD0, XWORD4
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
VPXOR 0(SI), XWORD0, XWORD0
@ -559,22 +585,29 @@ avx2CbCSm4Single:
VMOVDQU XWORD1, -48(BX)
VMOVDQU XWORD2, -32(BX)
VMOVDQU XWORD3, -16(BX)
VMOVDQU XWORD4, (SI)
JMP avx2CbcSm4Done
avx2CbcSm4Single16:
// Tail path of the AVX2 variant that handles exactly one 16-byte block.
// It uses the same 128-bit AVX macro (AVX_SM4_4BLOCKS) as the AVX path.
// NOTE(review): register roles are inferred from how they are used in this
// fragment - DX/BX look like end-of-src/end-of-dst pointers and SI like the
// iv pointer; confirm against the function prologue, which is not visible here.
//
// Load the 16 bytes at DX-16 into XWORD0 (unaligned load).
VMOVDQU -16(DX), XWORD0
// Keep an untouched copy of the loaded block in XWORD4; it is written to (SI) below.
VMOVDQU XWORD0, XWORD4
// Only XWORD0 was loaded on this path; XWORD1-XWORD3 are still passed to the
// macro, but their results are never stored here.
// NOTE(review): AVX_SM4_4BLOCKS is defined elsewhere - presumably it runs the
// SM4 rounds on four blocks with the round keys at AX; confirm.
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
// XOR the macro's result with the 16 bytes currently stored at (SI).
VPXOR 0(SI), XWORD0, XWORD0
// Write the XORed block to BX-16.
VMOVDQU XWORD0, -16(BX)
// Overwrite (SI) with the saved input block. This happens after the VPXOR above
// has consumed the old contents of (SI).
// NOTE(review): the saved block is the one at DX-16 *at this point*; it is the
// last block of the whole input only if no earlier loop moved DX - verify
// against the preceding (not visible) loops.
VMOVDQU XWORD4, (SI)
// The jump target avx2CbcSm4Done executes VZEROUPPER.
JMP avx2CbcSm4Done
avx2CbcSm4Single32:
VMOVDQU -32(DX), XWORD0
VMOVDQU -16(DX), XWORD1
VMOVDQU XWORD0, XWORD4
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
VPXOR 0(SI), XWORD0, XWORD0
@ -582,6 +615,8 @@ avx2CbcSm4Single32:
VMOVDQU XWORD0, -32(BX)
VMOVDQU XWORD1, -16(BX)
VMOVDQU XWORD4, (SI)
JMP avx2CbcSm4Done
avx2CbcSm4Single48:
@ -589,6 +624,8 @@ avx2CbcSm4Single48:
VMOVDQU -32(DX), XWORD1
VMOVDQU -16(DX), XWORD2
VMOVDQU XWORD0, XWORD4
AVX_SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
VPXOR 0(SI), XWORD0, XWORD0
@ -598,6 +635,7 @@ avx2CbcSm4Single48:
VMOVDQU XWORD0, -48(BX)
VMOVDQU XWORD1, -32(BX)
VMOVDQU XWORD2, -16(BX)
VMOVDQU XWORD4, (SI)
avx2CbcSm4Done:
VZEROUPPER

View File

@ -233,6 +233,7 @@ cbcSm4Single:
// 4 blocks
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4, t3.S4]
VMOV t0.B16, t4.B16
VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16
VREV32 t2.B16, t2.B16
@ -262,11 +263,13 @@ cbc4BlocksLoop64:
VEOR V8.B16, t3.B16, t3.B16
VST1 [t0.S4, t1.S4, t2.S4, t3.S4], (dstPtr)
VST1 [t4.S4], (R6)
B cbcSm4Done
cbcSm4Single16:
VLD1 (srcPtr), [t0.S4]
VMOV t0.B16, t4.B16
VREV32 t0.B16, t0.B16
VMOV t0.S[1], t1.S[0]
VMOV t0.S[2], t2.S[0]
@ -290,11 +293,13 @@ cbc4BlocksLoop16:
VEOR IV.B16, t3.B16, t3.B16
VST1 [t3.S4], (dstPtr)
VST1 [t4.S4], (R6)
B cbcSm4Done
cbcSm4Single32:
VLD1 (srcPtr), [t0.S4, t1.S4]
VMOV t0.B16, t4.B16
VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16
PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
@ -318,10 +323,12 @@ cbc4BlocksLoop32:
VEOR V6.B16, t1.B16, t1.B16
VST1 [t0.S4, t1.S4], (dstPtr)
VST1 [t4.S4], (R6)
B cbcSm4Done
cbcSm4Single48:
VLD1 (srcPtr), [t0.S4, t1.S4, t2.S4]
VMOV t0.B16, t4.B16
VREV32 t0.B16, t0.B16
VREV32 t1.B16, t1.B16
VREV32 t2.B16, t2.B16
@ -348,6 +355,7 @@ cbc4BlocksLoop48:
VEOR V7.B16, t2.B16, t2.B16
VST1 [t0.S4, t1.S4, t2.S4], (dstPtr)
VST1 [t4.S4], (R6)
cbcSm4Done:
RET

View File

@ -19,7 +19,6 @@ const cbcDecrypt = 0
// cbc is the state behind the cipher.BlockMode values returned by
// NewCBCEncrypter and NewCBCDecrypter.
type cbc struct {
// b is the underlying cipher; CryptBlocks passes &b.enc[0] or &b.dec[0]
// to the assembly chain routines.
b *sm4CipherAsm
// iv is the current chaining value; the constructors allocate BlockSize
// bytes and copy the caller's iv into it.
iv []byte
// tmp is a BlockSize scratch buffer; the decrypt path of CryptBlocks copies
// the last ciphertext block of src into it and then swaps it with iv.
tmp []byte
// enc is the direction: cbcEncrypt or cbcDecrypt.
enc int
}
@ -28,7 +27,6 @@ func (b *sm4CipherAsm) NewCBCEncrypter(iv []byte) cipher.BlockMode {
c.b = b
c.enc = cbcEncrypt
c.iv = make([]byte, BlockSize)
c.tmp = make([]byte, BlockSize)
copy(c.iv, iv)
return &c
}
@ -38,7 +36,6 @@ func (b *sm4CipherAsm) NewCBCDecrypter(iv []byte) cipher.BlockMode {
c.b = b
c.enc = cbcDecrypt
c.iv = make([]byte, BlockSize)
c.tmp = make([]byte, BlockSize)
copy(c.iv, iv)
return &c
}
@ -68,16 +65,8 @@ func (x *cbc) CryptBlocks(dst, src []byte) {
encryptBlocksChain(&x.b.enc[0], dst, src, &x.iv[0])
return
}
// For each block, we need to xor the decrypted data with the previous block's ciphertext (the iv).
// To avoid making a copy each time, we loop over the blocks BACKWARDS.
end := len(src)
// Copy the last block of ciphertext in preparation as the new iv.
copy(x.tmp, src[end-BlockSize:end])
decryptBlocksChain(&x.b.dec[0], dst, src, &x.iv[0])
// Set the new iv to the first block we copied earlier.
x.iv, x.tmp = x.tmp, x.iv
}
func (x *cbc) SetIV(iv []byte) {