mirror of https://github.com/emmansun/gmsm.git (synced 2025-05-12 03:56:17 +08:00)

sm4: slightly improve cbc encryption

This commit is contained in:
parent 8ddf1bc68f
commit 42faebb588
@@ -319,17 +319,11 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
 	MOVQ dst+8(FP), BX
 	MOVQ src+16(FP), DX
 
-	PINSRD $0, 0(DX), t0
-	PSHUFB flip_mask<>(SB), t0
-
-	PINSRD $0, 4(DX), t1
-	PSHUFB flip_mask<>(SB), t1
-
-	PINSRD $0, 8(DX), t2
-	PSHUFB flip_mask<>(SB), t2
-
-	PINSRD $0, 12(DX), t3
-	PSHUFB flip_mask<>(SB), t3
+	MOVUPS (DX), t0
+	PSHUFB flip_mask<>(SB), t0
+	PSHUFD $1, t0, t1
+	PSHUFD $2, t0, t2
+	PSHUFD $3, t0, t3
 
 	XORL CX, CX
 
@@ -343,16 +337,14 @@ loop:
 	CMPL CX, $4*32
 	JB loop
 
-	PSHUFB flip_mask<>(SB), t3
-	PSHUFB flip_mask<>(SB), t2
-	PSHUFB flip_mask<>(SB), t1
-	PSHUFB flip_mask<>(SB), t0
-	MOVUPS t3, 0(BX)
-	PEXTRD $0, t2, R8
-	MOVL R8, 4(BX)
-	PEXTRD $0, t1, R8
-	MOVL R8, 8(BX)
-	PEXTRD $0, t0, R8
-	MOVL R8, 12(BX)
+	PEXTRD $0, t2, R8
+	PINSRD $1, R8, t3
+	PEXTRD $0, t1, R8
+	PINSRD $2, R8, t3
+	PEXTRD $0, t0, R8
+	PINSRD $3, R8, t3
+	PSHUFB flip_mask<>(SB), t3
+	MOVUPS t3, (BX)
+
 done_sm4:
 	RET
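Editor's note: in encryptBlockAsm the commit replaces four PINSRD word loads (each followed by its own PSHUFB) with a single MOVUPS, one byte-order flip, and three PSHUFD word broadcasts; on output, three PEXTRD/PINSRD pairs pack the state back into t3 so a single 16-byte MOVUPS replaces one 16-byte store plus three 4-byte stores. A rough Go model of that data movement (mine, not part of the commit; it assumes flip_mask reverses bytes within each 32-bit lane):

package main

import (
	"encoding/binary"
	"fmt"
)

// loadBlock models MOVUPS + PSHUFB flip_mask + PSHUFD $1/$2/$3:
// one unaligned 16-byte load, then the four big-endian words of the
// block fanned out into t0..t3.
func loadBlock(src []byte) (t0, t1, t2, t3 uint32) {
	t0 = binary.BigEndian.Uint32(src[0:4])
	t1 = binary.BigEndian.Uint32(src[4:8])
	t2 = binary.BigEndian.Uint32(src[8:12])
	t3 = binary.BigEndian.Uint32(src[12:16])
	return
}

// storeBlock models the PEXTRD/PINSRD packing plus one PSHUFB and a
// single MOVUPS: SM4's final reversal means t3 supplies the first
// output word and t0 the last.
func storeBlock(dst []byte, t0, t1, t2, t3 uint32) {
	binary.BigEndian.PutUint32(dst[0:4], t3)
	binary.BigEndian.PutUint32(dst[4:8], t2)   // PINSRD $1, R8, t3
	binary.BigEndian.PutUint32(dst[8:12], t1)  // PINSRD $2, R8, t3
	binary.BigEndian.PutUint32(dst[12:16], t0) // PINSRD $3, R8, t3
}

func main() {
	t0, t1, t2, t3 := loadBlock([]byte("0123456789abcdef"))
	out := make([]byte, 16)
	storeBlock(out, t0, t1, t2, t3)
	fmt.Printf("%s\n", out) // "cdef89ab45670123": the final word reversal
}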
@@ -12,16 +12,32 @@ import (
 
 // Assert that sm4CipherAsm implements the cbcDecAble interfaces.
+var _ cbcEncAble = (*sm4CipherAsm)(nil)
 var _ cbcDecAble = (*sm4CipherAsm)(nil)
 
+const cbcEncrypt = 1
+const cbcDecrypt = 0
+
 type cbc struct {
 	b   *sm4CipherAsm
 	iv  []byte
 	tmp []byte
+	enc int
 }
 
+func (b *sm4CipherAsm) NewCBCEncrypter(iv []byte) cipher.BlockMode {
+	var c cbc
+	c.b = b
+	c.enc = cbcEncrypt
+	c.iv = make([]byte, BlockSize)
+	c.tmp = make([]byte, BlockSize)
+	copy(c.iv, iv)
+	return &c
+}
+
 func (b *sm4CipherAsm) NewCBCDecrypter(iv []byte) cipher.BlockMode {
 	var c cbc
 	c.b = b
+	c.enc = cbcDecrypt
 	c.iv = make([]byte, BlockSize)
 	c.tmp = make([]byte, BlockSize)
 	copy(c.iv, iv)
@@ -30,6 +46,9 @@ func (b *sm4CipherAsm) NewCBCDecrypter(iv []byte) cipher.BlockMode {
 
 func (x *cbc) BlockSize() int { return BlockSize }
 
+//go:noescape
+func encryptBlocksChain(xk *uint32, dst, src []byte, iv *byte)
+
 func (x *cbc) CryptBlocks(dst, src []byte) {
 	if len(src)%BlockSize != 0 {
 		panic("cipher: input not full blocks")
@@ -43,6 +62,10 @@ func (x *cbc) CryptBlocks(dst, src []byte) {
 	if len(src) == 0 {
 		return
 	}
+	if x.enc == cbcEncrypt {
+		encryptBlocksChain(&x.b.enc[0], dst, src, &x.iv[0])
+		return
+	}
 	// For each block, we need to xor the decrypted data with the previous block's ciphertext (the iv).
 	// To avoid making a copy each time, we loop over the blocks BACKWARDS.
 	end := len(src)
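Editor's note: callers never invoke NewCBCEncrypter directly. As in the standard library, crypto/cipher.NewCBCEncrypter type-asserts the Block against the package's cbcEncAble interface and, when it matches, returns this assembly-backed mode. A hedged usage sketch (demo key and IV, all zeros):

package main

import (
	"crypto/cipher"
	"fmt"

	"github.com/emmansun/gmsm/sm4"
)

func main() {
	key := make([]byte, 16)       // demo key, all zeros
	iv := make([]byte, 16)        // demo IV, all zeros
	plaintext := make([]byte, 32) // length must be a multiple of BlockSize

	block, err := sm4.NewCipher(key)
	if err != nil {
		panic(err)
	}
	// cipher.NewCBCEncrypter type-asserts block against cbcEncAble and,
	// on amd64/arm64, returns the mode backed by encryptBlocksChain.
	mode := cipher.NewCBCEncrypter(block, iv)
	ciphertext := make([]byte, len(plaintext))
	mode.CryptBlocks(ciphertext, plaintext)
	fmt.Printf("%x\n", ciphertext)
}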
sm4/cbc_cipher_asm_amd64.s (new file, 87 lines)
@@ -0,0 +1,87 @@
//go:build amd64 && !generic
// +build amd64,!generic

#include "textflag.h"

#define x X0
#define y X1
#define t0 X2
#define t1 X3
#define t2 X4
#define t3 X5

#define XTMP6 X6
#define IV X8

#include "aesni_amd64.h"

#define SM4_SINGLE_ROUND(index, RK, IND, x, y, z, t0, t1, t2, t3) \
	PINSRD $0, (index * 4)(RK)(IND*1), x;                     \
	PXOR t1, x;                                               \
	PXOR t2, x;                                               \
	PXOR t3, x;                                               \
	SM4_TAO_L1(x, y, z);                                      \
	PXOR x, t0

// func encryptBlocksChain(xk *uint32, dst, src []byte, iv *byte)
TEXT ·encryptBlocksChain(SB),NOSPLIT,$0
#define ctx BX
#define ptx DX
#define ptxLen DI

	MOVQ xk+0(FP), AX
	MOVQ dst+8(FP), ctx
	MOVQ src+32(FP), ptx
	MOVQ src_len+40(FP), ptxLen
	MOVQ iv+56(FP), SI

	MOVUPS (SI), IV

loopSrc:
	CMPQ ptxLen, $16
	JB done_sm4
	SUBQ $16, ptxLen

	MOVUPS (ptx), t0
	PXOR IV, t0

	PSHUFB flip_mask<>(SB), t0
	PSHUFD $1, t0, t1
	PSHUFD $2, t0, t2
	PSHUFD $3, t0, t3

	XORL CX, CX

loopRound:
	SM4_SINGLE_ROUND(0, AX, CX, x, y, XTMP6, t0, t1, t2, t3)
	SM4_SINGLE_ROUND(1, AX, CX, x, y, XTMP6, t1, t2, t3, t0)
	SM4_SINGLE_ROUND(2, AX, CX, x, y, XTMP6, t2, t3, t0, t1)
	SM4_SINGLE_ROUND(3, AX, CX, x, y, XTMP6, t3, t0, t1, t2)

	ADDL $16, CX
	CMPL CX, $4*32
	JB loopRound

	PEXTRD $0, t2, R8
	PINSRD $1, R8, t3
	PEXTRD $0, t1, R8
	PINSRD $2, R8, t3
	PEXTRD $0, t0, R8
	PINSRD $3, R8, t3
	PSHUFB flip_mask<>(SB), t3

	MOVOU t3, IV
	MOVUPS t3, (ctx)

	LEAQ 16(ptx), ptx
	LEAQ 16(ctx), ctx

	JMP loopSrc

done_sm4:
	MOVUPS IV, (SI)
	RET

#undef ctx
#undef ptx
#undef ptxLen
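Editor's note on the FP offsets above (xk+0, dst+8, src+32, src_len+40, iv+56): under Go's ABI0 each slice argument occupies 24 bytes on the frame (data pointer, length, capacity), which is why src starts at 32 rather than 16. A quick illustrative check with a struct that mirrors the argument list:

package main

import (
	"fmt"
	"unsafe"
)

// frame mirrors the ABI0 argument frame of
// encryptBlocksChain(xk *uint32, dst, src []byte, iv *byte);
// each slice header is 24 bytes: data pointer, len, cap.
type frame struct {
	xk  *uint32
	dst []byte
	src []byte
	iv  *byte
}

func main() {
	var f frame
	fmt.Println(unsafe.Offsetof(f.xk))  // 0  -> xk+0(FP)
	fmt.Println(unsafe.Offsetof(f.dst)) // 8  -> dst+8(FP)
	fmt.Println(unsafe.Offsetof(f.src)) // 32 -> src+32(FP), src_len+40(FP)
	fmt.Println(unsafe.Offsetof(f.iv))  // 56 -> iv+56(FP)
}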
sm4/cbc_cipher_asm_arm64.s (new file, 131 lines)
@@ -0,0 +1,131 @@
//go:build arm64 && !generic
// +build arm64,!generic

#include "textflag.h"

#define x V0
#define y V1
#define t0 V2
#define t1 V3
#define t2 V4
#define t3 V5
#define ZERO V16
#define NIBBLE_MASK V20
#define INVERSE_SHIFT_ROWS V21
#define M1L V22
#define M1H V23
#define M2L V24
#define M2H V25
#define R08_MASK V26
#define R16_MASK V27
#define R24_MASK V28
#define FK_MASK V29
#define XTMP6 V6
#define IV V7

#include "aesni_arm64.h"

#define SM4_ROUND(RK, x, y, z, t0, t1, t2, t3) \
	MOVW.P 4(RK), R19;                     \
	VMOV R19, x.S4;                        \
	VEOR t1.B16, x.B16, x.B16;             \
	VEOR t2.B16, x.B16, x.B16;             \
	VEOR t3.B16, x.B16, x.B16;             \
	SM4_TAO_L1(x, y, z);                   \
	VEOR x.B16, t0.B16, t0.B16

#define load_global_data_1() \
	LDP nibble_mask<>(SB), (R0, R1)        \
	VMOV R0, NIBBLE_MASK.D[0]              \
	VMOV R1, NIBBLE_MASK.D[1]              \
	LDP m1_low<>(SB), (R0, R1)             \
	VMOV R0, M1L.D[0]                      \
	VMOV R1, M1L.D[1]                      \
	LDP m1_high<>(SB), (R0, R1)            \
	VMOV R0, M1H.D[0]                      \
	VMOV R1, M1H.D[1]                      \
	LDP m2_low<>(SB), (R0, R1)             \
	VMOV R0, M2L.D[0]                      \
	VMOV R1, M2L.D[1]                      \
	LDP m2_high<>(SB), (R0, R1)            \
	VMOV R0, M2H.D[0]                      \
	VMOV R1, M2H.D[1]                      \
	LDP fk_mask<>(SB), (R0, R1)            \
	VMOV R0, FK_MASK.D[0]                  \
	VMOV R1, FK_MASK.D[1]                  \
	LDP inverse_shift_rows<>(SB), (R0, R1) \
	VMOV R0, INVERSE_SHIFT_ROWS.D[0]       \
	VMOV R1, INVERSE_SHIFT_ROWS.D[1]

#define load_global_data_2() \
	load_global_data_1()                   \
	LDP r08_mask<>(SB), (R0, R1)           \
	VMOV R0, R08_MASK.D[0]                 \
	VMOV R1, R08_MASK.D[1]                 \
	LDP r16_mask<>(SB), (R0, R1)           \
	VMOV R0, R16_MASK.D[0]                 \
	VMOV R1, R16_MASK.D[1]                 \
	LDP r24_mask<>(SB), (R0, R1)           \
	VMOV R0, R24_MASK.D[0]                 \
	VMOV R1, R24_MASK.D[1]

// func encryptBlocksChain(xk *uint32, dst, src []byte, iv *byte)
TEXT ·encryptBlocksChain(SB),NOSPLIT,$0
#define ctx R1
#define ptx R3
#define ptxLen R4
#define rkSave R8

	load_global_data_2()

	MOVD xk+0(FP), rkSave
	MOVD dst+8(FP), ctx
	MOVD src+32(FP), ptx
	MOVD src_len+40(FP), ptxLen
	MOVD iv+56(FP), R5

	VEOR ZERO.B16, ZERO.B16, ZERO.B16

loopSrc:
	CMP $16, ptxLen
	BLT done_sm4
	SUB $16, ptxLen

	VLD1.P (ptx), [t0.S4]
	VEOR IV.B16, t0.B16, t0.B16
	VREV32 t0.B16, t0.B16
	VMOV t0.S[1], t1.S[0]
	VMOV t0.S[2], t2.S[0]
	VMOV t0.S[3], t3.S[0]


	EOR R2, R2
	MOVD rkSave, R0

encryptBlockLoop:
	SM4_ROUND(R0, x, y, XTMP6, t0, t1, t2, t3)
	SM4_ROUND(R0, x, y, XTMP6, t1, t2, t3, t0)
	SM4_ROUND(R0, x, y, XTMP6, t2, t3, t0, t1)
	SM4_ROUND(R0, x, y, XTMP6, t3, t0, t1, t2)

	ADD $16, R2
	CMP $128, R2
	BNE encryptBlockLoop

	VMOV t2.S[0], t3.S[1]
	VMOV t1.S[0], t3.S[2]
	VMOV t0.S[0], t3.S[3]
	VREV32 t3.B16, t3.B16

	VST1.P [t3.B16], (ctx)
	VMOV t3.B16, IV.B16

	B loopSrc
done_sm4:
	VST1 [IV.B16], (R5)
	RET

#undef ctx
#undef ptx
#undef ptxLen
#undef rkSave
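Editor's note: both new files implement the same contract, sketched below in Go: for each 16-byte block, xor with the running IV, encrypt, write the ciphertext, and carry it forward as the next IV, with the final IV stored back through the iv pointer at the end. This serial dependence between blocks is also why CBC encryption, unlike CBC decryption, cannot be parallelized. This is my model, not the commit's code; AES stands in for SM4 purely to keep the sketch runnable:

package main

import (
	"crypto/aes"
	"fmt"
)

// encryptBlocksChainModel sketches what encryptBlocksChain does per
// block: xor with the running IV, encrypt, emit the ciphertext, and
// keep it as the next IV. The asm holds the IV in a vector register
// and writes it back once, as in MOVUPS IV, (SI) / VST1 [IV.B16], (R5).
func encryptBlocksChainModel(encryptBlock func(dst, src []byte), dst, src, iv []byte) {
	var x [16]byte
	for len(src) >= 16 {
		for i := range x {
			x[i] = src[i] ^ iv[i] // PXOR IV, t0 / VEOR IV.B16, t0.B16
		}
		encryptBlock(dst[:16], x[:])
		copy(iv, dst[:16]) // MOVOU t3, IV / VMOV t3.B16, IV.B16
		src, dst = src[16:], dst[16:]
	}
}

func main() {
	b, _ := aes.NewCipher(make([]byte, 16)) // AES as a stand-in block cipher
	iv := make([]byte, 16)
	src := make([]byte, 48)
	dst := make([]byte, len(src))
	encryptBlocksChainModel(b.Encrypt, dst, src, iv)
	fmt.Printf("%x\n", dst)
}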