mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-29 05:36:17 +08:00
refactoring and fix bug
This commit is contained in:
parent
0beb90aff8
commit
4c131bdeb6
@ -59,6 +59,75 @@
|
|||||||
VPMULL T0.D1, T2.D1, T3.Q1 \
|
VPMULL T0.D1, T2.D1, T3.Q1 \
|
||||||
VEOR T3.B16, ACCM.B16, ACCM.B16
|
VEOR T3.B16, ACCM.B16, ACCM.B16
|
||||||
|
|
||||||
|
// sm4eEnc1block runs eight SM4E steps over the block held in V0, in place.
// The second operand of the successive steps is V19, V20, ..., V26.
// Per the Arm ISA each SM4E applies four SM4 rounds, so the eight steps
// cover all 32 rounds of the cipher.
// NOTE(review): assumes the caller has already loaded the 32 round keys
// into V19..V26 and placed the input block in V0 - confirm at the call sites.
//
// SM4E is emitted as a raw instruction word. The encoding of
// "SM4E Vd.4S, Vn.4S" is 0xcec08400 | Rn<<5 | Rd. WORD takes the value of
// the instruction word as a "$" immediate and the assembler stores it in
// target (little-endian) byte order, so the constant must NOT be written
// byte-swapped (0xcec08660, not 0x6086c0ce).
#define sm4eEnc1block() \
	WORD $0xcec08660 \ // SM4E V0.4S, V19.4S
	WORD $0xcec08680 \ // SM4E V0.4S, V20.4S
	WORD $0xcec086a0 \ // SM4E V0.4S, V21.4S
	WORD $0xcec086c0 \ // SM4E V0.4S, V22.4S
	WORD $0xcec086e0 \ // SM4E V0.4S, V23.4S
	WORD $0xcec08700 \ // SM4E V0.4S, V24.4S
	WORD $0xcec08720 \ // SM4E V0.4S, V25.4S
	WORD $0xcec08740   // SM4E V0.4S, V26.4S

// sm4eEnc8blocks applies the same eight SM4E steps to each of V0..V7 in
// turn, in place: V0 via sm4eEnc1block(), then V1, V2, ..., V7.
// The second operand of every step is again V19..V26, which this macro
// only reads.
// NOTE(review): presumably V0..V7 are the B0..B7 block registers used by
// the callers - confirm against the register #defines of the enclosing file.
// Same raw encoding as above: 0xcec08400 | Rn<<5 | Rd.
#define sm4eEnc8blocks() \
	sm4eEnc1block() \
	WORD $0xcec08661 \ // SM4E V1.4S, V19.4S
	WORD $0xcec08681 \ // SM4E V1.4S, V20.4S
	WORD $0xcec086a1 \ // SM4E V1.4S, V21.4S
	WORD $0xcec086c1 \ // SM4E V1.4S, V22.4S
	WORD $0xcec086e1 \ // SM4E V1.4S, V23.4S
	WORD $0xcec08701 \ // SM4E V1.4S, V24.4S
	WORD $0xcec08721 \ // SM4E V1.4S, V25.4S
	WORD $0xcec08741 \ // SM4E V1.4S, V26.4S
	WORD $0xcec08662 \ // SM4E V2.4S, V19.4S
	WORD $0xcec08682 \ // SM4E V2.4S, V20.4S
	WORD $0xcec086a2 \ // SM4E V2.4S, V21.4S
	WORD $0xcec086c2 \ // SM4E V2.4S, V22.4S
	WORD $0xcec086e2 \ // SM4E V2.4S, V23.4S
	WORD $0xcec08702 \ // SM4E V2.4S, V24.4S
	WORD $0xcec08722 \ // SM4E V2.4S, V25.4S
	WORD $0xcec08742 \ // SM4E V2.4S, V26.4S
	WORD $0xcec08663 \ // SM4E V3.4S, V19.4S
	WORD $0xcec08683 \ // SM4E V3.4S, V20.4S
	WORD $0xcec086a3 \ // SM4E V3.4S, V21.4S
	WORD $0xcec086c3 \ // SM4E V3.4S, V22.4S
	WORD $0xcec086e3 \ // SM4E V3.4S, V23.4S
	WORD $0xcec08703 \ // SM4E V3.4S, V24.4S
	WORD $0xcec08723 \ // SM4E V3.4S, V25.4S
	WORD $0xcec08743 \ // SM4E V3.4S, V26.4S
	WORD $0xcec08664 \ // SM4E V4.4S, V19.4S
	WORD $0xcec08684 \ // SM4E V4.4S, V20.4S
	WORD $0xcec086a4 \ // SM4E V4.4S, V21.4S
	WORD $0xcec086c4 \ // SM4E V4.4S, V22.4S
	WORD $0xcec086e4 \ // SM4E V4.4S, V23.4S
	WORD $0xcec08704 \ // SM4E V4.4S, V24.4S
	WORD $0xcec08724 \ // SM4E V4.4S, V25.4S
	WORD $0xcec08744 \ // SM4E V4.4S, V26.4S
	WORD $0xcec08665 \ // SM4E V5.4S, V19.4S
	WORD $0xcec08685 \ // SM4E V5.4S, V20.4S
	WORD $0xcec086a5 \ // SM4E V5.4S, V21.4S
	WORD $0xcec086c5 \ // SM4E V5.4S, V22.4S
	WORD $0xcec086e5 \ // SM4E V5.4S, V23.4S
	WORD $0xcec08705 \ // SM4E V5.4S, V24.4S
	WORD $0xcec08725 \ // SM4E V5.4S, V25.4S
	WORD $0xcec08745 \ // SM4E V5.4S, V26.4S
	WORD $0xcec08666 \ // SM4E V6.4S, V19.4S
	WORD $0xcec08686 \ // SM4E V6.4S, V20.4S
	WORD $0xcec086a6 \ // SM4E V6.4S, V21.4S
	WORD $0xcec086c6 \ // SM4E V6.4S, V22.4S
	WORD $0xcec086e6 \ // SM4E V6.4S, V23.4S
	WORD $0xcec08706 \ // SM4E V6.4S, V24.4S
	WORD $0xcec08726 \ // SM4E V6.4S, V25.4S
	WORD $0xcec08746 \ // SM4E V6.4S, V26.4S
	WORD $0xcec08667 \ // SM4E V7.4S, V19.4S
	WORD $0xcec08687 \ // SM4E V7.4S, V20.4S
	WORD $0xcec086a7 \ // SM4E V7.4S, V21.4S
	WORD $0xcec086c7 \ // SM4E V7.4S, V22.4S
	WORD $0xcec086e7 \ // SM4E V7.4S, V23.4S
	WORD $0xcec08707 \ // SM4E V7.4S, V24.4S
	WORD $0xcec08727 \ // SM4E V7.4S, V25.4S
	WORD $0xcec08747   // SM4E V7.4S, V26.4S
|
||||||
|
|
||||||
// func gcmSm4niEnc(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
|
// func gcmSm4niEnc(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
|
||||||
TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0
|
TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0
|
||||||
#define pTbl R0
|
#define pTbl R0
|
||||||
@ -123,70 +192,7 @@ octetsLoop:
|
|||||||
VADD B6.S4, INC.S4, B7.S4
|
VADD B6.S4, INC.S4, B7.S4
|
||||||
VADD B7.S4, INC.S4, CTR.S4
|
VADD B7.S4, INC.S4, CTR.S4
|
||||||
|
|
||||||
WORD 0x6086c0ce //SM4E V0.4S, V19.4S
|
sm4eEnc8blocks()
|
||||||
WORD 0x8086c0ce //SM4E V0.4S, V20.4S
|
|
||||||
WORD 0xa086c0ce //SM4E V0.4S, V21.4S
|
|
||||||
WORD 0xc086c0ce //SM4E V0.4S, V22.4S
|
|
||||||
WORD 0xe086c0ce //SM4E V0.4S, V23.4S
|
|
||||||
WORD 0x0087c0ce //SM4E V0.4S, V24.4S
|
|
||||||
WORD 0x2087c0ce //SM4E V0.4S, V25.4S
|
|
||||||
WORD 0x4087c0ce //SM4E V0.4S, V26.4S
|
|
||||||
WORD 0x6186c0ce //SM4E V1.4S, V19.4S
|
|
||||||
WORD 0x8186c0ce //SM4E V1.4S, V20.4S
|
|
||||||
WORD 0xa186c0ce //SM4E V1.4S, V21.4S
|
|
||||||
WORD 0xc186c0ce //SM4E V1.4S, V22.4S
|
|
||||||
WORD 0xe186c0ce //SM4E V1.4S, V23.4S
|
|
||||||
WORD 0x0187c0ce //SM4E V1.4S, V24.4S
|
|
||||||
WORD 0x2187c0ce //SM4E V1.4S, V25.4S
|
|
||||||
WORD 0x4187c0ce //SM4E V1.4S, V26.4S
|
|
||||||
WORD 0x6286c0ce //SM4E V2.4S, V19.4S
|
|
||||||
WORD 0x8286c0ce //SM4E V2.4S, V20.4S
|
|
||||||
WORD 0xa286c0ce //SM4E V2.4S, V21.4S
|
|
||||||
WORD 0xc286c0ce //SM4E V2.4S, V22.4S
|
|
||||||
WORD 0xe286c0ce //SM4E V2.4S, V23.4S
|
|
||||||
WORD 0x0287c0ce //SM4E V2.4S, V24.4S
|
|
||||||
WORD 0x2287c0ce //SM4E V2.4S, V25.4S
|
|
||||||
WORD 0x4287c0ce //SM4E V2.4S, V26.4S
|
|
||||||
WORD 0x6386c0ce //SM4E V3.4S, V19.4S
|
|
||||||
WORD 0x8386c0ce //SM4E V3.4S, V20.4S
|
|
||||||
WORD 0xa386c0ce //SM4E V3.4S, V21.4S
|
|
||||||
WORD 0xc386c0ce //SM4E V3.4S, V22.4S
|
|
||||||
WORD 0xe386c0ce //SM4E V3.4S, V23.4S
|
|
||||||
WORD 0x0387c0ce //SM4E V3.4S, V24.4S
|
|
||||||
WORD 0x2387c0ce //SM4E V3.4S, V25.4S
|
|
||||||
WORD 0x4387c0ce //SM4E V3.4S, V26.4S
|
|
||||||
WORD 0x6486c0ce //SM4E V4.4S, V19.4S
|
|
||||||
WORD 0x8486c0ce //SM4E V4.4S, V20.4S
|
|
||||||
WORD 0xa486c0ce //SM4E V4.4S, V21.4S
|
|
||||||
WORD 0xc486c0ce //SM4E V4.4S, V22.4S
|
|
||||||
WORD 0xe486c0ce //SM4E V4.4S, V23.4S
|
|
||||||
WORD 0x0487c0ce //SM4E V4.4S, V24.4S
|
|
||||||
WORD 0x2487c0ce //SM4E V4.4S, V25.4S
|
|
||||||
WORD 0x4487c0ce //SM4E V4.4S, V26.4S
|
|
||||||
WORD 0x6586c0ce //SM4E V5.4S, V19.4S
|
|
||||||
WORD 0x8586c0ce //SM4E V5.4S, V20.4S
|
|
||||||
WORD 0xa586c0ce //SM4E V5.4S, V21.4S
|
|
||||||
WORD 0xc586c0ce //SM4E V5.4S, V22.4S
|
|
||||||
WORD 0xe586c0ce //SM4E V5.4S, V23.4S
|
|
||||||
WORD 0x0587c0ce //SM4E V5.4S, V24.4S
|
|
||||||
WORD 0x2587c0ce //SM4E V5.4S, V25.4S
|
|
||||||
WORD 0x4587c0ce //SM4E V5.4S, V26.4S
|
|
||||||
WORD 0x6686c0ce //SM4E V6.4S, V19.4S
|
|
||||||
WORD 0x8686c0ce //SM4E V6.4S, V20.4S
|
|
||||||
WORD 0xa686c0ce //SM4E V6.4S, V21.4S
|
|
||||||
WORD 0xc686c0ce //SM4E V6.4S, V22.4S
|
|
||||||
WORD 0xe686c0ce //SM4E V6.4S, V23.4S
|
|
||||||
WORD 0x0687c0ce //SM4E V6.4S, V24.4S
|
|
||||||
WORD 0x2687c0ce //SM4E V6.4S, V25.4S
|
|
||||||
WORD 0x4687c0ce //SM4E V6.4S, V26.4S
|
|
||||||
WORD 0x6786c0ce //SM4E V7.4S, V19.4S
|
|
||||||
WORD 0x8786c0ce //SM4E V7.4S, V20.4S
|
|
||||||
WORD 0xa786c0ce //SM4E V7.4S, V21.4S
|
|
||||||
WORD 0xc786c0ce //SM4E V7.4S, V22.4S
|
|
||||||
WORD 0xe786c0ce //SM4E V7.4S, V23.4S
|
|
||||||
WORD 0x0787c0ce //SM4E V7.4S, V24.4S
|
|
||||||
WORD 0x2787c0ce //SM4E V7.4S, V25.4S
|
|
||||||
WORD 0x4787c0ce //SM4E V7.4S, V26.4S
|
|
||||||
VREV32 B0.B16, B0.B16
|
VREV32 B0.B16, B0.B16
|
||||||
VREV32 B1.B16, B1.B16
|
VREV32 B1.B16, B1.B16
|
||||||
VREV32 B2.B16, B2.B16
|
VREV32 B2.B16, B2.B16
|
||||||
@ -249,15 +255,7 @@ singlesLoop:
|
|||||||
|
|
||||||
VMOV CTR.B16, B0.B16
|
VMOV CTR.B16, B0.B16
|
||||||
VADD CTR.S4, INC.S4, CTR.S4
|
VADD CTR.S4, INC.S4, CTR.S4
|
||||||
// SM4 8 rounds
|
sm4eEnc1block()
|
||||||
WORD 0x6086c0ce //SM4E V0.4S, V19.4S
|
|
||||||
WORD 0x8086c0ce //SM4E V0.4S, V20.4S
|
|
||||||
WORD 0xa086c0ce //SM4E V0.4S, V21.4S
|
|
||||||
WORD 0xc086c0ce //SM4E V0.4S, V22.4S
|
|
||||||
WORD 0xe086c0ce //SM4E V0.4S, V23.4S
|
|
||||||
WORD 0x0087c0ce //SM4E V0.4S, V24.4S
|
|
||||||
WORD 0x2087c0ce //SM4E V0.4S, V25.4S
|
|
||||||
WORD 0x4087c0ce //SM4E V0.4S, V26.4S
|
|
||||||
VREV32 B0.B16, B0.B16
|
VREV32 B0.B16, B0.B16
|
||||||
|
|
||||||
singlesLast:
|
singlesLast:
|
||||||
@ -317,15 +315,7 @@ ld1:
|
|||||||
ld0:
|
ld0:
|
||||||
MOVD ZR, srcPtrLen
|
MOVD ZR, srcPtrLen
|
||||||
VMOV CTR.B16, B0.B16
|
VMOV CTR.B16, B0.B16
|
||||||
// SM4 8 rounds
|
sm4eEnc1block()
|
||||||
WORD 0x6086c0ce //SM4E V0.4S, V19.4S
|
|
||||||
WORD 0x8086c0ce //SM4E V0.4S, V20.4S
|
|
||||||
WORD 0xa086c0ce //SM4E V0.4S, V21.4S
|
|
||||||
WORD 0xc086c0ce //SM4E V0.4S, V22.4S
|
|
||||||
WORD 0xe086c0ce //SM4E V0.4S, V23.4S
|
|
||||||
WORD 0x0087c0ce //SM4E V0.4S, V24.4S
|
|
||||||
WORD 0x2087c0ce //SM4E V0.4S, V25.4S
|
|
||||||
WORD 0x4087c0ce //SM4E V0.4S, V26.4S
|
|
||||||
VREV32 B0.B16, B0.B16
|
VREV32 B0.B16, B0.B16
|
||||||
|
|
||||||
tailLast:
|
tailLast:
|
||||||
@ -390,70 +380,7 @@ octetsLoop:
|
|||||||
VADD B6.S4, INC.S4, B7.S4
|
VADD B6.S4, INC.S4, B7.S4
|
||||||
VADD B7.S4, INC.S4, CTR.S4
|
VADD B7.S4, INC.S4, CTR.S4
|
||||||
|
|
||||||
WORD 0x6086c0ce //SM4E V0.4S, V19.4S
|
sm4eEnc8blocks()
|
||||||
WORD 0x8086c0ce //SM4E V0.4S, V20.4S
|
|
||||||
WORD 0xa086c0ce //SM4E V0.4S, V21.4S
|
|
||||||
WORD 0xc086c0ce //SM4E V0.4S, V22.4S
|
|
||||||
WORD 0xe086c0ce //SM4E V0.4S, V23.4S
|
|
||||||
WORD 0x0087c0ce //SM4E V0.4S, V24.4S
|
|
||||||
WORD 0x2087c0ce //SM4E V0.4S, V25.4S
|
|
||||||
WORD 0x4087c0ce //SM4E V0.4S, V26.4S
|
|
||||||
WORD 0x6186c0ce //SM4E V1.4S, V19.4S
|
|
||||||
WORD 0x8186c0ce //SM4E V1.4S, V20.4S
|
|
||||||
WORD 0xa186c0ce //SM4E V1.4S, V21.4S
|
|
||||||
WORD 0xc186c0ce //SM4E V1.4S, V22.4S
|
|
||||||
WORD 0xe186c0ce //SM4E V1.4S, V23.4S
|
|
||||||
WORD 0x0187c0ce //SM4E V1.4S, V24.4S
|
|
||||||
WORD 0x2187c0ce //SM4E V1.4S, V25.4S
|
|
||||||
WORD 0x4187c0ce //SM4E V1.4S, V26.4S
|
|
||||||
WORD 0x6286c0ce //SM4E V2.4S, V19.4S
|
|
||||||
WORD 0x8286c0ce //SM4E V2.4S, V20.4S
|
|
||||||
WORD 0xa286c0ce //SM4E V2.4S, V21.4S
|
|
||||||
WORD 0xc286c0ce //SM4E V2.4S, V22.4S
|
|
||||||
WORD 0xe286c0ce //SM4E V2.4S, V23.4S
|
|
||||||
WORD 0x0287c0ce //SM4E V2.4S, V24.4S
|
|
||||||
WORD 0x2287c0ce //SM4E V2.4S, V25.4S
|
|
||||||
WORD 0x4287c0ce //SM4E V2.4S, V26.4S
|
|
||||||
WORD 0x6386c0ce //SM4E V3.4S, V19.4S
|
|
||||||
WORD 0x8386c0ce //SM4E V3.4S, V20.4S
|
|
||||||
WORD 0xa386c0ce //SM4E V3.4S, V21.4S
|
|
||||||
WORD 0xc386c0ce //SM4E V3.4S, V22.4S
|
|
||||||
WORD 0xe386c0ce //SM4E V3.4S, V23.4S
|
|
||||||
WORD 0x0387c0ce //SM4E V3.4S, V24.4S
|
|
||||||
WORD 0x2387c0ce //SM4E V3.4S, V25.4S
|
|
||||||
WORD 0x4387c0ce //SM4E V3.4S, V26.4S
|
|
||||||
WORD 0x6486c0ce //SM4E V4.4S, V19.4S
|
|
||||||
WORD 0x8486c0ce //SM4E V4.4S, V20.4S
|
|
||||||
WORD 0xa486c0ce //SM4E V4.4S, V21.4S
|
|
||||||
WORD 0xc486c0ce //SM4E V4.4S, V22.4S
|
|
||||||
WORD 0xe486c0ce //SM4E V4.4S, V23.4S
|
|
||||||
WORD 0x0487c0ce //SM4E V4.4S, V24.4S
|
|
||||||
WORD 0x2487c0ce //SM4E V4.4S, V25.4S
|
|
||||||
WORD 0x4487c0ce //SM4E V4.4S, V26.4S
|
|
||||||
WORD 0x6586c0ce //SM4E V5.4S, V19.4S
|
|
||||||
WORD 0x8586c0ce //SM4E V5.4S, V20.4S
|
|
||||||
WORD 0xa586c0ce //SM4E V5.4S, V21.4S
|
|
||||||
WORD 0xc586c0ce //SM4E V5.4S, V22.4S
|
|
||||||
WORD 0xe586c0ce //SM4E V5.4S, V23.4S
|
|
||||||
WORD 0x0587c0ce //SM4E V5.4S, V24.4S
|
|
||||||
WORD 0x2587c0ce //SM4E V5.4S, V25.4S
|
|
||||||
WORD 0x4587c0ce //SM4E V5.4S, V26.4S
|
|
||||||
WORD 0x6686c0ce //SM4E V6.4S, V19.4S
|
|
||||||
WORD 0x8686c0ce //SM4E V6.4S, V20.4S
|
|
||||||
WORD 0xa686c0ce //SM4E V6.4S, V21.4S
|
|
||||||
WORD 0xc686c0ce //SM4E V6.4S, V22.4S
|
|
||||||
WORD 0xe686c0ce //SM4E V6.4S, V23.4S
|
|
||||||
WORD 0x0687c0ce //SM4E V6.4S, V24.4S
|
|
||||||
WORD 0x2687c0ce //SM4E V6.4S, V25.4S
|
|
||||||
WORD 0x4687c0ce //SM4E V6.4S, V26.4S
|
|
||||||
WORD 0x6786c0ce //SM4E V7.4S, V19.4S
|
|
||||||
WORD 0x8786c0ce //SM4E V7.4S, V20.4S
|
|
||||||
WORD 0xa786c0ce //SM4E V7.4S, V21.4S
|
|
||||||
WORD 0xc786c0ce //SM4E V7.4S, V22.4S
|
|
||||||
WORD 0xe786c0ce //SM4E V7.4S, V23.4S
|
|
||||||
WORD 0x0787c0ce //SM4E V7.4S, V24.4S
|
|
||||||
WORD 0x2787c0ce //SM4E V7.4S, V25.4S
|
|
||||||
WORD 0x4787c0ce //SM4E V7.4S, V26.4S
|
|
||||||
VREV32 B0.B16, T1.B16
|
VREV32 B0.B16, T1.B16
|
||||||
VREV32 B1.B16, T2.B16
|
VREV32 B1.B16, T2.B16
|
||||||
VREV32 B2.B16, B2.B16
|
VREV32 B2.B16, B2.B16
|
||||||
@ -516,21 +443,15 @@ singlesLoop:
|
|||||||
BLT tail
|
BLT tail
|
||||||
SUB $16, srcPtrLen
|
SUB $16, srcPtrLen
|
||||||
|
|
||||||
|
VLD1.P 16(srcPtr), [T0.B16]
|
||||||
|
VREV64 T0.B16, B5.B16
|
||||||
|
|
||||||
VMOV CTR.B16, B0.B16
|
VMOV CTR.B16, B0.B16
|
||||||
VADD CTR.S4, INC.S4, CTR.S4
|
VADD CTR.S4, INC.S4, CTR.S4
|
||||||
// SM4 8 rounds
|
sm4eEnc1block()
|
||||||
WORD 0x6086c0ce //SM4E V0.4S, V19.4S
|
|
||||||
WORD 0x8086c0ce //SM4E V0.4S, V20.4S
|
|
||||||
WORD 0xa086c0ce //SM4E V0.4S, V21.4S
|
|
||||||
WORD 0xc086c0ce //SM4E V0.4S, V22.4S
|
|
||||||
WORD 0xe086c0ce //SM4E V0.4S, V23.4S
|
|
||||||
WORD 0x0087c0ce //SM4E V0.4S, V24.4S
|
|
||||||
WORD 0x2087c0ce //SM4E V0.4S, V25.4S
|
|
||||||
WORD 0x4087c0ce //SM4E V0.4S, V26.4S
|
|
||||||
VREV32 B0.B16, B0.B16
|
VREV32 B0.B16, B0.B16
|
||||||
|
|
||||||
singlesLast:
|
singlesLast:
|
||||||
VLD1.P 16(srcPtr), [T0.B16]
|
|
||||||
VEOR T0.B16, B0.B16, B0.B16
|
VEOR T0.B16, B0.B16, B0.B16
|
||||||
VST1.P [B0.B16], 16(dstPtr)
|
VST1.P [B0.B16], 16(dstPtr)
|
||||||
|
|
||||||
@ -547,18 +468,11 @@ tail:
|
|||||||
CBZ srcPtrLen, done
|
CBZ srcPtrLen, done
|
||||||
VMOV CTR.B16, B0.B16
|
VMOV CTR.B16, B0.B16
|
||||||
VADD CTR.S4, INC.S4, CTR.S4
|
VADD CTR.S4, INC.S4, CTR.S4
|
||||||
// SM4 8 rounds
|
sm4eEnc1block()
|
||||||
WORD 0x6086c0ce //SM4E V0.4S, V19.4S
|
|
||||||
WORD 0x8086c0ce //SM4E V0.4S, V20.4S
|
|
||||||
WORD 0xa086c0ce //SM4E V0.4S, V21.4S
|
|
||||||
WORD 0xc086c0ce //SM4E V0.4S, V22.4S
|
|
||||||
WORD 0xe086c0ce //SM4E V0.4S, V23.4S
|
|
||||||
WORD 0x0087c0ce //SM4E V0.4S, V24.4S
|
|
||||||
WORD 0x2087c0ce //SM4E V0.4S, V25.4S
|
|
||||||
WORD 0x4087c0ce //SM4E V0.4S, V26.4S
|
|
||||||
VREV32 B0.B16, B0.B16
|
VREV32 B0.B16, B0.B16
|
||||||
tailLast:
|
tailLast:
|
||||||
// Assuming it is safe to load past dstPtr due to the presence of the tag
|
// Assuming it is safe to load past dstPtr due to the presence of the tag
|
||||||
|
// B5 stored last ciphertext
|
||||||
VLD1 (srcPtr), [B5.B16]
|
VLD1 (srcPtr), [B5.B16]
|
||||||
|
|
||||||
VEOR B5.B16, B0.B16, B0.B16
|
VEOR B5.B16, B0.B16, B0.B16
|
||||||
@ -567,27 +481,27 @@ tailLast:
|
|||||||
MOVD $0, H1
|
MOVD $0, H1
|
||||||
SUB $1, H1
|
SUB $1, H1
|
||||||
|
|
||||||
TBZ $3, srcPtrLen, ld4
|
TBZ $3, srcPtrLen, ld4 // If bit 3 of srcPtrLen is clear (no 8-byte chunk to store), goto ld4
|
||||||
VMOV B0.D[0], H0
|
VMOV B0.D[0], H0
|
||||||
MOVD.P H0, 8(dstPtr)
|
MOVD.P H0, 8(dstPtr)
|
||||||
VMOV H1, T3.D[0]
|
VMOV H1, T3.D[0]
|
||||||
VEXT $8, ZERO.B16, B0.B16, B0.B16
|
VEXT $8, ZERO.B16, B0.B16, B0.B16
|
||||||
ld4:
|
ld4:
|
||||||
TBZ $2, srcPtrLen, ld2
|
TBZ $2, srcPtrLen, ld2 // If bit 2 of srcPtrLen is clear (no 4-byte chunk to store), goto ld2
|
||||||
VMOV B0.S[0], H0
|
VMOV B0.S[0], H0
|
||||||
MOVW.P H0, 4(dstPtr)
|
MOVW.P H0, 4(dstPtr)
|
||||||
VEXT $12, T3.B16, ZERO.B16, T3.B16
|
VEXT $12, T3.B16, ZERO.B16, T3.B16
|
||||||
VMOV H1, T3.S[0]
|
VMOV H1, T3.S[0]
|
||||||
VEXT $4, ZERO.B16, B0.B16, B0.B16
|
VEXT $4, ZERO.B16, B0.B16, B0.B16
|
||||||
ld2:
|
ld2:
|
||||||
TBZ $1, srcPtrLen, ld1
|
TBZ $1, srcPtrLen, ld1 // If bit 1 of srcPtrLen is clear (no 2-byte chunk to store), goto ld1
|
||||||
VMOV B0.H[0], H0
|
VMOV B0.H[0], H0
|
||||||
MOVH.P H0, 2(dstPtr)
|
MOVH.P H0, 2(dstPtr)
|
||||||
VEXT $14, T3.B16, ZERO.B16, T3.B16
|
VEXT $14, T3.B16, ZERO.B16, T3.B16
|
||||||
VMOV H1, T3.H[0]
|
VMOV H1, T3.H[0]
|
||||||
VEXT $2, ZERO.B16, B0.B16, B0.B16
|
VEXT $2, ZERO.B16, B0.B16, B0.B16
|
||||||
ld1:
|
ld1:
|
||||||
TBZ $0, srcPtrLen, ld0
|
TBZ $0, srcPtrLen, ld0 // If bit 0 of srcPtrLen is clear (no single byte to store), goto ld0
|
||||||
VMOV B0.B[0], H0
|
VMOV B0.B[0], H0
|
||||||
MOVB.P H0, 1(dstPtr)
|
MOVB.P H0, 1(dstPtr)
|
||||||
VEXT $15, T3.B16, ZERO.B16, T3.B16
|
VEXT $15, T3.B16, ZERO.B16, T3.B16
|
||||||
|
Loading…
x
Reference in New Issue
Block a user