mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
sm4: xts asm arm64 test one func first
This commit is contained in:
parent
4fc2acf95c
commit
3a304ee8dd
533
sm4/xts_arm64.s
533
sm4/xts_arm64.s
@ -372,545 +372,12 @@ xtsSm4EncDone:
|
|||||||
|
|
||||||
// func encryptSm4XtsGB(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
//
// XTS encryption of src into dst using the "GB" tweak-update macros
// (prepareGB8Tweaks / prepareGB4Tweaks / mul2GBInline, defined elsewhere).
//
// Arguments (Go stack ABI, 64 bytes of args, 128 bytes of frame):
//   xk      - pointer to the expanded round keys
//   tweak   - pointer to the 16-byte tweak; read on entry, written back on exit
//   dst     - output slice (only the data pointer is read here)
//   src     - input slice (data pointer and length are read here)
//
// Processing order: 128-byte groups, then at most one 64-byte group, then
// 16-byte blocks, then a partial final block (1..15 bytes) handled by
// stealing from the previously written output block.
//
// Every cipher pass is 8 loop iterations of 4 round macros, counted in R13.
// rk is reloaded from rkSave before each pass (presumably because the round
// macros advance rk - TODO confirm against the macro definitions).
TEXT ·encryptSm4XtsGB(SB),0,$128-64
	LOAD_SM4_AESNI_CONSTS()
	MOVD xk+0(FP), rk
	MOVD tweak+8(FP), twPtr
	MOVD dst+16(FP), dstPtr
	MOVD src+40(FP), srcPtr
	MOVD src_len+48(FP), srcPtrLen

	// Clear the helper vectors, then place 0xE1 in the top byte of POLY's
	// upper 64-bit lane (presumably the reduction constant consumed by the
	// GB tweak macros - TODO confirm).
	VEOR ZERO.B16, ZERO.B16, ZERO.B16
	VEOR POLY.B16, POLY.B16, POLY.B16

	MOVD $0xE1, I
	LSL $56, I
	VMOV I, POLY.D[1]

	MOVD rk, rkSave                    // keep the key pointer for later passes
	VLD1 (twPtr), [TW.B16]             // TW = caller's current tweak

gbEncEightBlocks:
	// While at least 128 bytes remain, take eight blocks per pass.
	CMP $128, srcPtrLen
	BLT gbEncFourBlocks
	SUB $128, srcPtrLen

	prepareGB8Tweaks
	load8blocks
	MOVD rkSave, rk
	MOVD $0, R13

gbEncEightRounds:
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B0, B1, B2, B3, B4, B5, B6, B7)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B1, B2, B3, B0, B5, B6, B7, B4)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B2, B3, B0, B1, B6, B7, B4, B5)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B3, B0, B1, B2, B7, B4, B5, B6)

	ADD $1, R13
	CMP $8, R13
	BNE gbEncEightRounds

	store8blocks
	B gbEncEightBlocks

gbEncFourBlocks:
	// Fewer than 128 bytes remain here, so this stage runs at most once.
	CMP $64, srcPtrLen
	BLT gbEncOneBlock
	SUB $64, srcPtrLen

	prepareGB4Tweaks
	load4blocks
	MOVD rkSave, rk
	MOVD $0, R13

gbEncFourRounds:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)

	ADD $1, R13
	CMP $8, R13
	BNE gbEncFourRounds

	store4blocks

gbEncOneBlock:
	// One 16-byte block per pass until fewer than 16 bytes remain.
	CMP $16, srcPtrLen
	BLT gbEncPartial
	SUB $16, srcPtrLen

	loadOneBlock

	MOVD rkSave, rk
	MOVD $0, R13

gbEncOneRounds:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE gbEncOneRounds

	storeOneBlock
	mul2GBInline                       // step TW for the next block
	B gbEncOneBlock

gbEncPartial:
	// 0..15 bytes remain. Nothing left: just write the tweak back.
	CBZ srcPtrLen, gbEncFinish

	// R9 and R7 both address the 16 bytes just below dstPtr (the previous
	// output block, assuming the store macros leave dstPtr just past what
	// they wrote - TODO confirm). R7 is advanced by the copies below; R9
	// keeps the block start for the final store. R8 walks the stack scratch.
	SUB $16, dstPtr, R9
	MOVD RSP, R8
	MOVD R9, R7
	VLD1 (R7), [B0.B16]
	VST1 [B0.B16], (R8)                // scratch = previous output block

	// For each set bit of srcPtrLen (8, 4, 2, 1) move that many bytes:
	//   src            -> scratch (overwrites the leading bytes)
	//   previous block -> dst     (the partial output block)
	TBZ $3, srcPtrLen, gbEncCopy4
	MOVD.P 8(srcPtr), R11
	MOVD.P R11, 8(R8)
	MOVD.P 8(R7), R12
	MOVD.P R12, 8(dstPtr)

gbEncCopy4:
	TBZ $2, srcPtrLen, gbEncCopy2
	MOVWU.P 4(srcPtr), R11
	MOVWU.P R11, 4(R8)
	MOVWU.P 4(R7), R12
	MOVWU.P R12, 4(dstPtr)

gbEncCopy2:
	TBZ $1, srcPtrLen, gbEncCopy1
	MOVHU.P 2(srcPtr), R11
	MOVHU.P R11, 2(R8)
	MOVHU.P 2(R7), R12
	MOVHU.P R12, 2(dstPtr)

gbEncCopy1:
	TBZ $0, srcPtrLen, gbEncPartialCipher
	MOVBU (srcPtr), R11
	MOVBU R11, (R8)
	MOVBU (R7), R12
	MOVBU R12, (dstPtr)

gbEncPartialCipher:
	// Encrypt the merged scratch block under TW.
	VLD1 (RSP), [B0.B16]
	VEOR TW.B16, B0.B16, B0.B16
	VREV32 B0.B16, B0.B16
	// Spread words 1..3 of the block into lane 0 of B1..B3.
	VMOV B0.S[1], B1.S[0]
	VMOV B0.S[2], B2.S[0]
	VMOV B0.S[3], B3.S[0]

	MOVD rkSave, rk
	MOVD $0, R13

gbEncPartialRounds:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE gbEncPartialRounds

	// Gather lane 0 of B2, B1, B0 behind B3's lane 0 (reversed word order).
	VMOV B2.S[0], B3.S[1]
	VMOV B1.S[0], B3.S[2]
	VMOV B0.S[0], B3.S[3]
	VREV32 B3.B16, B3.B16

	VEOR TW.B16, B3.B16, B3.B16
	VST1 [B3.B16], (R9)                // overwrite the previous output block

gbEncFinish:
	VST1 [TW.B16], (twPtr)             // hand the current tweak back
	RET
|
||||||
|
|
||||||
// func decryptSm4Xts(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
//
// XTS decryption of src into dst.
//
// Arguments (Go stack ABI, 64 bytes of args, 128 bytes of frame):
//   xk      - pointer to the expanded round keys
//   tweak   - pointer to the 16-byte tweak; read on entry, written back on exit
//   dst     - output slice (only the data pointer is read here)
//   src     - input slice (data pointer and length are read here)
//
// Processing order: 128-byte groups, then at most one 64-byte group, then
// single 16-byte blocks while at least 32 bytes remain. The last 16..31
// bytes are handled by xtsSm4DecTail: exactly 16 bytes is a plain final
// block; 17..31 bytes is the stolen-ciphertext case, where the last full
// block is decrypted with the *next* tweak and the merged final block with
// the current one.
//
// NOTE(review): a non-zero remainder below 16 bytes is not special-cased;
// presumably the caller guarantees len(src) >= 16 - confirm.
//
// Every cipher pass is 8 loop iterations of 4 round macros, counted in R13.
// rk is reloaded from rkSave before each pass (presumably because the round
// macros advance rk - TODO confirm against the macro definitions).
TEXT ·decryptSm4Xts(SB),0,$128-64
	LOAD_SM4_AESNI_CONSTS()
	MOVD xk+0(FP), rk
	MOVD tweak+8(FP), twPtr
	MOVD dst+16(FP), dstPtr
	MOVD src+40(FP), srcPtr
	MOVD src_len+48(FP), srcPtrLen

	VEOR POLY.B16, POLY.B16, POLY.B16
	VEOR ZERO.B16, ZERO.B16, ZERO.B16

	// POLY = 0x87 in the low 64-bit lane (presumably the reduction constant
	// consumed by the tweak-doubling macros - TODO confirm).
	MOVD $0x87, I
	VMOV I, POLY.D[0]

	MOVD rk, rkSave                    // keep the key pointer for later passes
	VLD1 (twPtr), [TW.B16]             // TW = caller's current tweak

xtsSm4DecOctets:
	// While at least 128 bytes remain, take eight blocks per pass.
	CMP $128, srcPtrLen
	BLT xtsSm4DecNibbles
	SUB $128, srcPtrLen

	prepare8Tweaks
	load8blocks
	MOVD rkSave, rk
	EOR R13, R13

decOctetsDec8Blocks:
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B0, B1, B2, B3, B4, B5, B6, B7)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B1, B2, B3, B0, B5, B6, B7, B4)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B2, B3, B0, B1, B6, B7, B4, B5)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B3, B0, B1, B2, B7, B4, B5, B6)

	ADD $1, R13
	CMP $8, R13
	BNE decOctetsDec8Blocks

	store8blocks
	B xtsSm4DecOctets

xtsSm4DecNibbles:
	// Fewer than 128 bytes remain here, so this stage runs at most once.
	CMP $64, srcPtrLen
	BLT xtsSm4DecSingles
	SUB $64, srcPtrLen

	prepare4Tweaks
	load4blocks
	MOVD rkSave, rk
	EOR R13, R13

decNibblesDec4Blocks:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)

	ADD $1, R13
	CMP $8, R13
	BNE decNibblesDec4Blocks

	store4blocks

xtsSm4DecSingles:
	// Take single blocks only while at least 32 bytes remain; the final
	// 16..31 bytes belong to the tail. The exit must target xtsSm4DecTail:
	// branching back to this label would spin forever, because nothing
	// changes between the compare and the branch.
	CMP $32, srcPtrLen
	BLT xtsSm4DecTail
	SUB $16, srcPtrLen

	loadOneBlock

	MOVD rkSave, rk
	EOR R13, R13

decSinglesDec4Blocks:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE decSinglesDec4Blocks

	storeOneBlock
	mul2Inline                         // step TW for the next block

	B xtsSm4DecSingles

xtsSm4DecTail:
	CBZ srcPtrLen, xtsSm4DecDone

	// Exactly one block left: no stealing needed.
	CMP $16, srcPtrLen
	BEQ xtsSm4DecLastBlock

	// Stolen-ciphertext case: decrypt the last full block with the tweak
	// that follows the current one, keeping the current tweak in B4 for
	// the merged final block.
	VMOV TW.B16, B4.B16
	mul2Inline
	loadOneBlock
	MOVD rkSave, rk
	EOR R13, R13

decLastCompleteBlockLoop:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE decLastCompleteBlockLoop
	storeOneBlock
	VMOV B4.B16, TW.B16                // restore the saved tweak
	// Scratch = block just produced (assumes storeOneBlock leaves its
	// output in B3 - TODO confirm against the macro).
	VST1 [B3.B16], (RSP)

	// srcPtrLen becomes the partial length (1..15). R7 walks the block just
	// below dstPtr, R9 keeps its start for the final store, R8 walks the
	// stack scratch.
	SUB $16, srcPtrLen
	SUB $16, dstPtr, R7
	MOVD R7, R9
	MOVD RSP, R8

	// For each set bit of srcPtrLen (8, 4, 2, 1) move that many bytes:
	//   src            -> scratch (overwrites the leading bytes)
	//   previous block -> dst     (the partial output block)
	TBZ $3, srcPtrLen, less_than8
	MOVD.P 8(srcPtr), R11
	MOVD.P R11, 8(R8)
	MOVD.P 8(R7), R12
	MOVD.P R12, 8(dstPtr)

less_than8:
	TBZ $2, srcPtrLen, less_than4
	MOVWU.P 4(srcPtr), R11
	MOVWU.P R11, 4(R8)
	MOVWU.P 4(R7), R12
	MOVWU.P R12, 4(dstPtr)

less_than4:
	TBZ $1, srcPtrLen, less_than2
	MOVHU.P 2(srcPtr), R11
	MOVHU.P R11, 2(R8)
	MOVHU.P 2(R7), R12
	MOVHU.P R12, 2(dstPtr)

less_than2:
	TBZ $0, srcPtrLen, xtsSm4DecTailEnc
	MOVBU (srcPtr), R11
	MOVBU R11, (R8)
	MOVBU (R7), R12
	MOVBU R12, (dstPtr)

xtsSm4DecTailEnc:
	// Run the cipher over the merged scratch block under the restored TW.
	VLD1 (RSP), [B0.B16]
	VEOR TW.B16, B0.B16, B0.B16
	VREV32 B0.B16, B0.B16
	// Spread words 1..3 of the block into lane 0 of B1..B3.
	VMOV B0.S[1], B1.S[0]
	VMOV B0.S[2], B2.S[0]
	VMOV B0.S[3], B3.S[0]

	MOVD rkSave, rk
	EOR R13, R13

tailDecLoop:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE tailDecLoop

	// Gather lane 0 of B2, B1, B0 behind B3's lane 0 (reversed word order).
	VMOV B2.S[0], B3.S[1]
	VMOV B1.S[0], B3.S[2]
	VMOV B0.S[0], B3.S[3]
	VREV32 B3.B16, B3.B16

	VEOR TW.B16, B3.B16, B3.B16
	VST1 [B3.B16], (R9)                // overwrite the previous output block

	B xtsSm4DecDone

xtsSm4DecLastBlock:
	// Final block when exactly 16 bytes remain.
	loadOneBlock

	MOVD rkSave, rk
	EOR R13, R13

decLastBlockLoop:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE decLastBlockLoop

	storeOneBlock
	mul2Inline                         // leave TW stepped past this block

xtsSm4DecDone:
	VST1 [TW.B16], (twPtr)             // hand the current tweak back
	RET
|
||||||
|
|
||||||
// func decryptSm4XtsGB(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
//
// XTS decryption of src into dst using the "GB" tweak-update macros
// (prepareGB8Tweaks / prepareGB4Tweaks / mul2GBInline, defined elsewhere).
//
// Arguments (Go stack ABI, 64 bytes of args, 128 bytes of frame):
//   xk      - pointer to the expanded round keys
//   tweak   - pointer to the 16-byte tweak; read on entry, written back on exit
//   dst     - output slice (only the data pointer is read here)
//   src     - input slice (data pointer and length are read here)
//
// Processing order: 128-byte groups, then at most one 64-byte group, then
// single 16-byte blocks while at least 32 bytes remain. The last 16..31
// bytes are handled by xtsSm4DecTail: exactly 16 bytes is a plain final
// block; 17..31 bytes is the stolen-ciphertext case, where the last full
// block is decrypted with the *next* tweak and the merged final block with
// the current one.
//
// NOTE(review): a non-zero remainder below 16 bytes is not special-cased;
// presumably the caller guarantees len(src) >= 16 - confirm.
//
// Every cipher pass is 8 loop iterations of 4 round macros, counted in R13.
// rk is reloaded from rkSave before each pass (presumably because the round
// macros advance rk - TODO confirm against the macro definitions).
TEXT ·decryptSm4XtsGB(SB),0,$128-64
	LOAD_SM4_AESNI_CONSTS()
	MOVD xk+0(FP), rk
	MOVD tweak+8(FP), twPtr
	MOVD dst+16(FP), dstPtr
	MOVD src+40(FP), srcPtr
	MOVD src_len+48(FP), srcPtrLen

	VEOR POLY.B16, POLY.B16, POLY.B16
	VEOR ZERO.B16, ZERO.B16, ZERO.B16

	// POLY = 0xE1 in the top byte of the upper 64-bit lane (presumably the
	// reduction constant consumed by the GB tweak macros - TODO confirm).
	MOVD $0xE1, I
	LSL $56, I
	VMOV I, POLY.D[1]

	MOVD rk, rkSave                    // keep the key pointer for later passes
	VLD1 (twPtr), [TW.B16]             // TW = caller's current tweak

xtsSm4DecOctets:
	// While at least 128 bytes remain, take eight blocks per pass.
	CMP $128, srcPtrLen
	BLT xtsSm4DecNibbles
	SUB $128, srcPtrLen

	prepareGB8Tweaks
	load8blocks
	MOVD rkSave, rk
	EOR R13, R13

decOctetsDec8Blocks:
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B0, B1, B2, B3, B4, B5, B6, B7)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B1, B2, B3, B0, B5, B6, B7, B4)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B2, B3, B0, B1, B6, B7, B4, B5)
	SM4_8BLOCKS_ROUND(rk, R19, K0, K1, K2, K3, B3, B0, B1, B2, B7, B4, B5, B6)

	ADD $1, R13
	CMP $8, R13
	BNE decOctetsDec8Blocks

	store8blocks
	B xtsSm4DecOctets

xtsSm4DecNibbles:
	// Fewer than 128 bytes remain here, so this stage runs at most once.
	CMP $64, srcPtrLen
	BLT xtsSm4DecSingles
	SUB $64, srcPtrLen

	prepareGB4Tweaks
	load4blocks
	MOVD rkSave, rk
	EOR R13, R13

decNibblesDec4Blocks:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)

	ADD $1, R13
	CMP $8, R13
	BNE decNibblesDec4Blocks

	store4blocks

xtsSm4DecSingles:
	// Take single blocks only while at least 32 bytes remain; the final
	// 16..31 bytes belong to the tail. The exit must target xtsSm4DecTail:
	// branching back to this label would spin forever, because nothing
	// changes between the compare and the branch.
	CMP $32, srcPtrLen
	BLT xtsSm4DecTail
	SUB $16, srcPtrLen

	loadOneBlock

	MOVD rkSave, rk
	EOR R13, R13

decSinglesDec4Blocks:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE decSinglesDec4Blocks

	storeOneBlock
	mul2GBInline                       // step TW for the next block

	B xtsSm4DecSingles

xtsSm4DecTail:
	CBZ srcPtrLen, xtsSm4DecDone

	// Exactly one block left: no stealing needed.
	CMP $16, srcPtrLen
	BEQ xtsSm4DecLastBlock

	// Stolen-ciphertext case: decrypt the last full block with the tweak
	// that follows the current one, keeping the current tweak in B4 for
	// the merged final block.
	VMOV TW.B16, B4.B16
	mul2GBInline
	loadOneBlock
	MOVD rkSave, rk
	EOR R13, R13

decLastCompleteBlockLoop:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE decLastCompleteBlockLoop
	storeOneBlock
	VMOV B4.B16, TW.B16                // restore the saved tweak
	// Scratch = block just produced (assumes storeOneBlock leaves its
	// output in B3 - TODO confirm against the macro).
	VST1 [B3.B16], (RSP)

	// srcPtrLen becomes the partial length (1..15). R7 walks the block just
	// below dstPtr, R9 keeps its start for the final store, R8 walks the
	// stack scratch.
	SUB $16, srcPtrLen
	SUB $16, dstPtr, R7
	MOVD R7, R9
	MOVD RSP, R8

	// For each set bit of srcPtrLen (8, 4, 2, 1) move that many bytes:
	//   src            -> scratch (overwrites the leading bytes)
	//   previous block -> dst     (the partial output block)
	TBZ $3, srcPtrLen, less_than8
	MOVD.P 8(srcPtr), R11
	MOVD.P R11, 8(R8)
	MOVD.P 8(R7), R12
	MOVD.P R12, 8(dstPtr)

less_than8:
	TBZ $2, srcPtrLen, less_than4
	MOVWU.P 4(srcPtr), R11
	MOVWU.P R11, 4(R8)
	MOVWU.P 4(R7), R12
	MOVWU.P R12, 4(dstPtr)

less_than4:
	TBZ $1, srcPtrLen, less_than2
	MOVHU.P 2(srcPtr), R11
	MOVHU.P R11, 2(R8)
	MOVHU.P 2(R7), R12
	MOVHU.P R12, 2(dstPtr)

less_than2:
	TBZ $0, srcPtrLen, xtsSm4DecTailEnc
	MOVBU (srcPtr), R11
	MOVBU R11, (R8)
	MOVBU (R7), R12
	MOVBU R12, (dstPtr)

xtsSm4DecTailEnc:
	// Run the cipher over the merged scratch block under the restored TW.
	VLD1 (RSP), [B0.B16]
	VEOR TW.B16, B0.B16, B0.B16
	VREV32 B0.B16, B0.B16
	// Spread words 1..3 of the block into lane 0 of B1..B3.
	VMOV B0.S[1], B1.S[0]
	VMOV B0.S[2], B2.S[0]
	VMOV B0.S[3], B3.S[0]

	MOVD rkSave, rk
	EOR R13, R13

tailDecLoop:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE tailDecLoop

	// Gather lane 0 of B2, B1, B0 behind B3's lane 0 (reversed word order).
	VMOV B2.S[0], B3.S[1]
	VMOV B1.S[0], B3.S[2]
	VMOV B0.S[0], B3.S[3]
	VREV32 B3.B16, B3.B16

	VEOR TW.B16, B3.B16, B3.B16
	VST1 [B3.B16], (R9)                // overwrite the previous output block

	B xtsSm4DecDone

xtsSm4DecLastBlock:
	// Final block when exactly 16 bytes remain.
	loadOneBlock

	MOVD rkSave, rk
	EOR R13, R13

decLastBlockLoop:
	SM4_ROUND(rk, R19, K0, K1, K2, B0, B1, B2, B3)
	SM4_ROUND(rk, R19, K0, K1, K2, B1, B2, B3, B0)
	SM4_ROUND(rk, R19, K0, K1, K2, B2, B3, B0, B1)
	SM4_ROUND(rk, R19, K0, K1, K2, B3, B0, B1, B2)
	ADD $1, R13
	CMP $8, R13
	BNE decLastBlockLoop

	storeOneBlock
	mul2GBInline                       // leave TW stepped past this block

xtsSm4DecDone:
	VST1 [TW.B16], (twPtr)             // hand the current tweak back
	RET
|
||||||
|
Loading…
x
Reference in New Issue
Block a user