mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
sm3: kdf arm64 optimization
This commit is contained in:
parent
bda03f4f43
commit
8e050f1064
@ -8,12 +8,12 @@
|
||||
#include "sm3_const_asm.s"
|
||||
|
||||
#define a V0
|
||||
#define b V1
|
||||
#define c V2
|
||||
#define d V3
|
||||
#define e V4
|
||||
#define f V5
|
||||
#define g V6
|
||||
#define e V1
|
||||
#define b V2
|
||||
#define f V3
|
||||
#define c V4
|
||||
#define g V5
|
||||
#define d V6
|
||||
#define h V7
|
||||
|
||||
#define tmp1 V8
|
||||
@ -21,6 +21,15 @@
|
||||
#define tmp3 V10
|
||||
#define tmp4 V11
|
||||
|
||||
#define aSave V24
|
||||
#define bSave V25
|
||||
#define cSave V26
|
||||
#define dSave V27
|
||||
#define eSave V28
|
||||
#define fSave V29
|
||||
#define gSave V30
|
||||
#define hSave V31
|
||||
|
||||
// input: from high to low
|
||||
// t0 = t0.S3, t0.S2, t0.S1, t0.S0
|
||||
// t1 = t1.S3, t1.S2, t1.S1, t1.S0
|
||||
@ -157,7 +166,6 @@
|
||||
TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
||||
#define digPtr R0
|
||||
#define srcPtrPtr R1
|
||||
#define statePtr R2
|
||||
#define blockCount R3
|
||||
#define digSave R4
|
||||
#define wordStart R5
|
||||
@ -168,39 +176,40 @@ TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
||||
#define wordPtr R10
|
||||
MOVD dig+0(FP), digPtr
|
||||
MOVD p+8(FP), srcPtrPtr
|
||||
MOVD buffer+16(FP), statePtr
|
||||
MOVD buffer+16(FP), wordStart
|
||||
MOVD blocks+24(FP), blockCount
|
||||
|
||||
// load state
|
||||
MOVD digPtr, digSave
|
||||
MOVD.P 8(digPtr), R20
|
||||
VLD1.P 16(R20), [a.S4]
|
||||
VLD1 (R20), [e.S4]
|
||||
VLD1 (R20), [a.S4, e.S4]
|
||||
MOVD.P 8(digPtr), R20
|
||||
VLD1.P 16(R20), [b.S4]
|
||||
VLD1 (R20), [f.S4]
|
||||
VLD1 (R20), [b.S4, f.S4]
|
||||
MOVD.P 8(digPtr), R20
|
||||
VLD1.P 16(R20), [c.S4]
|
||||
VLD1 (R20), [g.S4]
|
||||
VLD1 (R20), [c.S4, g.S4]
|
||||
MOVD (digPtr), R20
|
||||
VLD1.P 16(R20), [d.S4]
|
||||
VLD1 (R20), [h.S4]
|
||||
VLD1 (R20), [d.S4, h.S4]
|
||||
|
||||
// transpose state
|
||||
TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2, tmp3, tmp4)
|
||||
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2, tmp3, tmp4)
|
||||
|
||||
// store state to temporary buffer
|
||||
MOVD statePtr, wordStart
|
||||
VST1.P [a.S4, b.S4, c.S4, d.S4], 64(wordStart)
|
||||
VST1.P [e.S4, f.S4, g.S4, h.S4], 64(wordStart)
|
||||
|
||||
MOVD.P 8(srcPtrPtr), srcPtr1
|
||||
MOVD.P 8(srcPtrPtr), srcPtr2
|
||||
MOVD.P 8(srcPtrPtr), srcPtr3
|
||||
MOVD (srcPtrPtr), srcPtr4
|
||||
|
||||
loop:
|
||||
// save state
|
||||
VMOV a.B16, aSave.B16
|
||||
VMOV b.B16, bSave.B16
|
||||
VMOV c.B16, cSave.B16
|
||||
VMOV d.B16, dSave.B16
|
||||
VMOV e.B16, eSave.B16
|
||||
VMOV f.B16, fSave.B16
|
||||
VMOV g.B16, gSave.B16
|
||||
VMOV h.B16, hSave.B16
|
||||
|
||||
// reset wordPtr
|
||||
MOVD wordStart, wordPtr
|
||||
|
||||
@ -277,20 +286,14 @@ loop:
|
||||
ROUND_16_63(62, T30, c, d, e, f, g, h, a, b)
|
||||
ROUND_16_63(63, T31, b, c, d, e, f, g, h, a)
|
||||
|
||||
MOVD statePtr, R20
|
||||
VLD1.P 64(R20), [V8.S4, V9.S4, V10.S4, V11.S4]
|
||||
VLD1 (R20), [V12.S4, V13.S4, V14.S4, V15.S4]
|
||||
VEOR a.B16, V8.B16, a.B16
|
||||
VEOR b.B16, V9.B16, b.B16
|
||||
VEOR c.B16, V10.B16, c.B16
|
||||
VEOR d.B16, V11.B16, d.B16
|
||||
VEOR e.B16, V12.B16, e.B16
|
||||
VEOR f.B16, V13.B16, f.B16
|
||||
VEOR g.B16, V14.B16, g.B16
|
||||
VEOR h.B16, V15.B16, h.B16
|
||||
MOVD statePtr, R20
|
||||
VST1.P [a.S4, b.S4, c.S4, d.S4], 64(R20)
|
||||
VST1 [e.S4, f.S4, g.S4, h.S4], (R20)
|
||||
VEOR a.B16, aSave.B16, a.B16
|
||||
VEOR b.B16, bSave.B16, b.B16
|
||||
VEOR c.B16, cSave.B16, c.B16
|
||||
VEOR d.B16, dSave.B16, d.B16
|
||||
VEOR e.B16, eSave.B16, e.B16
|
||||
VEOR f.B16, fSave.B16, f.B16
|
||||
VEOR g.B16, gSave.B16, g.B16
|
||||
VEOR h.B16, hSave.B16, h.B16
|
||||
|
||||
SUB $1, blockCount
|
||||
CBNZ blockCount, loop
|
||||
@ -300,17 +303,13 @@ loop:
|
||||
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2, tmp3, tmp4)
|
||||
|
||||
MOVD.P 8(digSave), R20
|
||||
VST1.P [a.S4], 16(R20)
|
||||
VST1 [e.S4], (R20)
|
||||
VST1 [a.S4, e.S4], (R20)
|
||||
MOVD.P 8(digSave), R20
|
||||
VST1.P [b.S4], 16(R20)
|
||||
VST1 [f.S4], (R20)
|
||||
VST1 [b.S4, f.S4], (R20)
|
||||
MOVD.P 8(digSave), R20
|
||||
VST1.P [c.S4], 16(R20)
|
||||
VST1 [g.S4], (R20)
|
||||
VST1 [c.S4, g.S4], (R20)
|
||||
MOVD (digSave), R20
|
||||
VST1.P [d.S4], 16(R20)
|
||||
VST1 [h.S4], (R20)
|
||||
VST1 [d.S4, h.S4], (R20)
|
||||
|
||||
RET
|
||||
|
||||
|
@ -74,7 +74,7 @@ GLOBL mask<>(SB), 8, $64
|
||||
#define ROUND_00_11(index, a, b, c, d, e, f, g, h) \
|
||||
PROLD(a, TMP0, 12) \
|
||||
VLR TMP0, TMP1 \
|
||||
VLREPF (index*4)(R3), TMP2 \
|
||||
VLREPF (index*4)(kPtr), TMP2 \ // It seems that the VREPIF instruction is not supported yet.
|
||||
VAF TMP2, TMP0, TMP0 \
|
||||
VAF e, TMP0, TMP0 \
|
||||
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
||||
@ -129,7 +129,7 @@ GLOBL mask<>(SB), 8, $64
|
||||
MESSAGE_SCHEDULE(index) \ // TMP1 is Wt+4 now, Pls do not use it
|
||||
PROLD(a, TMP0, 12) \
|
||||
VLR TMP0, TMP4 \
|
||||
VLREPF (index*4)(R3), TMP2 \
|
||||
VLREPF (index*4)(kPtr), TMP2 \ // It seems that the VREPIF instruction is not supported yet.
|
||||
VAF TMP2, TMP0, TMP0 \
|
||||
VAF e, TMP0, TMP0 \
|
||||
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
||||
@ -166,7 +166,6 @@ TEXT ·copyResultsBy4(SB),NOSPLIT,$0
|
||||
MOVD dig+0(FP), digPtr
|
||||
MOVD dst+8(FP), dstPtr
|
||||
|
||||
// load state
|
||||
VLM (digPtr), V0, V7
|
||||
VSTM V0, V7, (dstPtr)
|
||||
|
||||
@ -174,11 +173,13 @@ TEXT ·copyResultsBy4(SB),NOSPLIT,$0
|
||||
#undef digPtr
|
||||
#undef dstPtr
|
||||
|
||||
// Uses general-purpose registers R1-R11.
|
||||
// blockMultBy4(dig **[8]uint32, p **byte, buffer *byte, blocks int)
|
||||
TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
||||
#define digPtr R11
|
||||
#define srcPtrPtr R1
|
||||
#define statePtr R2
|
||||
#define kPtr R3
|
||||
#define blockCount R5
|
||||
#define srcPtr1 R6
|
||||
#define srcPtr2 R7
|
||||
@ -212,7 +213,7 @@ TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
||||
MOVD 24(srcPtrPtr), srcPtr4
|
||||
MOVD $0, srcPtrPtr
|
||||
|
||||
MOVD $·_K+0(SB), R3
|
||||
MOVD $·_K+0(SB), kPtr
|
||||
|
||||
loop:
|
||||
// save state
|
||||
|
Loading…
x
Reference in New Issue
Block a user