mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-27 04:36:19 +08:00
sm3: kdf arm64 optimization
This commit is contained in:
parent
bda03f4f43
commit
8e050f1064
@ -8,12 +8,12 @@
|
|||||||
#include "sm3_const_asm.s"
|
#include "sm3_const_asm.s"
|
||||||
|
|
||||||
#define a V0
|
#define a V0
|
||||||
#define b V1
|
#define e V1
|
||||||
#define c V2
|
#define b V2
|
||||||
#define d V3
|
#define f V3
|
||||||
#define e V4
|
#define c V4
|
||||||
#define f V5
|
#define g V5
|
||||||
#define g V6
|
#define d V6
|
||||||
#define h V7
|
#define h V7
|
||||||
|
|
||||||
#define tmp1 V8
|
#define tmp1 V8
|
||||||
@ -21,6 +21,15 @@
|
|||||||
#define tmp3 V10
|
#define tmp3 V10
|
||||||
#define tmp4 V11
|
#define tmp4 V11
|
||||||
|
|
||||||
|
#define aSave V24
|
||||||
|
#define bSave V25
|
||||||
|
#define cSave V26
|
||||||
|
#define dSave V27
|
||||||
|
#define eSave V28
|
||||||
|
#define fSave V29
|
||||||
|
#define gSave V30
|
||||||
|
#define hSave V31
|
||||||
|
|
||||||
// input: from high to low
|
// input: from high to low
|
||||||
// t0 = t0.S3, t0.S2, t0.S1, t0.S0
|
// t0 = t0.S3, t0.S2, t0.S1, t0.S0
|
||||||
// t1 = t1.S3, t1.S2, t1.S1, t1.S0
|
// t1 = t1.S3, t1.S2, t1.S1, t1.S0
|
||||||
@ -157,7 +166,6 @@
|
|||||||
TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
||||||
#define digPtr R0
|
#define digPtr R0
|
||||||
#define srcPtrPtr R1
|
#define srcPtrPtr R1
|
||||||
#define statePtr R2
|
|
||||||
#define blockCount R3
|
#define blockCount R3
|
||||||
#define digSave R4
|
#define digSave R4
|
||||||
#define wordStart R5
|
#define wordStart R5
|
||||||
@ -168,39 +176,40 @@ TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
|||||||
#define wordPtr R10
|
#define wordPtr R10
|
||||||
MOVD dig+0(FP), digPtr
|
MOVD dig+0(FP), digPtr
|
||||||
MOVD p+8(FP), srcPtrPtr
|
MOVD p+8(FP), srcPtrPtr
|
||||||
MOVD buffer+16(FP), statePtr
|
MOVD buffer+16(FP), wordStart
|
||||||
MOVD blocks+24(FP), blockCount
|
MOVD blocks+24(FP), blockCount
|
||||||
|
|
||||||
// load state
|
// load state
|
||||||
MOVD digPtr, digSave
|
MOVD digPtr, digSave
|
||||||
MOVD.P 8(digPtr), R20
|
MOVD.P 8(digPtr), R20
|
||||||
VLD1.P 16(R20), [a.S4]
|
VLD1 (R20), [a.S4, e.S4]
|
||||||
VLD1 (R20), [e.S4]
|
|
||||||
MOVD.P 8(digPtr), R20
|
MOVD.P 8(digPtr), R20
|
||||||
VLD1.P 16(R20), [b.S4]
|
VLD1 (R20), [b.S4, f.S4]
|
||||||
VLD1 (R20), [f.S4]
|
|
||||||
MOVD.P 8(digPtr), R20
|
MOVD.P 8(digPtr), R20
|
||||||
VLD1.P 16(R20), [c.S4]
|
VLD1 (R20), [c.S4, g.S4]
|
||||||
VLD1 (R20), [g.S4]
|
|
||||||
MOVD (digPtr), R20
|
MOVD (digPtr), R20
|
||||||
VLD1.P 16(R20), [d.S4]
|
VLD1 (R20), [d.S4, h.S4]
|
||||||
VLD1 (R20), [h.S4]
|
|
||||||
|
|
||||||
// transpose state
|
// transpose state
|
||||||
TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2, tmp3, tmp4)
|
TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2, tmp3, tmp4)
|
||||||
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2, tmp3, tmp4)
|
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2, tmp3, tmp4)
|
||||||
|
|
||||||
// store state to temporary buffer
|
|
||||||
MOVD statePtr, wordStart
|
|
||||||
VST1.P [a.S4, b.S4, c.S4, d.S4], 64(wordStart)
|
|
||||||
VST1.P [e.S4, f.S4, g.S4, h.S4], 64(wordStart)
|
|
||||||
|
|
||||||
MOVD.P 8(srcPtrPtr), srcPtr1
|
MOVD.P 8(srcPtrPtr), srcPtr1
|
||||||
MOVD.P 8(srcPtrPtr), srcPtr2
|
MOVD.P 8(srcPtrPtr), srcPtr2
|
||||||
MOVD.P 8(srcPtrPtr), srcPtr3
|
MOVD.P 8(srcPtrPtr), srcPtr3
|
||||||
MOVD (srcPtrPtr), srcPtr4
|
MOVD (srcPtrPtr), srcPtr4
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
|
// save state
|
||||||
|
VMOV a.B16, aSave.B16
|
||||||
|
VMOV b.B16, bSave.B16
|
||||||
|
VMOV c.B16, cSave.B16
|
||||||
|
VMOV d.B16, dSave.B16
|
||||||
|
VMOV e.B16, eSave.B16
|
||||||
|
VMOV f.B16, fSave.B16
|
||||||
|
VMOV g.B16, gSave.B16
|
||||||
|
VMOV h.B16, hSave.B16
|
||||||
|
|
||||||
// reset wordPtr
|
// reset wordPtr
|
||||||
MOVD wordStart, wordPtr
|
MOVD wordStart, wordPtr
|
||||||
|
|
||||||
@ -277,20 +286,14 @@ loop:
|
|||||||
ROUND_16_63(62, T30, c, d, e, f, g, h, a, b)
|
ROUND_16_63(62, T30, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(63, T31, b, c, d, e, f, g, h, a)
|
ROUND_16_63(63, T31, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
MOVD statePtr, R20
|
VEOR a.B16, aSave.B16, a.B16
|
||||||
VLD1.P 64(R20), [V8.S4, V9.S4, V10.S4, V11.S4]
|
VEOR b.B16, bSave.B16, b.B16
|
||||||
VLD1 (R20), [V12.S4, V13.S4, V14.S4, V15.S4]
|
VEOR c.B16, cSave.B16, c.B16
|
||||||
VEOR a.B16, V8.B16, a.B16
|
VEOR d.B16, dSave.B16, d.B16
|
||||||
VEOR b.B16, V9.B16, b.B16
|
VEOR e.B16, eSave.B16, e.B16
|
||||||
VEOR c.B16, V10.B16, c.B16
|
VEOR f.B16, fSave.B16, f.B16
|
||||||
VEOR d.B16, V11.B16, d.B16
|
VEOR g.B16, gSave.B16, g.B16
|
||||||
VEOR e.B16, V12.B16, e.B16
|
VEOR h.B16, hSave.B16, h.B16
|
||||||
VEOR f.B16, V13.B16, f.B16
|
|
||||||
VEOR g.B16, V14.B16, g.B16
|
|
||||||
VEOR h.B16, V15.B16, h.B16
|
|
||||||
MOVD statePtr, R20
|
|
||||||
VST1.P [a.S4, b.S4, c.S4, d.S4], 64(R20)
|
|
||||||
VST1 [e.S4, f.S4, g.S4, h.S4], (R20)
|
|
||||||
|
|
||||||
SUB $1, blockCount
|
SUB $1, blockCount
|
||||||
CBNZ blockCount, loop
|
CBNZ blockCount, loop
|
||||||
@ -300,17 +303,13 @@ loop:
|
|||||||
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2, tmp3, tmp4)
|
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2, tmp3, tmp4)
|
||||||
|
|
||||||
MOVD.P 8(digSave), R20
|
MOVD.P 8(digSave), R20
|
||||||
VST1.P [a.S4], 16(R20)
|
VST1 [a.S4, e.S4], (R20)
|
||||||
VST1 [e.S4], (R20)
|
|
||||||
MOVD.P 8(digSave), R20
|
MOVD.P 8(digSave), R20
|
||||||
VST1.P [b.S4], 16(R20)
|
VST1 [b.S4, f.S4], (R20)
|
||||||
VST1 [f.S4], (R20)
|
|
||||||
MOVD.P 8(digSave), R20
|
MOVD.P 8(digSave), R20
|
||||||
VST1.P [c.S4], 16(R20)
|
VST1 [c.S4, g.S4], (R20)
|
||||||
VST1 [g.S4], (R20)
|
|
||||||
MOVD (digSave), R20
|
MOVD (digSave), R20
|
||||||
VST1.P [d.S4], 16(R20)
|
VST1 [d.S4, h.S4], (R20)
|
||||||
VST1 [h.S4], (R20)
|
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
@ -74,7 +74,7 @@ GLOBL mask<>(SB), 8, $64
|
|||||||
#define ROUND_00_11(index, a, b, c, d, e, f, g, h) \
|
#define ROUND_00_11(index, a, b, c, d, e, f, g, h) \
|
||||||
PROLD(a, TMP0, 12) \
|
PROLD(a, TMP0, 12) \
|
||||||
VLR TMP0, TMP1 \
|
VLR TMP0, TMP1 \
|
||||||
VLREPF (index*4)(R3), TMP2 \
|
VLREPF (index*4)(kPtr), TMP2 \ // It seems that the VREPIF instruction is not supported yet.
|
||||||
VAF TMP2, TMP0, TMP0 \
|
VAF TMP2, TMP0, TMP0 \
|
||||||
VAF e, TMP0, TMP0 \
|
VAF e, TMP0, TMP0 \
|
||||||
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
||||||
@ -129,7 +129,7 @@ GLOBL mask<>(SB), 8, $64
|
|||||||
MESSAGE_SCHEDULE(index) \ // TMP1 is Wt+4 now, Pls do not use it
|
MESSAGE_SCHEDULE(index) \ // TMP1 is Wt+4 now, Pls do not use it
|
||||||
PROLD(a, TMP0, 12) \
|
PROLD(a, TMP0, 12) \
|
||||||
VLR TMP0, TMP4 \
|
VLR TMP0, TMP4 \
|
||||||
VLREPF (index*4)(R3), TMP2 \
|
VLREPF (index*4)(kPtr), TMP2 \ // It seems that the VREPIF instruction is not supported yet.
|
||||||
VAF TMP2, TMP0, TMP0 \
|
VAF TMP2, TMP0, TMP0 \
|
||||||
VAF e, TMP0, TMP0 \
|
VAF e, TMP0, TMP0 \
|
||||||
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
||||||
@ -166,7 +166,6 @@ TEXT ·copyResultsBy4(SB),NOSPLIT,$0
|
|||||||
MOVD dig+0(FP), digPtr
|
MOVD dig+0(FP), digPtr
|
||||||
MOVD dst+8(FP), dstPtr
|
MOVD dst+8(FP), dstPtr
|
||||||
|
|
||||||
// load state
|
|
||||||
VLM (digPtr), V0, V7
|
VLM (digPtr), V0, V7
|
||||||
VSTM V0, V7, (dstPtr)
|
VSTM V0, V7, (dstPtr)
|
||||||
|
|
||||||
@ -174,11 +173,13 @@ TEXT ·copyResultsBy4(SB),NOSPLIT,$0
|
|||||||
#undef digPtr
|
#undef digPtr
|
||||||
#undef dstPtr
|
#undef dstPtr
|
||||||
|
|
||||||
|
// Used general purpose registers R1-R11.
|
||||||
// blockMultBy4(dig **[8]uint32, p **byte, buffer *byte, blocks int)
|
// blockMultBy4(dig **[8]uint32, p **byte, buffer *byte, blocks int)
|
||||||
TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
||||||
#define digPtr R11
|
#define digPtr R11
|
||||||
#define srcPtrPtr R1
|
#define srcPtrPtr R1
|
||||||
#define statePtr R2
|
#define statePtr R2
|
||||||
|
#define kPtr R3
|
||||||
#define blockCount R5
|
#define blockCount R5
|
||||||
#define srcPtr1 R6
|
#define srcPtr1 R6
|
||||||
#define srcPtr2 R7
|
#define srcPtr2 R7
|
||||||
@ -212,7 +213,7 @@ TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
|||||||
MOVD 24(srcPtrPtr), srcPtr4
|
MOVD 24(srcPtrPtr), srcPtr4
|
||||||
MOVD $0, srcPtrPtr
|
MOVD $0, srcPtrPtr
|
||||||
|
|
||||||
MOVD $·_K+0(SB), R3
|
MOVD $·_K+0(SB), kPtr
|
||||||
|
|
||||||
loop:
|
loop:
|
||||||
// save state
|
// save state
|
||||||
|
Loading…
x
Reference in New Issue
Block a user