mirror of https://github.com/emmansun/gmsm.git
sm4: arm64 asm redefine register usage and const loading
parent 7bb7903ce2
commit 126ee25d2a
@@ -28,9 +28,9 @@ GLOBL fk_mask<>(SB), (16+8), $16
 	VDUP R20, NIBBLE_MASK.S4 \
 	MOVD $m1_2<>(SB), R20 \
 	VLD1 (R20), [M1L.B16, M1H.B16, M2L.B16, M2H.B16] \
 	MOVD $inverse_shift_rows<>(SB), R20 \
 	VLD1 (R20), [INVERSE_SHIFT_ROWS.B16] \
 	MOVD $r08_mask<>(SB), R20 \
 	VLD1 (R20), [R08_MASK.B16] \

 // input: from high to low
@@ -2,27 +2,27 @@

 #include "textflag.h"

-#define x V0
-#define y V1
-#define t0 V2
-#define t1 V3
-#define t2 V4
-#define t3 V5
-#define t4 V8
-#define t5 V9
-#define t6 V10
-#define t7 V11
-#define ZERO V16
-#define NIBBLE_MASK V20
-#define INVERSE_SHIFT_ROWS V21
-#define M1L V22
-#define M1H V23
-#define M2L V24
-#define M2H V25
-#define R08_MASK V26
+#define t0 V0
+#define t1 V1
+#define t2 V2
+#define t3 V3
+#define t4 V4
+#define t5 V5
+#define t6 V6
+#define t7 V7
+#define x V8
+#define y V9
+#define XTMP6 V10
+#define XTMP7 V11
+#define M1L V20
+#define M1H V21
+#define M2L V22
+#define M2H V23
+#define R08_MASK V24
+#define INVERSE_SHIFT_ROWS V25
+#define NIBBLE_MASK V26
 #define FK_MASK V27
-#define XTMP6 V6
-#define XTMP7 V7
+#define ZERO V28

 #include "aesni_macros_arm64.s"

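Recap of the remapping above, derived only from the defines in this hunk: the working blocks t0–t7 now sit in the low registers V0–V7, the temporaries move to V8–V11, and the constant tables are grouped in V20–V28 (FK_MASK stays on V27).

    // New vector register layout (taken from the #define block above):
    //   V0-V7    t0-t7                    working blocks
    //   V8-V9    x, y                     temporaries
    //   V10-V11  XTMP6, XTMP7             temporaries
    //   V20-V23  M1L, M1H, M2L, M2H       constant tables
    //   V24      R08_MASK
    //   V25      INVERSE_SHIFT_ROWS
    //   V26      NIBBLE_MASK
    //   V27      FK_MASK                  (unchanged)
    //   V28      ZERO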
@@ -48,9 +48,9 @@
 	MOVW.P R2, 4(R10); \
 	MOVW.P R2, -4(R11)

-#define load_global_data_1() \
+#define LOAD_SM4KEY_AESNI_CONSTS() \
 	MOVW $0x0F0F0F0F, R0 \
-	VMOV R0, NIBBLE_MASK.S4 \
+	VDUP R0, NIBBLE_MASK.S4 \
 	MOVD $m1_2<>(SB), R0 \
 	VLD1 (R0), [M1L.B16, M1H.B16, M2L.B16, M2H.B16] \
 	MOVD $fk_mask<>(SB), R0 \
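Besides renaming load_global_data_1 to LOAD_SM4KEY_AESNI_CONSTS, this hunk switches VMOV to VDUP when building NIBBLE_MASK; DUP broadcasts the 32-bit value in R0 into all four lanes of the vector register. For reference, a small pure-Go sketch of the value that ends up in the register (illustrative only, not part of the package):

    package main

    import (
    	"encoding/binary"
    	"fmt"
    )

    // nibbleMask builds the 128-bit value that
    // `MOVW $0x0F0F0F0F, R0; VDUP R0, NIBBLE_MASK.S4` leaves in the vector
    // register: 0x0F0F0F0F broadcast into all four 32-bit lanes.
    func nibbleMask() [16]byte {
    	var v [16]byte
    	for lane := 0; lane < 4; lane++ {
    		binary.LittleEndian.PutUint32(v[lane*4:], 0x0F0F0F0F)
    	}
    	return v
    }

    func main() {
    	fmt.Printf("% x\n", nibbleMask()) // 0f 0f ... 0f
    }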
@@ -58,12 +58,6 @@
 	MOVD $inverse_shift_rows<>(SB), R0 \
 	VLD1 (R0), [INVERSE_SHIFT_ROWS.B16]

-
-#define load_global_data_2() \
-	load_global_data_1() \
-	MOVD $r08_mask<>(SB), R0 \
-	VLD1 (R0), [R08_MASK.B16] \
-
 #define SM4EKEY_EXPORT_KEYS() \
 	VMOV V9.S[3], V10.S[0] \
 	VMOV V9.S[2], V10.S[1] \
@@ -103,7 +97,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0
 	CMP $1, R12
 	BEQ sm4ekey

-	load_global_data_1()
+	LOAD_SM4KEY_AESNI_CONSTS()

 	VLD1 (R8), [t0.B16]
 	VREV32 t0.B16, t0.B16
@@ -128,9 +122,8 @@ ksLoop:
 	RET

 sm4ekey:
-	LDP fk_mask<>(SB), (R0, R1)
-	VMOV R0, FK_MASK.D[0]
-	VMOV R1, FK_MASK.D[1]
+	MOVD $fk_mask<>(SB), R0
+	VLD1 (R0), [FK_MASK.B16]
 	VLD1 (R8), [V9.B16]
 	VREV32 V9.B16, V9.B16
 	VEOR FK_MASK.B16, V9.B16, V9.B16
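Here the three-instruction sequence (LDP into general registers plus two VMOVs into vector lanes) becomes a single 16-byte VLD1 of the fk_mask constant. The following VREV32 and VEOR perform the SM4 key-whitening step K = MK XOR FK. A minimal Go sketch of that step, assuming fk_mask<>(SB) holds the standard SM4 FK constants (the symbol is only referenced, not defined, in this diff):

    package main

    import "fmt"

    // Standard SM4 FK constants (GB/T 32907-2016); assumed to be the contents
    // of fk_mask<>(SB) that VLD1 loads into FK_MASK above.
    var fk = [4]uint32{0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc}

    // whitenKey mirrors the VREV32 + VEOR pair: treat the 16-byte user key as
    // four big-endian words and XOR each with FK. Illustrative sketch only.
    func whitenKey(mk [4]uint32) (k [4]uint32) {
    	for i := range mk {
    		k[i] = mk[i] ^ fk[i]
    	}
    	return k
    }

    func main() {
    	fmt.Printf("%08x\n", whitenKey([4]uint32{0x01234567, 0x89abcdef, 0xfedcba98, 0x76543210}))
    }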
@@ -166,6 +159,8 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
 	CMP $1, R11
 	BEQ sm4niblocks

+	LOAD_SM4_AESNI_CONSTS()
+
 	CMP $128, R12
 	BEQ double_enc

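The BEQ sm4niblocks guard above jumps to the SM4 hardware-instruction path when a caller-supplied flag equals 1; only the fall-through NEON path needs LOAD_SM4_AESNI_CONSTS(), which is why the constant loading moves here. A hypothetical sketch of how such a flag is typically derived on the Go side (names are illustrative, not gmsm's actual API):

    package main

    import (
    	"fmt"

    	"golang.org/x/sys/cpu"
    )

    // sm4InstFlag is a hypothetical helper: 1 would select the SM4 hardware
    // path in the assembly, 0 the generic NEON/AES-based path.
    func sm4InstFlag() int {
    	if cpu.ARM64.HasSM4 {
    		return 1
    	}
    	return 0
    }

    func main() {
    	fmt.Println("inst flag:", sm4InstFlag())
    }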
@@ -176,8 +171,6 @@ TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
 	VREV32 t3.B16, t3.B16
 	PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)

-	load_global_data_2()
-
 	VEOR ZERO.B16, ZERO.B16, ZERO.B16
 	EOR R0, R0

@@ -214,8 +207,6 @@ double_enc:
 	PRE_TRANSPOSE_MATRIX(t0, t1, t2, t3, x, y, XTMP6, XTMP7)
 	PRE_TRANSPOSE_MATRIX(t4, t5, t6, t7, x, y, XTMP6, XTMP7)

-	load_global_data_2()
-
 	VEOR ZERO.B16, ZERO.B16, ZERO.B16
 	EOR R0, R0

@@ -271,7 +262,7 @@ TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
 	VMOV t0.S[2], t2.S[0]
 	VMOV t0.S[3], t3.S[0]

-	load_global_data_2()
+	LOAD_SM4_AESNI_CONSTS()

 	VEOR ZERO.B16, ZERO.B16, ZERO.B16
 	EOR R0, R0