Merge branch 'main' of https://github.com/emmansun/gmsm

Commit: 4d24880310
@@ -8,7 +8,16 @@
 #define t3 V5
 #define ZERO V16
 #define FLIP_MASK V17
+#define NIBBLE_MASK V20
+#define INVERSE_SHIFT_ROWS V30
+#define M1L V22
+#define M1H V23
+#define M2L V24
+#define M2H V25
+#define R08_MASK V26
+#define R16_MASK V27
+#define R24_MASK V28
+#define FK_MASK V29
 #define XTMP6 V6
 #define XTMP7 V7
 
@@ -23,8 +32,8 @@ DATA nibble_mask<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F
 GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16
 
 // inverse shift rows
-DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00
-DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508
+DATA inverse_shift_rows<>+0x00(SB)/8, $0x0106050403020700
+DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E0D0C0B0A0908
 GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16
 
 // Affine transform 1 (low and high nibbles)
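Note on byte order: each `DATA .../8` directive stores its 64-bit immediate in target byte order, which is little-endian on arm64, so the first table byte is the lowest byte of the first quadword. A minimal Go sketch (illustrative only) of how the new inverse_shift_rows bytes land in memory:

package main

import (
	"encoding/binary"
	"fmt"
)

func main() {
	// The two quadwords from the new inverse_shift_rows<> DATA directives;
	// the assembler writes them little-endian on arm64.
	var tbl [16]byte
	binary.LittleEndian.PutUint64(tbl[0:8], 0x0106050403020700)
	binary.LittleEndian.PutUint64(tbl[8:16], 0x0F0E0D0C0B0A0908)
	fmt.Printf("%x\n", tbl[:]) // 00 07 02 03 04 05 06 01 08 09 0a 0b 0c 0d 0e 0f
}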
@@ -64,37 +73,19 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
 
 #define SM4_SBOX(x, y) \
 	; \ //############################# inner affine ############################//
-	LDP nibble_mask<>(SB), (R0, R1); \
-	VMOV R0, XTMP6.D[0]; \
-	VMOV R1, XTMP6.D[1]; \
-	VAND x.B16, XTMP6.B16, XTMP7.B16; \
-	LDP m1_low<>(SB), (R0, R1); \
-	VMOV R0, y.D[0]; \
-	VMOV R1, y.D[1]; \
-	VTBL XTMP7.B16, [y.B16], y.B16; \
+	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
+	VTBL XTMP7.B16, [M1L.B16], y.B16; \
 	VUSHR $4, x.D2, x.D2; \
-	VAND x.B16, XTMP6.B16, XTMP7.B16; \
-	LDP m1_low<>(SB), (R0, R1); \
-	VMOV R0, V8.D[0]; \
-	VMOV R1, V8.D[1]; \
-	VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \
+	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
+	VTBL XTMP7.B16, [M1H.B16], XTMP7.B16; \
 	VEOR y.B16, XTMP7.B16, x.B16; \
-	LDP inverse_shift_rows<>(SB), (R0, R1); \
-	VMOV R0, V8.D[0]; \
-	VMOV R1, V8.D[1]; \
-	VTBL V8.B16, [x.B16], x.B16; \
+	VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16; \
 	AESE ZERO.B16, x.B16; \
-	VAND x.B16, XTMP6.B16, XTMP7.B16; \
-	LDP m2_low<>(SB), (R0, R1); \
-	VMOV R0, y.D[0]; \
-	VMOV R1, y.D[1]; \
-	VTBL XTMP7.B16, [y.B16], y.B16; \
+	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
+	VTBL XTMP7.B16, [M2L.B16], y.B16; \
 	VUSHR $4, x.D2, x.D2; \
-	VAND x.B16, XTMP6.B16, XTMP7.B16; \
-	LDP m2_high<>(SB), (R0, R1); \
-	VMOV R0, V8.D[0]; \
-	VMOV R1, V8.D[1]; \
-	VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \
+	VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \
+	VTBL XTMP7.B16, [M2H.B16], XTMP7.B16; \
 	VEOR y.B16, XTMP7.B16, x.B16
 
 #define SM4_TAO_L1(x, y) \
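The rewrite above depends on the constants preloaded into dedicated registers (see expandKeyAsm below): each nibble lookup becomes one VAND plus one VTBL against M1L/M1H/M2L/M2H instead of an LDP/VMOV reload on every macro expansion. A minimal Go sketch, with illustrative names, of the one-register TBL semantics those VTBL lookups rely on:

package main

import "fmt"

// tbl16 mimics a one-register NEON TBL (VTBL): each output byte is
// table[idx[i]] when the index is below 16, and zero otherwise.
func tbl16(table, idx [16]byte) [16]byte {
	var dst [16]byte
	for i, j := range idx {
		if j < 16 {
			dst[i] = table[j]
		}
	}
	return dst
}

// and16 mimics VAND with a splatted byte mask such as NIBBLE_MASK (0x0F),
// which guarantees every index fed to tbl16 is in range.
func and16(x [16]byte, mask byte) [16]byte {
	for i := range x {
		x[i] &= mask
	}
	return x
}

func main() {
	table := [16]byte{0: 0xAA, 15: 0xBB}
	x := [16]byte{0x10, 0x2F} // high nibbles are stripped by the mask
	fmt.Printf("%x\n", tbl16(table, and16(x, 0x0F)))
}

Masking with NIBBLE_MASK first keeps every index in 0..15, which is why a single 16-byte table register per nibble half suffices.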
@@ -121,11 +112,7 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
 	VEOR y.B16, x.B16, x.B16
 
 #define SM4_TAO_L2(x, y) \
-	VMOV R0, XTMP6.D[0]; \
-	VMOV R1, XTMP6.D[1]; \
-	VAND x.B16, XTMP6.B16, XTMP7.B16; \
-	VMOV R0, y.D[0]; \
-	VMOV R1, y.D[1]; \
+	SM4_SBOX(x, y); \
 	; \ //#################### 4 parallel L2 linear transforms ##################//
 	VSHL $13, x.S4, XTMP6.S4; \
 	VUSHR $19, x.S4, y.S4; \
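For reference, the VSHL $13 / VUSHR $19 pair composes a 32-bit rotate-left by 13 (13 + 19 = 32); the SM4 key-schedule linear transform is L'(B) = B ^ (B <<< 13) ^ (B <<< 23). A scalar Go sketch of what the four lanes compute:

package main

import (
	"fmt"
	"math/bits"
)

// l2 is the SM4 key-schedule linear transform L'(B) = B ^ (B<<<13) ^ (B<<<23).
func l2(b uint32) uint32 {
	return b ^ bits.RotateLeft32(b, 13) ^ bits.RotateLeft32(b, 23)
}

// rotlByShifts mirrors the assembly idiom: a left shift plus the
// complementary right shift (n + (32-n) = 32) combined into a rotate.
func rotlByShifts(b uint32, n uint) uint32 {
	return (b << n) | (b >> (32 - n))
}

func main() {
	b := uint32(0x12345678)
	fmt.Printf("%08x\n", l2(b))
	fmt.Println(rotlByShifts(b, 13) == bits.RotateLeft32(b, 13)) // true
}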
@@ -142,16 +129,42 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0
 	MOVD ck+8(FP), R9
 	MOVD enc+16(FP), R10
 	MOVD dec+24(FP), R11
 
-	VLD1 (R8), [t0.B16];
 	LDP flip_mask<>(SB), (R0, R1)
 	VMOV R0, FLIP_MASK.D[0]
 	VMOV R1, FLIP_MASK.D[1]
-	VTBL FLIP_MASK.B16, [t0.B16], t0.B16
+
+	LDP nibble_mask<>(SB), (R0, R1)
+	VMOV R0, NIBBLE_MASK.D[0]
+	VMOV R1, NIBBLE_MASK.D[1]
+
+	LDP m1_low<>(SB), (R0, R1)
+	VMOV R0, M1L.D[0]
+	VMOV R1, M1L.D[1]
+
+	LDP m1_high<>(SB), (R0, R1)
+	VMOV R0, M1H.D[0]
+	VMOV R1, M1H.D[1]
+
+	LDP m2_low<>(SB), (R0, R1)
+	VMOV R0, M2L.D[0]
+	VMOV R1, M2L.D[1]
+
+	LDP m2_high<>(SB), (R0, R1)
+	VMOV R0, M2H.D[0]
+	VMOV R1, M2H.D[1]
+
 	LDP fk_mask<>(SB), (R0, R1)
-	VMOV R0, XTMP7.D[0]
-	VMOV R1, XTMP7.D[1]
-	VEOR t0.B16, XTMP7.B16, t0.B16
+	VMOV R0, FK_MASK.D[0]
+	VMOV R1, FK_MASK.D[1]
+
+	LDP inverse_shift_rows<>(SB), (R0, R1)
+	VMOV R0, INVERSE_SHIFT_ROWS.D[0]
+	VMOV R1, INVERSE_SHIFT_ROWS.D[1]
+
+	VLD1 (R8), [t0.B16];
+	VTBL FLIP_MASK.B16, [t0.B16], t0.B16
+	VEOR t0.B16, FK_MASK.B16, t0.B16
 	VMOV t0.S[1], t1.S[0]
 	VMOV t0.S[2], t2.S[0]
 	VMOV t0.S[3], t3.S[0]
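All lookup constants are now hoisted out of the key-schedule loop into V20-V30, and the key load moves after the preloads: t0 is byte-reversed through FLIP_MASK, then XORed with FK_MASK. A scalar Go sketch of that first key-schedule step (fk holds the standard SM4 FK constants; the function name is illustrative):

package main

import (
	"encoding/binary"
	"fmt"
)

// fk holds the standard SM4 key-schedule constants FK0..FK3.
var fk = [4]uint32{0xa3b1bac6, 0x56aa3350, 0x677d9197, 0xb27022dc}

// keyToK computes K0..K3 = MK0..MK3 ^ FK0..FK3, the state the VTBL byte
// flip plus the VEOR with FK_MASK establishes before the round loop.
func keyToK(key []byte) [4]uint32 {
	var k [4]uint32
	for i := range k {
		// Big-endian loads match the byte reversal done via FLIP_MASK.
		k[i] = binary.BigEndian.Uint32(key[4*i:]) ^ fk[i]
	}
	return k
}

func main() {
	key := []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef,
		0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}
	fmt.Printf("%08x\n", keyToK(key))
}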
@@ -208,6 +221,7 @@ ksLoop:
 	ADD $16, R0
 	CMP $128, R0
 	BNE ksLoop
+
 	RET
 
 // func encryptBlocksAsm(xk *uint32, dst, src *byte)

@@ -4,9 +4,7 @@
 package sm4
 
 import (
-	"crypto/rand"
 	"fmt"
-	"io"
 	"testing"
 )
 
@@ -46,15 +44,16 @@ func TestExpandKey(t *testing.T) {
 */
 
 func TestExpandKeySimple(t *testing.T) {
-	key := make([]byte, 16)
+	key := []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10}
+
 	encRes1 := make([]uint32, 32)
 	decRes1 := make([]uint32, 32)
 	encRes2 := make([]uint32, 32)
 	decRes2 := make([]uint32, 32)
-	io.ReadFull(rand.Reader, key)
+
 	expandKeyGo(key, encRes1, decRes1)
 	expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0])
 	fmt.Printf("expected=%v, result=%v\n", encRes1, encRes2)
 	fmt.Printf("expected=%v, result=%v\n", decRes1, decRes2)
 }
 
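The test now uses the fixed example key from the SM4 standard rather than a random one, so runs are reproducible, but it still only prints both expansions. A possible follow-up, not part of this commit, is to assert equality instead (assuming "reflect" is added to the imports):

// inside TestExpandKeySimple, after both expansions have run:
if !reflect.DeepEqual(encRes1, encRes2) {
	t.Errorf("enc mismatch: expected=%v, result=%v", encRes1, encRes2)
}
if !reflect.DeepEqual(decRes1, decRes2) {
	t.Errorf("dec mismatch: expected=%v, result=%v", decRes1, decRes2)
}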
|