From cb36dfd0bbaea748c1f4f061bc7fbdb354467983 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 10:40:16 +0800 Subject: [PATCH 01/38] Update asm_arm64.s --- sm4/asm_arm64.s | 50 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index b5038b1..b2a9c1e 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -8,7 +8,16 @@ #define t3 V5 #define ZERO V16 #define FLIP_MASK V17 - +#define NIBBLE_MASK V20 +#define INVERSE_SHIFT_ROWS V21 +#define M1L V22 +#define M1H V23 +#define M2L V24 +#define M2H V25 +#define R08_MASK V26 +#define R16_MASK V27 +#define R24_MASK V28 +#define FK_MASK V29 #define XTMP6 V6 #define XTMP7 V7 @@ -121,11 +130,6 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16 VEOR y.B16, x.B16, x.B16 #define SM4_TAO_L2(x, y) \ - VMOV R0, XTMP6.D[0]; \ - VMOV R1, XTMP6.D[1]; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - VMOV R0, y.D[0]; \ - VMOV R1, y.D[1]; \ ; \ //#################### 4 parallel L2 linear transforms ##################// VSHL $13, x.S4, XTMP6.S4; \ VUSHR $19, x.S4, y.S4; \ @@ -142,16 +146,38 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 MOVD ck+8(FP), R9 MOVD enc+16(FP), R10 MOVD dec+24(FP), R11 - - VLD1 (R8), [t0.B16]; + LDP flip_mask<>(SB), (R0, R1) VMOV R0, FLIP_MASK.D[0] VMOV R1, FLIP_MASK.D[1] - VTBL FLIP_MASK.B16, [t0.B16], t0.B16 + + LDP nibble_mask<>(SB), (R0, R1) + VMOV R0, NIBBLE_MASK.D[0] + VMOV R1, NIBBLE_MASK.D[1] + + LDP m1_low<>(SB), (R0, R1) + VMOV R0, M1L.D[0] + VMOV R1, M1L.D[1] + + LDP m1_high<>(SB), (R0, R1) + VMOV R0, M1H.D[0] + VMOV R1, M1H.D[1] + + LDP m2_low<>(SB), (R0, R1) + VMOV R0, M2L.D[0] + VMOV R1, M2L.D[1] + + LDP m2_high<>(SB), (R0, R1) + VMOV R0, M2H.D[0] + VMOV R1, M2H.D[1] + LDP fk_mask<>(SB), (R0, R1) - VMOV R0, XTMP7.D[0] - VMOV R1, XTMP7.D[1] - VEOR t0.B16, XTMP7.B16, t0.B16 + VMOV R0, FK_MASK.D[0] + VMOV R1, FK_MASK.D[1] + + VLD1 (R8), [t0.B16]; + VTBL FLIP_MASK.B16, [t0.B16], t0.B16 + VEOR t0.B16, FK_MASK.B16, t0.B16 VMOV t0.S[1], t1.S[0] VMOV t0.S[2], t2.S[0] VMOV t0.S[3], t3.S[0] From 92d167133b7437e672d21f8a94615ea2f836767b Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 10:57:23 +0800 Subject: [PATCH 02/38] Update asm_arm64.s --- sm4/asm_arm64.s | 37 ++++++++++--------------------------- 1 file changed, 10 insertions(+), 27 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index b2a9c1e..57c82a0 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -73,37 +73,19 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16 #define SM4_SBOX(x, y) \ ; \ //############################# inner affine ############################// - LDP nibble_mask<>(SB), (R0, R1); \ - VMOV R0, XTMP6.D[0]; \ - VMOV R1, XTMP6.D[1]; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m1_low<>(SB), (R0, R1); \ - VMOV R0, y.D[0]; \ - VMOV R1, y.D[1]; \ - VTBL XTMP7.B16, [y.B16], y.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M1L.B16], y.B16; \ VUSHR $4, x.D2, x.D2; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m1_low<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M1H.B16], XTMP7.B16; \ VEOR y.B16, XTMP7.B16, x.B16; \ - LDP inverse_shift_rows<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL V8.B16, [x.B16], x.B16; \ + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16; \ AESE ZERO.B16, x.B16; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m2_low<>(SB), (R0, R1); \ - VMOV R0, y.D[0]; \ - VMOV R1, y.D[1]; \ - VTBL XTMP7.B16, [y.B16], y.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M2L.B16], y.B16; \ VUSHR $4, x.D2, x.D2; \ - VAND x.B16, XTMP6.B16, XTMP7.B16; \ - LDP m2_high<>(SB), (R0, R1); \ - VMOV R0, V8.D[0]; \ - VMOV R1, V8.D[1]; \ - VTBL XTMP7.B16, [V8.B16], XTMP7.B16; \ + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16; \ + VTBL XTMP7.B16, [M2H.B16], XTMP7.B16; \ VEOR y.B16, XTMP7.B16, x.B16 #define SM4_TAO_L1(x, y) \ @@ -130,6 +112,7 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16 VEOR y.B16, x.B16, x.B16 #define SM4_TAO_L2(x, y) \ + SM4_SBOX(x, y); \ ; \ //#################### 4 parallel L2 linear transforms ##################// VSHL $13, x.S4, XTMP6.S4; \ VUSHR $19, x.S4, y.S4; \ From 5373617d6c12e1d99ddeabac031aac28afbb9cb6 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 11:14:31 +0800 Subject: [PATCH 03/38] Update cipher_asm_fuzzy_test.go --- sm4/cipher_asm_fuzzy_test.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sm4/cipher_asm_fuzzy_test.go b/sm4/cipher_asm_fuzzy_test.go index a390971..0ade790 100644 --- a/sm4/cipher_asm_fuzzy_test.go +++ b/sm4/cipher_asm_fuzzy_test.go @@ -4,9 +4,7 @@ package sm4 import ( - "crypto/rand" "fmt" - "io" "testing" ) @@ -46,15 +44,16 @@ func TestExpandKey(t *testing.T) { */ func TestExpandKeySimple(t *testing.T) { - key := make([]byte, 16) + key := []byte{0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef, 0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10} encRes1 := make([]uint32, 32) decRes1 := make([]uint32, 32) encRes2 := make([]uint32, 32) decRes2 := make([]uint32, 32) - io.ReadFull(rand.Reader, key) + expandKeyGo(key, encRes1, decRes1) expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0]) fmt.Printf("expected=%v, result=%v\n", encRes1, encRes2) fmt.Printf("expected=%v, result=%v\n", decRes1, decRes2) } + From 64d02d562b1062a678e29e10367ddb66ca1a6607 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 11:23:55 +0800 Subject: [PATCH 04/38] Update asm_arm64.s --- sm4/asm_arm64.s | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 57c82a0..0320324 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -169,6 +169,15 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 ADD $124, R11 VEOR ZERO.B16, ZERO.B16, ZERO.B16 + VMOV t0.S[0], R2 + MOVW.P R2, 4(R10) + VMOV t1.S[0], R2 + MOVW.P R2, 4(R10) + VMOV t2.S[0], R2 + MOVW.P R2, 4(R10) + VMOV t3.S[0], R2 + MOVW.P R2, 4(R10) +/* ksLoop: MOVW.P 4(R9), R19 VMOV R19, x.S[0] @@ -217,6 +226,7 @@ ksLoop: ADD $16, R0 CMP $128, R0 BNE ksLoop +*/ RET // func encryptBlocksAsm(xk *uint32, dst, src *byte) From 74617aee6a2ee076144fb838a141bfa1cef1ced1 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 11:56:03 +0800 Subject: [PATCH 05/38] Update asm_arm64.s --- sm4/asm_arm64.s | 1 + 1 file changed, 1 insertion(+) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 0320324..a405c1b 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -161,6 +161,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VEOR t0.B16, FK_MASK.B16, t0.B16 + VUSHR $4, t0.D2, t0.D2 VMOV t0.S[1], t1.S[0] VMOV t0.S[2], t2.S[0] VMOV t0.S[3], t3.S[0] From b31681a9369b28849b0839a884cc983558d17919 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 12:27:54 +0800 Subject: [PATCH 06/38] Update asm_arm64.s --- sm4/asm_arm64.s | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index a405c1b..baa97ec 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -170,6 +170,21 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 ADD $124, R11 VEOR ZERO.B16, ZERO.B16, ZERO.B16 + MOVW.P 4(R9), R19 + VMOV R19, x.S[0] + VEOR t1.B16, x.B16, x.B16 + VEOR t2.B16, x.B16, x.B16 + VEOR t3.B16, x.B16, x.B16 + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + VTBL XTMP7.B16, [M1L.B16], y.B16 + VUSHR $4, x.D2, x.D2 + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 + VEOR y.B16, XTMP7.B16, x.B16 + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + + VEOR x.B16, t0.B16, t0.B16 + VMOV t0.S[0], R2 MOVW.P R2, 4(R10) VMOV t1.S[0], R2 From 76e213ac840b985777f6775ba67bed836e6837fe Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 12:38:50 +0800 Subject: [PATCH 07/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index baa97ec..504e62d 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -161,7 +161,6 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VEOR t0.B16, FK_MASK.B16, t0.B16 - VUSHR $4, t0.D2, t0.D2 VMOV t0.S[1], t1.S[0] VMOV t0.S[2], t2.S[0] VMOV t0.S[3], t3.S[0] @@ -175,14 +174,15 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t1.B16, x.B16, x.B16 VEOR t2.B16, x.B16, x.B16 VEOR t3.B16, x.B16, x.B16 - VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + VAND x.B16, NIBBLE_MASK.B16, x.B16 + /* VTBL XTMP7.B16, [M1L.B16], y.B16 VUSHR $4, x.D2, x.D2 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - +*/ VEOR x.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 From 235940986e4713dad5c481991a0cf83ee2998855 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 12:44:48 +0800 Subject: [PATCH 08/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 504e62d..1647dea 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -174,16 +174,16 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t1.B16, x.B16, x.B16 VEOR t2.B16, x.B16, x.B16 VEOR t3.B16, x.B16, x.B16 - VAND x.B16, NIBBLE_MASK.B16, x.B16 - /* + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 +/* VUSHR $4, x.D2, x.D2 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 */ - VEOR x.B16, t0.B16, t0.B16 + VEOR y.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) From 4b9954fcd9d5e79642828c0e40b2528b7dd36772 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:00:50 +0800 Subject: [PATCH 09/38] Update asm_arm64.s --- sm4/asm_arm64.s | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 1647dea..9b4b9b0 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -158,6 +158,10 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VMOV R0, FK_MASK.D[0] VMOV R1, FK_MASK.D[1] + LDP inverse_shift_rows<>(SB), (R0, R1) + VMOV R0, INVERSE_SHIFT_ROWS.D[0] + VMOV R1, INVERSE_SHIFT_ROWS.D[1] + VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VEOR t0.B16, FK_MASK.B16, t0.B16 @@ -176,14 +180,13 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t3.B16, x.B16, x.B16 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 -/* VUSHR $4, x.D2, x.D2 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 -*/ - VEOR y.B16, t0.B16, t0.B16 + + VEOR x.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) From 83223cd5e91ad46604c8a9a78ad887772b6b916a Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:07:14 +0800 Subject: [PATCH 10/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 9b4b9b0..bb71c9f 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -184,7 +184,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 VEOR x.B16, t0.B16, t0.B16 From 23db50e034ec2e7c3e72e825e6aedd32d5705fd0 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:12:29 +0800 Subject: [PATCH 11/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index bb71c9f..c362d93 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -186,7 +186,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR y.B16, XTMP7.B16, x.B16 //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - VEOR x.B16, t0.B16, t0.B16 + VEOR XTMP7.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) From 069739039e4e354dedda379add4766c08d5c9487 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:16:08 +0800 Subject: [PATCH 12/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index c362d93..f841889 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -182,7 +182,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VTBL XTMP7.B16, [M1L.B16], y.B16 VUSHR $4, x.D2, x.D2 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 - VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 + //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 From 1e9cb54a270ced649f3e7eeb5e0fe7847ddddb10 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:21:44 +0800 Subject: [PATCH 13/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index f841889..c731062 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -181,12 +181,12 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 VUSHR $4, x.D2, x.D2 - VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 - VEOR y.B16, XTMP7.B16, x.B16 + //VEOR y.B16, XTMP7.B16, x.B16 //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - VEOR XTMP7.B16, t0.B16, t0.B16 + VEOR x.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) From c2c53ee6411bedfa9eb80868dfa88b1190aa4477 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:26:44 +0800 Subject: [PATCH 14/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index c731062..e0eae60 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -186,13 +186,13 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 //VEOR y.B16, XTMP7.B16, x.B16 //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - VEOR x.B16, t0.B16, t0.B16 + //VEOR x.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) - VMOV t1.S[0], R2 + VMOV x.S[0], R2 MOVW.P R2, 4(R10) - VMOV t2.S[0], R2 + VMOV y.S[0], R2 MOVW.P R2, 4(R10) VMOV t3.S[0], R2 MOVW.P R2, 4(R10) From e74c6587d34f0f69bdfcede832c184d6ea669ab9 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:36:57 +0800 Subject: [PATCH 15/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index e0eae60..718e1e5 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -180,7 +180,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t3.B16, x.B16, x.B16 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 - VUSHR $4, x.D2, x.D2 + VSHL $-4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 //VEOR y.B16, XTMP7.B16, x.B16 From 8c73dde63e8563da7842c3edb02bc1c0db4dba66 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:45:25 +0800 Subject: [PATCH 16/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 718e1e5..89aba6f 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -180,7 +180,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t3.B16, x.B16, x.B16 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 - VSHL $-4, x.D2, x.D2 + VSRI $4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 //VEOR y.B16, XTMP7.B16, x.B16 From d621fbd4de662baf4c149541b9d85b55295aa08b Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:51:15 +0800 Subject: [PATCH 17/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 89aba6f..d282013 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -180,7 +180,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t3.B16, x.B16, x.B16 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 - VSRI $4, x.D2, x.D2 + VSLI $-4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 //VEOR y.B16, XTMP7.B16, x.B16 From 917d31bc111a05227729ed9d2ef93ca10b8c4f15 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 13:58:51 +0800 Subject: [PATCH 18/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index d282013..be73422 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -180,7 +180,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t3.B16, x.B16, x.B16 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 - VSLI $-4, x.D2, x.D2 + //VSLI $-4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 //VEOR y.B16, XTMP7.B16, x.B16 From ff2859a4e3b77eb5d52e03ae92b03758a2aba214 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:16:32 +0800 Subject: [PATCH 19/38] Update asm_arm64.s --- sm4/asm_arm64.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index be73422..3085f12 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -178,8 +178,8 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t1.B16, x.B16, x.B16 VEOR t2.B16, x.B16, x.B16 VEOR t3.B16, x.B16, x.B16 - VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 - VTBL XTMP7.B16, [M1L.B16], y.B16 + // VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + // VTBL XTMP7.B16, [M1L.B16], y.B16 //VSLI $-4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 From 28160e619286858b798714ec123c97f0d8059b33 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:19:56 +0800 Subject: [PATCH 20/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 3085f12..7314ce3 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -175,9 +175,9 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 MOVW.P 4(R9), R19 VMOV R19, x.S[0] - VEOR t1.B16, x.B16, x.B16 - VEOR t2.B16, x.B16, x.B16 - VEOR t3.B16, x.B16, x.B16 + // VEOR t1.B16, x.B16, x.B16 + // VEOR t2.B16, x.B16, x.B16 + // VEOR t3.B16, x.B16, x.B16 // VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 // VTBL XTMP7.B16, [M1L.B16], y.B16 //VSLI $-4, x.D2, x.D2 From fd30717319b8a7fef556fe142dae467b0613238a Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:27:57 +0800 Subject: [PATCH 21/38] Update asm_arm64.s --- sm4/asm_arm64.s | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 7314ce3..be73422 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -175,11 +175,11 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 MOVW.P 4(R9), R19 VMOV R19, x.S[0] - // VEOR t1.B16, x.B16, x.B16 - // VEOR t2.B16, x.B16, x.B16 - // VEOR t3.B16, x.B16, x.B16 - // VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 - // VTBL XTMP7.B16, [M1L.B16], y.B16 + VEOR t1.B16, x.B16, x.B16 + VEOR t2.B16, x.B16, x.B16 + VEOR t3.B16, x.B16, x.B16 + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + VTBL XTMP7.B16, [M1L.B16], y.B16 //VSLI $-4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 From ec759be99bb36d0ffb0990fdf7742266a0e86060 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:30:58 +0800 Subject: [PATCH 22/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index be73422..e0eae60 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -180,7 +180,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR t3.B16, x.B16, x.B16 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 - //VSLI $-4, x.D2, x.D2 + VUSHR $4, x.D2, x.D2 //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 //VEOR y.B16, XTMP7.B16, x.B16 From 237ed6f3211663d9cc480cec60836061102b9bc2 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:34:35 +0800 Subject: [PATCH 23/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index e0eae60..2e17a86 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -181,9 +181,9 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1L.B16], y.B16 VUSHR $4, x.D2, x.D2 - //VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 - //VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 - //VEOR y.B16, XTMP7.B16, x.B16 + VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 + VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 + VEOR y.B16, XTMP7.B16, x.B16 //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 //VEOR x.B16, t0.B16, t0.B16 From 9c5cc67638d66bb2450d07f0d917aa528dfe1faa Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:41:53 +0800 Subject: [PATCH 24/38] Update asm_arm64.s --- sm4/asm_arm64.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 2e17a86..f91a955 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -184,9 +184,9 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - //VEOR x.B16, t0.B16, t0.B16 + VEOR x.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) From ac26f071bf5c416cbe296a0a0c820bedd04d6484 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:45:53 +0800 Subject: [PATCH 25/38] Update asm_arm64.s --- sm4/asm_arm64.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index f91a955..2e17a86 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -184,9 +184,9 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - VEOR x.B16, t0.B16, t0.B16 + //VEOR x.B16, t0.B16, t0.B16 VMOV t0.S[0], R2 MOVW.P R2, 4(R10) From 1ec2ce9b96675570c48a933069ef877742490777 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 14:52:58 +0800 Subject: [PATCH 26/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 2e17a86..ae186fa 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -184,7 +184,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - //VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 //VEOR x.B16, t0.B16, t0.B16 From e91cec4132b658ff82228a7d8efed8e96389f94f Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:22:00 +0800 Subject: [PATCH 27/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index ae186fa..92c77db 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -184,7 +184,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], y.B16 //VEOR x.B16, t0.B16, t0.B16 From 36f55773314cbc1d17e64f7489019970cdaf835e Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:28:34 +0800 Subject: [PATCH 28/38] Update asm_arm64.s --- sm4/asm_arm64.s | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 92c77db..24933d0 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -32,8 +32,10 @@ DATA nibble_mask<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16 // inverse shift rows -DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 -DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 +//DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 +//DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 +DATA inverse_shift_rows<>+0x00(SB)/8, $0x0706050403020100 +DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E0D0C0B0A0908 GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16 // Affine transform 1 (low and high hibbles) @@ -184,7 +186,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], y.B16 + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 //VEOR x.B16, t0.B16, t0.B16 From 392d556dc800c0bb29e3ddaec606f2fb22f21e97 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:37:20 +0800 Subject: [PATCH 29/38] Update asm_arm64.s --- sm4/asm_arm64.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 24933d0..5cb156d 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -34,8 +34,8 @@ GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16 // inverse shift rows //DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 //DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 -DATA inverse_shift_rows<>+0x00(SB)/8, $0x0706050403020100 -DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E0D0C0B0A0908 +DATA inverse_shift_rows<>+0x00(SB)/8, $0x0706050403020D00 +DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E010C0B0A0908 GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16 // Affine transform 1 (low and high hibbles) From b889967d49a8e55192dfaa3c7649b7ab4011b360 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:42:04 +0800 Subject: [PATCH 30/38] Update asm_arm64.s --- sm4/asm_arm64.s | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 5cb156d..b3af7cc 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -161,8 +161,8 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VMOV R1, FK_MASK.D[1] LDP inverse_shift_rows<>(SB), (R0, R1) - VMOV R0, INVERSE_SHIFT_ROWS.D[0] - VMOV R1, INVERSE_SHIFT_ROWS.D[1] + VMOV R0, V21.D[0] + VMOV R1, V21.D[1] VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 @@ -186,7 +186,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 + VTBL V21.B16, [x.B16], x.B16 //VEOR x.B16, t0.B16, t0.B16 From 4d49c93f99f15f673b944aad8f4628b5dba5caac Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:44:39 +0800 Subject: [PATCH 31/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index b3af7cc..ffa6ef7 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -9,7 +9,7 @@ #define ZERO V16 #define FLIP_MASK V17 #define NIBBLE_MASK V20 -#define INVERSE_SHIFT_ROWS V21 +#define INVERSE_SHIFT_ROWS V30 #define M1L V22 #define M1H V23 #define M2L V24 From 1aab497333044d4fc1a19f09afe9dd4f248e73c8 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:45:09 +0800 Subject: [PATCH 32/38] Update asm_arm64.s --- sm4/asm_arm64.s | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index ffa6ef7..9c3386d 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -161,8 +161,8 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VMOV R1, FK_MASK.D[1] LDP inverse_shift_rows<>(SB), (R0, R1) - VMOV R0, V21.D[0] - VMOV R1, V21.D[1] + VMOV R0, INVERSE_SHIFT_ROWS.D[0] + VMOV R1, INVERSE_SHIFT_ROWS.D[1] VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 From 376df04b7248c0b9612dafb0f3c085eeac2b8d41 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 15:48:54 +0800 Subject: [PATCH 33/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 9c3386d..6e64310 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -186,7 +186,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 VEOR y.B16, XTMP7.B16, x.B16 - VTBL V21.B16, [x.B16], x.B16 + VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 //VEOR x.B16, t0.B16, t0.B16 From 03b0c9fc9ece74b640d185dfdf098fdeb31556c9 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 16:11:53 +0800 Subject: [PATCH 34/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 6e64310..a5e5453 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -35,7 +35,7 @@ GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16 //DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 //DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 DATA inverse_shift_rows<>+0x00(SB)/8, $0x0706050403020D00 -DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E010C0B0A0908 +DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E0D0C0B0A0908 GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16 // Affine transform 1 (low and high hibbles) From e6c9bde42442a59a39b3f29e3c526c7c2e8a2fbc Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 16:17:38 +0800 Subject: [PATCH 35/38] Update asm_arm64.s --- sm4/asm_arm64.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index a5e5453..a7a65d3 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -34,7 +34,7 @@ GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16 // inverse shift rows //DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 //DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 -DATA inverse_shift_rows<>+0x00(SB)/8, $0x0706050403020D00 +DATA inverse_shift_rows<>+0x00(SB)/8, $0x0106050403020700 DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E0D0C0B0A0908 GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16 From ee8e44ff36d9a66c146534638cd8810ad4c4d65b Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 16:56:47 +0800 Subject: [PATCH 36/38] Update asm_arm64.s --- sm4/asm_arm64.s | 1 + 1 file changed, 1 insertion(+) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index a7a65d3..7f68829 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -176,6 +176,7 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VEOR ZERO.B16, ZERO.B16, ZERO.B16 MOVW.P 4(R9), R19 + VEOR x.B16, x.B16, x.B16 VMOV R19, x.S[0] VEOR t1.B16, x.B16, x.B16 VEOR t2.B16, x.B16, x.B16 From c0d6e73eaf73b86899c769b5e7b80a86e7276824 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 17:13:58 +0800 Subject: [PATCH 37/38] Update asm_arm64.s --- sm4/asm_arm64.s | 3 +++ 1 file changed, 3 insertions(+) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 7f68829..6752360 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -167,6 +167,9 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VEOR t0.B16, FK_MASK.B16, t0.B16 + VEOR t1.B16, t1.B16, t1.B16 + VEOR t2.B16, t2.B16, t2.B16 + VEOR t3.B16, t3.B16, t3.B16 VMOV t0.S[1], t1.S[0] VMOV t0.S[2], t2.S[0] VMOV t0.S[3], t3.S[0] From 37c24a57af65bbacc2f47fa6d61fcecb9c7df10e Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Sat, 1 Jan 2022 17:18:50 +0800 Subject: [PATCH 38/38] Update asm_arm64.s --- sm4/asm_arm64.s | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/sm4/asm_arm64.s b/sm4/asm_arm64.s index 6752360..a73abc7 100644 --- a/sm4/asm_arm64.s +++ b/sm4/asm_arm64.s @@ -32,8 +32,6 @@ DATA nibble_mask<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F GLOBL nibble_mask<>(SB), (NOPTR+RODATA), $16 // inverse shift rows -//DATA inverse_shift_rows<>+0x00(SB)/8, $0x0B0E0104070A0D00 -//DATA inverse_shift_rows<>+0x08(SB)/8, $0x0306090C0F020508 DATA inverse_shift_rows<>+0x00(SB)/8, $0x0106050403020700 DATA inverse_shift_rows<>+0x08(SB)/8, $0x0F0E0D0C0B0A0908 GLOBL inverse_shift_rows<>(SB), (NOPTR+RODATA), $16 @@ -167,9 +165,6 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 VLD1 (R8), [t0.B16]; VTBL FLIP_MASK.B16, [t0.B16], t0.B16 VEOR t0.B16, FK_MASK.B16, t0.B16 - VEOR t1.B16, t1.B16, t1.B16 - VEOR t2.B16, t2.B16, t2.B16 - VEOR t3.B16, t3.B16, t3.B16 VMOV t0.S[1], t1.S[0] VMOV t0.S[2], t2.S[0] VMOV t0.S[3], t3.S[0] @@ -178,31 +173,6 @@ TEXT ·expandKeyAsm(SB),NOSPLIT,$0 ADD $124, R11 VEOR ZERO.B16, ZERO.B16, ZERO.B16 - MOVW.P 4(R9), R19 - VEOR x.B16, x.B16, x.B16 - VMOV R19, x.S[0] - VEOR t1.B16, x.B16, x.B16 - VEOR t2.B16, x.B16, x.B16 - VEOR t3.B16, x.B16, x.B16 - VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 - VTBL XTMP7.B16, [M1L.B16], y.B16 - VUSHR $4, x.D2, x.D2 - VAND x.B16, NIBBLE_MASK.B16, XTMP7.B16 - VTBL XTMP7.B16, [M1H.B16], XTMP7.B16 - VEOR y.B16, XTMP7.B16, x.B16 - VTBL INVERSE_SHIFT_ROWS.B16, [x.B16], x.B16 - - //VEOR x.B16, t0.B16, t0.B16 - - VMOV t0.S[0], R2 - MOVW.P R2, 4(R10) - VMOV x.S[0], R2 - MOVW.P R2, 4(R10) - VMOV y.S[0], R2 - MOVW.P R2, 4(R10) - VMOV t3.S[0], R2 - MOVW.P R2, 4(R10) -/* ksLoop: MOVW.P 4(R9), R19 VMOV R19, x.S[0] @@ -251,7 +221,7 @@ ksLoop: ADD $16, R0 CMP $128, R0 BNE ksLoop -*/ + RET // func encryptBlocksAsm(xk *uint32, dst, src *byte)