Fix compile error

This commit is contained in:
Emman 2022-04-29 17:29:08 +08:00
parent 37ab6d67f3
commit c71d5ccd96
7 changed files with 424 additions and 423 deletions

View File

@ -58,18 +58,18 @@ func sm3tt2b(Vd, Vn, Vm, imm2 byte) uint32 {
// roundA appends to buf the assembly text for one compression round using the
// "A" instruction forms. It writes four lines, in order: a WORD directive
// holding the value returned by sm3ss1 (annotated as SM3SS1), a VSHL $1 that
// shifts V<t> left by one in place, a WORD for sm3tt1a (annotated SM3TT1A) and
// a WORD for sm3tt2a (annotated SM3TT2A).
//
// Parameters, as they are used below:
//   - i:   passed as the last argument of sm3tt1a/sm3tt2a and printed as the
//     trailing index in their comments.
//   - t:   register passed to sm3ss1 and then shifted by the VSHL line.
//   - st1: first argument of sm3tt1a; also passed to sm3ss1.
//   - st2: first argument of sm3tt2a; also passed to sm3ss1.
//   - w:   third argument of sm3tt2a.
//   - wt:  third argument of sm3tt1a.
//
// Register 5 is hard-coded as the first argument of sm3ss1 and as the second
// argument of sm3tt1a/sm3tt2a, i.e. it carries the SM3SS1 result into the
// following two instructions.
//
// NOTE(review): the "V%dd.4S" verb in the SM3TT1A/SM3TT2A comments prints an
// extra "d" (e.g. "V8d.4S"); this only affects the emitted comment text and
// looks like a typo for "V%d.4S" - confirm before changing.
// Used v5 as temp register // Used v5 as temp register
func roundA(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) { func roundA(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t, st2), 5, st1, t, st2) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t, st2), 5, st1, t, st2)
fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t, t) fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t, t)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3TT1A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1a(st1, 5, wt, i), st1, 5, wt, i) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT1A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1a(st1, 5, wt, i), st1, 5, wt, i)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3TT2A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2a(st2, 5, w, i), st2, 5, w, i) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT2A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2a(st2, 5, w, i), st2, 5, w, i)
} }
// roundB appends to buf the assembly text for one compression round using the
// "B" instruction forms. It writes four lines, in order: a WORD directive
// holding the value returned by sm3ss1 (annotated as SM3SS1), a VSHL $1 that
// shifts V<t> left by one in place, a WORD for sm3tt1b (annotated SM3TT1B) and
// a WORD for sm3tt2b (annotated SM3TT2B).
//
// Parameters, as they are used below:
//   - i:   passed as the last argument of sm3tt1b/sm3tt2b and printed as the
//     trailing index in their comments.
//   - t:   register passed to sm3ss1 and then shifted by the VSHL line.
//   - st1: first argument of sm3tt1b; also passed to sm3ss1.
//   - st2: first argument of sm3tt2b; also passed to sm3ss1.
//   - w:   third argument of sm3tt2b.
//   - wt:  third argument of sm3tt1b.
//
// Register 5 is hard-coded as the first argument of sm3ss1 and as the second
// argument of sm3tt1b/sm3tt2b, i.e. it carries the SM3SS1 result into the
// following two instructions.
//
// NOTE(review): the "V%dd.4S" verb in the SM3TT1B/SM3TT2B comments prints an
// extra "d" (e.g. "V8d.4S"); this only affects the emitted comment text and
// looks like a typo for "V%d.4S" - confirm before changing.
// Used v5 as temp register // Used v5 as temp register
func roundB(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) { func roundB(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t, st2), 5, st1, t, st2) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t, st2), 5, st1, t, st2)
fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t, t) fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t, t)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3TT1B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1b(st1, 5, wt, i), st1, 5, wt, i) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT1B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1b(st1, 5, wt, i), st1, 5, wt, i)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3TT2B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2b(st2, 5, w, i), st2, 5, w, i) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT2B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2b(st2, 5, w, i), st2, 5, w, i)
} }
// Compress 4 words and generate 4 words, use v6, v7, v10 as temp registers // Compress 4 words and generate 4 words, use v6, v7, v10 as temp registers
@ -82,12 +82,12 @@ func roundB(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
// st1, st2, sm3 state // st1, st2, sm3 state
func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) { func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
fmt.Fprintf(buf, "\t// Extension\n") fmt.Fprintf(buf, "\t// Extension\n")
fmt.Fprintf(buf, "\tVEXT 3, V%d, V%d, V%d\n", s2, s1, s4) fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
fmt.Fprintf(buf, "\tVEXT 3, V%d, V%d, V%d\n", s1, s0, 6) fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
fmt.Fprintf(buf, "\tVEXT 2, V%d, V%d, V%d\n", s3, s2, 7) fmt.Fprintf(buf, "\tVEXT $2, V%d.B16, V%d.B16, V%d.B16\n", s3, s2, 7)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3PARTW1 V%d.4S, V%d.4S, V%d.4S\n", sm3partw1(s4, s0, s3), s4, s0, s3) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW1 V%d.4S, V%d.4S, V%d.4S\n", sm3partw1(s4, s0, s3), s4, s0, s3)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6)
fmt.Fprintf(buf, "\tVEOR V%d, V%d, V10\n", s1, s0) fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
fmt.Fprintf(buf, "\t// Compression\n") fmt.Fprintf(buf, "\t// Compression\n")
roundA(buf, 0, t, st1, st2, s0, 10) roundA(buf, 0, t, st1, st2, s0, 10)
roundA(buf, 1, t, st1, st2, s0, 10) roundA(buf, 1, t, st1, st2, s0, 10)
@ -100,13 +100,13 @@ func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
func qroundB(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) { func qroundB(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
if s4 != 0xff { if s4 != 0xff {
fmt.Fprintf(buf, "\t// Extension\n") fmt.Fprintf(buf, "\t// Extension\n")
fmt.Fprintf(buf, "\tVEXT 3, V%d, V%d, V%d\n", s2, s1, s4) fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
fmt.Fprintf(buf, "\tVEXT 3, V%d, V%d, V%d\n", s1, s0, 6) fmt.Fprintf(buf, "\tVEXT $3, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
fmt.Fprintf(buf, "\tVEXT 2, V%d, V%d, V%d\n", s3, s2, 7) fmt.Fprintf(buf, "\tVEXT $2, V%d.B16, V%d.B16, V%d.B16\n", s3, s2, 7)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3PARTW1 V%d.4S, V%d.4S, V%d.4S\n", sm3partw1(s4, s0, s3), s4, s0, s3) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW1 V%d.4S, V%d.4S, V%d.4S\n", sm3partw1(s4, s0, s3), s4, s0, s3)
fmt.Fprintf(buf, "\tWORD 0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6) fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6)
} }
fmt.Fprintf(buf, "\tVEOR V%d, V%d, V10\n", s1, s0) fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
fmt.Fprintf(buf, "\t// Compression\n") fmt.Fprintf(buf, "\t// Compression\n")
roundB(buf, 0, t, st1, st2, s0, 10) roundB(buf, 0, t, st1, st2, s0, 10)
roundB(buf, 1, t, st1, st2, s0, 10) roundB(buf, 1, t, st1, st2, s0, 10)
@ -165,8 +165,8 @@ blockloop:
fmt.Fprint(buf, ` fmt.Fprint(buf, `
SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes
VEOR V8.S4, V15.S4, V8.S4 VEOR V8.B16, V15.B16, V8.B16
VEOR V9.S4, V16.S4, V9.S4 VEOR V9.B16, V16.B16, V9.B16
CBNZ R3, blockloop CBNZ R3, blockloop
sm3ret: sm3ret:

View File

@ -23,392 +23,392 @@ blockloop:
// first 16 rounds // first 16 rounds
VMOV R5, V11.S[3] VMOV R5, V11.S[3]
// Extension // Extension
VEXT 3, V2, V1, V4 VEXT $3, V2.B16, V1.B16, V4.B16
VEXT 3, V1, V0, V6 VEXT $3, V1.B16, V0.B16, V6.B16
VEXT 2, V3, V2, V7 VEXT $2, V3.B16, V2.B16, V7.B16
WORD 0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S
WORD 0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S
VEOR V1, V0, V10 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD 0xa98840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 0 WORD $0xa98840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD 0xa99840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 1 WORD $0xa99840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD 0xa9a840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 2 WORD $0xa9a840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD 0xa9b840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 3 WORD $0xa9b840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 3
// Extension // Extension
VEXT 3, V3, V2, V0 VEXT $3, V3.B16, V2.B16, V0.B16
VEXT 3, V2, V1, V6 VEXT $3, V2.B16, V1.B16, V6.B16
VEXT 2, V4, V3, V7 VEXT $2, V4.B16, V3.B16, V7.B16
WORD 0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S
WORD 0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S
VEOR V2, V1, V10 VEOR V2.B16, V1.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD 0xa98841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 0 WORD $0xa98841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD 0xa99841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 1 WORD $0xa99841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD 0xa9a841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 2 WORD $0xa9a841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD 0xa9b841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 3 WORD $0xa9b841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 3
// Extension // Extension
VEXT 3, V4, V3, V1 VEXT $3, V4.B16, V3.B16, V1.B16
VEXT 3, V3, V2, V6 VEXT $3, V3.B16, V2.B16, V6.B16
VEXT 2, V0, V4, V7 VEXT $2, V0.B16, V4.B16, V7.B16
WORD 0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S
WORD 0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S
VEOR V3, V2, V10 VEOR V3.B16, V2.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD 0xa98842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 0 WORD $0xa98842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD 0xa99842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 1 WORD $0xa99842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD 0xa9a842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 2 WORD $0xa9a842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD 0xa9b842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 3 WORD $0xa9b842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 3
// Extension // Extension
VEXT 3, V0, V4, V2 VEXT $3, V0.B16, V4.B16, V2.B16
VEXT 3, V4, V3, V6 VEXT $3, V4.B16, V3.B16, V6.B16
VEXT 2, V1, V0, V7 VEXT $2, V1.B16, V0.B16, V7.B16
WORD 0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S WORD $0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S
WORD 0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S WORD $0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S
VEOR V4, V3, V10 VEOR V4.B16, V3.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD 0xa98843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 0 WORD $0xa98843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD 0xa99843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 1 WORD $0xa99843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD 0xa9a843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 2 WORD $0xa9a843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD 0xa9b843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 3 WORD $0xa9b843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 3
// second 48 rounds // second 48 rounds
VMOV R6, V11.S[3] VMOV R6, V11.S[3]
// Extension // Extension
VEXT 3, V1, V0, V3 VEXT $3, V1.B16, V0.B16, V3.B16
VEXT 3, V0, V4, V6 VEXT $3, V0.B16, V4.B16, V6.B16
VEXT 2, V2, V1, V7 VEXT $2, V2.B16, V1.B16, V7.B16
WORD 0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S WORD $0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S
WORD 0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S WORD $0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S
VEOR V0, V4, V10 VEOR V0.B16, V4.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3
// Extension // Extension
VEXT 3, V2, V1, V4 VEXT $3, V2.B16, V1.B16, V4.B16
VEXT 3, V1, V0, V6 VEXT $3, V1.B16, V0.B16, V6.B16
VEXT 2, V3, V2, V7 VEXT $2, V3.B16, V2.B16, V7.B16
WORD 0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S
WORD 0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S
VEOR V1, V0, V10 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3
// Extension // Extension
VEXT 3, V3, V2, V0 VEXT $3, V3.B16, V2.B16, V0.B16
VEXT 3, V2, V1, V6 VEXT $3, V2.B16, V1.B16, V6.B16
VEXT 2, V4, V3, V7 VEXT $2, V4.B16, V3.B16, V7.B16
WORD 0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S
WORD 0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S
VEOR V2, V1, V10 VEOR V2.B16, V1.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0 WORD $0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1 WORD $0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2 WORD $0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3 WORD $0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3
// Extension // Extension
VEXT 3, V4, V3, V1 VEXT $3, V4.B16, V3.B16, V1.B16
VEXT 3, V3, V2, V6 VEXT $3, V3.B16, V2.B16, V6.B16
VEXT 2, V0, V4, V7 VEXT $2, V0.B16, V4.B16, V7.B16
WORD 0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S
WORD 0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S
VEOR V3, V2, V10 VEOR V3.B16, V2.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0 WORD $0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1 WORD $0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2 WORD $0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3 WORD $0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3
// Extension // Extension
VEXT 3, V0, V4, V2 VEXT $3, V0.B16, V4.B16, V2.B16
VEXT 3, V4, V3, V6 VEXT $3, V4.B16, V3.B16, V6.B16
VEXT 2, V1, V0, V7 VEXT $2, V1.B16, V0.B16, V7.B16
WORD 0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S WORD $0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S
WORD 0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S WORD $0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S
VEOR V4, V3, V10 VEOR V4.B16, V3.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0 WORD $0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1 WORD $0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2 WORD $0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3 WORD $0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3
// Extension // Extension
VEXT 3, V1, V0, V3 VEXT $3, V1.B16, V0.B16, V3.B16
VEXT 3, V0, V4, V6 VEXT $3, V0.B16, V4.B16, V6.B16
VEXT 2, V2, V1, V7 VEXT $2, V2.B16, V1.B16, V7.B16
WORD 0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S WORD $0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S
WORD 0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S WORD $0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S
VEOR V0, V4, V10 VEOR V0.B16, V4.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3
// Extension // Extension
VEXT 3, V2, V1, V4 VEXT $3, V2.B16, V1.B16, V4.B16
VEXT 3, V1, V0, V6 VEXT $3, V1.B16, V0.B16, V6.B16
VEXT 2, V3, V2, V7 VEXT $2, V3.B16, V2.B16, V7.B16
WORD 0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S
WORD 0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S
VEOR V1, V0, V10 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3
// Extension // Extension
VEXT 3, V3, V2, V0 VEXT $3, V3.B16, V2.B16, V0.B16
VEXT 3, V2, V1, V6 VEXT $3, V2.B16, V1.B16, V6.B16
VEXT 2, V4, V3, V7 VEXT $2, V4.B16, V3.B16, V7.B16
WORD 0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S
WORD 0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S
VEOR V2, V1, V10 VEOR V2.B16, V1.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0 WORD $0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1 WORD $0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2 WORD $0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3 WORD $0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3
// Extension // Extension
VEXT 3, V4, V3, V1 VEXT $3, V4.B16, V3.B16, V1.B16
VEXT 3, V3, V2, V6 VEXT $3, V3.B16, V2.B16, V6.B16
VEXT 2, V0, V4, V7 VEXT $2, V0.B16, V4.B16, V7.B16
WORD 0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S
WORD 0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S
VEOR V3, V2, V10 VEOR V3.B16, V2.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0 WORD $0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1 WORD $0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2 WORD $0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3 WORD $0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3
VEOR V4, V3, V10 VEOR V4.B16, V3.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0 WORD $0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1 WORD $0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2 WORD $0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3 WORD $0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3
VEOR V0, V4, V10 VEOR V0.B16, V4.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3
VEOR V1, V0, V10 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD 0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD 0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD 0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2
WORD 0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD 0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD 0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3
SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes
VEOR V8.S4, V15.S4, V8.S4 VEOR V8.B16, V15.B16, V8.B16
VEOR V9.S4, V16.S4, V9.S4 VEOR V9.B16, V16.B16, V9.B16
CBNZ R3, blockloop CBNZ R3, blockloop
sm3ret: sm3ret:

View File

@ -165,28 +165,29 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
VMOV R1, R24_MASK.D[1] VMOV R1, R24_MASK.D[1]
// SM4EKEY_EXPORT_KEYS writes the two vectors V8 and V9 out twice:
//   - as a pair to the address in R10, which is post-incremented by 32;
//   - with their four 32-bit lanes copied in reverse order into V10/V11, as a
//     pair to the address in R11, which is then moved back by 32.
// The two columns on each line differ in which of V8/V9 is treated as the
// first vector, and in how R11 is stepped back: a -32 post-index on the store
// versus a plain store followed by an explicit SUB.
// NOTE(review): R10 and R11 look like a forward-filled and a backward-filled
// key schedule; confirm against the callers.
#define SM4EKEY_EXPORT_KEYS() \ #define SM4EKEY_EXPORT_KEYS() \
VMOV V8.S[3], V10.S[0] \ VMOV V9.S[3], V10.S[0] \
VMOV V8.S[2], V10.S[1] \ VMOV V9.S[2], V10.S[1] \
VMOV V8.S[1], V10.S[2] \ VMOV V9.S[1], V10.S[2] \
VMOV V8.S[0], V10.S[3] \ VMOV V9.S[0], V10.S[3] \
VMOV V9.S[3], V11.S[0] \ VMOV V8.S[3], V11.S[0] \
VMOV V9.S[2], V11.S[1] \ VMOV V8.S[2], V11.S[1] \
VMOV V9.S[1], V11.S[2] \ VMOV V8.S[1], V11.S[2] \
VMOV V9.S[0], V11.S[3] \ VMOV V8.S[0], V11.S[3] \
VST1.P [V9.S4, V8.S4], 32(R10) \ VST1.P [V8.S4, V9.S4], 32(R10) \
VST1.P [V10.S4, V11.S4], -32(R11) VST1 [V10.S4, V11.S4], (R11) \
SUB $32, R11, R11
// SM4E_ROUND handles one 16-byte block held in V8: it loads the block from
// the address in R10 (post-incrementing R10 by 16), reverses the bytes inside
// each 32-bit word, emits eight hand-encoded instruction words, reverses the
// bytes back, and stores V8 to the address in R9 (post-incrementing R9 by 16).
// The first four WORD values are the ones annotated elsewhere in this file as
// SM4E V8.4S with V0.4S through V3.4S.
// NOTE(review): the last four presumably pair V8 with V4..V7; confirm against
// the generator that produced these encodings.
#define SM4E_ROUND() \ #define SM4E_ROUND() \
VLD1.P 16(R10), [V8.B16] \ VLD1.P 16(R10), [V8.B16] \
VREV32 V8.B16, V8.B16 \ VREV32 V8.B16, V8.B16 \
WORD 0x0884c0ce \ WORD $0x0884c0ce \
WORD 0x2884c0ce \ WORD $0x2884c0ce \
WORD 0x4884c0ce \ WORD $0x4884c0ce \
WORD 0x6884c0ce \ WORD $0x6884c0ce \
WORD 0x8884c0ce \ WORD $0x8884c0ce \
WORD 0xa884c0ce \ WORD $0xa884c0ce \
WORD 0xc884c0ce \ WORD $0xc884c0ce \
WORD 0xe884c0ce \ WORD $0xe884c0ce \
VREV32 V8.B16, V8.B16 \ VREV32 V8.B16, V8.B16 \
VST1.P [V8.B16], 16(R9) VST1.P [V8.B16], 16(R9)
@ -229,27 +230,27 @@ sm4ekey:
LDP fk_mask<>(SB), (R0, R1) LDP fk_mask<>(SB), (R0, R1)
VMOV R0, FK_MASK.D[0] VMOV R0, FK_MASK.D[0]
VMOV R1, FK_MASK.D[1] VMOV R1, FK_MASK.D[1]
VLD1 (R8), [V8.B16] VLD1 (R8), [V9.B16]
VREV32 V8.B16, V8.B16 VREV32 V9.B16, V9.B16
VEOR FK_MASK, V8, V8 VEOR FK_MASK.B16, V9.B16, V9.B16
ADD $96, R11 ADD $96, R11
VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4] VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4]
WORD 0x09c960ce //SM4EKEY V9.4S, V8.4S, V0.4S WORD $0x28c960ce //SM4EKEY V8.4S, V9.4S, V0.4S
WORD 0x28c961ce //SM4EKEY V8.4S, V9.4S, V1.4S WORD $0x09c961ce //SM4EKEY V9.4S, V8.4S, V1.4S
SM4EKEY_EXPORT_KEYS() SM4EKEY_EXPORT_KEYS()
WORD 0x09c962ce //SM4EKEY V9.4S, V8.4S, V2.4S WORD $0x28c962ce //SM4EKEY V8.4S, V9.4S, V2.4S
WORD 0x28c963ce //SM4EKEY V8.4S, V9.4S, V3.4S WORD $0x09c963ce //SM4EKEY V9.4S, V8.4S, V3.4S
SM4EKEY_EXPORT_KEYS() SM4EKEY_EXPORT_KEYS()
VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4] VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4]
WORD 0x09c960ce //SM4EKEY V9.4S, V8.4S, V0.4S WORD $0x28c960ce //SM4EKEY V8.4S, V9.4S, V0.4S
WORD 0x28c961ce //SM4EKEY V8.4S, V9.4S, V1.4S WORD $0x09c961ce //SM4EKEY V9.4S, V8.4S, V1.4S
SM4EKEY_EXPORT_KEYS() SM4EKEY_EXPORT_KEYS()
WORD 0x09c962ce //SM4EKEY V9.4S, V8.4S, V2.4S WORD $0x28c962ce //SM4EKEY V8.4S, V9.4S, V2.4S
WORD 0x28c963ce //SM4EKEY V8.4S, V9.4S, V3.4S WORD $0x09c963ce //SM4EKEY V9.4S, V8.4S, V3.4S
SM4EKEY_EXPORT_KEYS() SM4EKEY_EXPORT_KEYS()
RET RET
@ -391,15 +392,15 @@ sm4niblock:
VLD1 (R10), [V8.B16] VLD1 (R10), [V8.B16]
VREV32 V8.B16, V8.B16 VREV32 V8.B16, V8.B16
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4] VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
WORD 0x0884c0ce //SM4E V8.4S, V0.4S WORD $0x0884c0ce //SM4E V8.4S, V0.4S
WORD 0x2884c0ce //SM4E V8.4S, V1.4S WORD $0x2884c0ce //SM4E V8.4S, V1.4S
WORD 0x4884c0ce //SM4E V8.4S, V2.4S WORD $0x4884c0ce //SM4E V8.4S, V2.4S
WORD 0x6884c0ce //SM4E V8.4S, V3.4S WORD $0x6884c0ce //SM4E V8.4S, V3.4S
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4] VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
WORD 0x0884c0ce //SM4E V8.4S, V0.4S WORD $0x0884c0ce //SM4E V8.4S, V0.4S
WORD 0x2884c0ce //SM4E V8.4S, V1.4S WORD $0x2884c0ce //SM4E V8.4S, V1.4S
WORD 0x4884c0ce //SM4E V8.4S, V2.4S WORD $0x4884c0ce //SM4E V8.4S, V2.4S
WORD 0x6884c0ce //SM4E V8.4S, V3.4S WORD $0x6884c0ce //SM4E V8.4S, V3.4S
VREV32 V8.B16, V8.B16 VREV32 V8.B16, V8.B16
VST1 [V8.B16], (R9) VST1 [V8.B16], (R9)
RET RET

View File

@ -2209,4 +2209,3 @@ TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0
// func gcmSm4niDec(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32) // func gcmSm4niDec(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
// NOTE: the body below is a bare RET, so the routine returns immediately
// without reading or writing any of the arguments in the prototype.
TEXT ·gcmSm4niDec(SB),NOSPLIT,$0 TEXT ·gcmSm4niDec(SB),NOSPLIT,$0
RET RET

View File

@ -298,15 +298,15 @@ sm4InitEncLoop:
sm4InitSM4E: sm4InitSM4E:
VEOR B0.B16, B0.B16, B0.B16 VEOR B0.B16, B0.B16, B0.B16
VLD1.P 64(RK), [T0.S4, T1.S4, T2.S4, T3.S4] VLD1.P 64(RK), [T0.S4, T1.S4, T2.S4, T3.S4]
WORD 0x6085c0ce //SM4E V0.4S, V11.4S WORD $0x6085c0ce //SM4E V0.4S, V11.4S
WORD 0x8085c0ce //SM4E V0.4S, V12.4S WORD $0x8085c0ce //SM4E V0.4S, V12.4S
WORD 0xa085c0ce //SM4E V0.4S, V13.4S WORD $0xa085c0ce //SM4E V0.4S, V13.4S
WORD 0xc085c0ce //SM4E V0.4S, V14.4S WORD $0xc085c0ce //SM4E V0.4S, V14.4S
VLD1.P 64(RK), [T0.S4, T1.S4, T2.S4, T3.S4] VLD1.P 64(RK), [T0.S4, T1.S4, T2.S4, T3.S4]
WORD 0x6085c0ce //SM4E V0.4S, V11.4S WORD $0x6085c0ce //SM4E V0.4S, V11.4S
WORD 0x8085c0ce //SM4E V0.4S, V12.4S WORD $0x8085c0ce //SM4E V0.4S, V12.4S
WORD 0xa085c0ce //SM4E V0.4S, V13.4S WORD $0xa085c0ce //SM4E V0.4S, V13.4S
WORD 0xc085c0ce //SM4E V0.4S, V14.4S WORD $0xc085c0ce //SM4E V0.4S, V14.4S
VREV32 B0.B16, B0.B16 VREV32 B0.B16, B0.B16
VREV64 B0.B16, B0.B16 VREV64 B0.B16, B0.B16
sm4InitEncDone: sm4InitEncDone:

View File

@ -60,73 +60,73 @@
VEOR T3.B16, ACCM.B16, ACCM.B16 VEOR T3.B16, ACCM.B16, ACCM.B16
// sm4eEnc1block emits eight hand-encoded instruction words. Per the trailing
// annotations they are SM4E on V0.4S with V19.4S through V26.4S in turn.
// NOTE(review): V19..V26 presumably hold preloaded round keys; confirm at the
// call sites.
#define sm4eEnc1block() \ #define sm4eEnc1block() \
WORD 0x6086c0ce \ //SM4E V0.4S, V19.4S WORD $0x6086c0ce \ //SM4E V0.4S, V19.4S
WORD 0x8086c0ce \ //SM4E V0.4S, V20.4S WORD $0x8086c0ce \ //SM4E V0.4S, V20.4S
WORD 0xa086c0ce \ //SM4E V0.4S, V21.4S WORD $0xa086c0ce \ //SM4E V0.4S, V21.4S
WORD 0xc086c0ce \ //SM4E V0.4S, V22.4S WORD $0xc086c0ce \ //SM4E V0.4S, V22.4S
WORD 0xe086c0ce \ //SM4E V0.4S, V23.4S WORD $0xe086c0ce \ //SM4E V0.4S, V23.4S
WORD 0x0087c0ce \ //SM4E V0.4S, V24.4S WORD $0x0087c0ce \ //SM4E V0.4S, V24.4S
WORD 0x2087c0ce \ //SM4E V0.4S, V25.4S WORD $0x2087c0ce \ //SM4E V0.4S, V25.4S
WORD 0x4087c0ce //SM4E V0.4S, V26.4S WORD $0x4087c0ce //SM4E V0.4S, V26.4S
// sm4eEnc8blocks expands sm4eEnc1block (the V0 sequence) and then repeats the
// same eight-word pattern for V1 through V7, for 64 instruction words in total.
// Per the trailing annotations every word is SM4E Vn.4S with V19.4S..V26.4S,
// applied to one register at a time in the order V0, V1, ..., V7.
#define sm4eEnc8blocks() \ #define sm4eEnc8blocks() \
sm4eEnc1block() \ sm4eEnc1block() \
WORD 0x6186c0ce \ //SM4E V1.4S, V19.4S WORD $0x6186c0ce \ //SM4E V1.4S, V19.4S
WORD 0x8186c0ce \ //SM4E V1.4S, V20.4S WORD $0x8186c0ce \ //SM4E V1.4S, V20.4S
WORD 0xa186c0ce \ //SM4E V1.4S, V21.4S WORD $0xa186c0ce \ //SM4E V1.4S, V21.4S
WORD 0xc186c0ce \ //SM4E V1.4S, V22.4S WORD $0xc186c0ce \ //SM4E V1.4S, V22.4S
WORD 0xe186c0ce \ //SM4E V1.4S, V23.4S WORD $0xe186c0ce \ //SM4E V1.4S, V23.4S
WORD 0x0187c0ce \ //SM4E V1.4S, V24.4S WORD $0x0187c0ce \ //SM4E V1.4S, V24.4S
WORD 0x2187c0ce \ //SM4E V1.4S, V25.4S WORD $0x2187c0ce \ //SM4E V1.4S, V25.4S
WORD 0x4187c0ce \ //SM4E V1.4S, V26.4S WORD $0x4187c0ce \ //SM4E V1.4S, V26.4S
WORD 0x6286c0ce \ //SM4E V2.4S, V19.4S WORD $0x6286c0ce \ //SM4E V2.4S, V19.4S
WORD 0x8286c0ce \ //SM4E V2.4S, V20.4S WORD $0x8286c0ce \ //SM4E V2.4S, V20.4S
WORD 0xa286c0ce \ //SM4E V2.4S, V21.4S WORD $0xa286c0ce \ //SM4E V2.4S, V21.4S
WORD 0xc286c0ce \ //SM4E V2.4S, V22.4S WORD $0xc286c0ce \ //SM4E V2.4S, V22.4S
WORD 0xe286c0ce \ //SM4E V2.4S, V23.4S WORD $0xe286c0ce \ //SM4E V2.4S, V23.4S
WORD 0x0287c0ce \ //SM4E V2.4S, V24.4S WORD $0x0287c0ce \ //SM4E V2.4S, V24.4S
WORD 0x2287c0ce \ //SM4E V2.4S, V25.4S WORD $0x2287c0ce \ //SM4E V2.4S, V25.4S
WORD 0x4287c0ce \ //SM4E V2.4S, V26.4S WORD $0x4287c0ce \ //SM4E V2.4S, V26.4S
WORD 0x6386c0ce \ //SM4E V3.4S, V19.4S WORD $0x6386c0ce \ //SM4E V3.4S, V19.4S
WORD 0x8386c0ce \ //SM4E V3.4S, V20.4S WORD $0x8386c0ce \ //SM4E V3.4S, V20.4S
WORD 0xa386c0ce \ //SM4E V3.4S, V21.4S WORD $0xa386c0ce \ //SM4E V3.4S, V21.4S
WORD 0xc386c0ce \ //SM4E V3.4S, V22.4S WORD $0xc386c0ce \ //SM4E V3.4S, V22.4S
WORD 0xe386c0ce \ //SM4E V3.4S, V23.4S WORD $0xe386c0ce \ //SM4E V3.4S, V23.4S
WORD 0x0387c0ce \ //SM4E V3.4S, V24.4S WORD $0x0387c0ce \ //SM4E V3.4S, V24.4S
WORD 0x2387c0ce \ //SM4E V3.4S, V25.4S WORD $0x2387c0ce \ //SM4E V3.4S, V25.4S
WORD 0x4387c0ce \ //SM4E V3.4S, V26.4S WORD $0x4387c0ce \ //SM4E V3.4S, V26.4S
WORD 0x6486c0ce \ //SM4E V4.4S, V19.4S WORD $0x6486c0ce \ //SM4E V4.4S, V19.4S
WORD 0x8486c0ce \ //SM4E V4.4S, V20.4S WORD $0x8486c0ce \ //SM4E V4.4S, V20.4S
WORD 0xa486c0ce \ //SM4E V4.4S, V21.4S WORD $0xa486c0ce \ //SM4E V4.4S, V21.4S
WORD 0xc486c0ce \ //SM4E V4.4S, V22.4S WORD $0xc486c0ce \ //SM4E V4.4S, V22.4S
WORD 0xe486c0ce \ //SM4E V4.4S, V23.4S WORD $0xe486c0ce \ //SM4E V4.4S, V23.4S
WORD 0x0487c0ce \ //SM4E V4.4S, V24.4S WORD $0x0487c0ce \ //SM4E V4.4S, V24.4S
WORD 0x2487c0ce \ //SM4E V4.4S, V25.4S WORD $0x2487c0ce \ //SM4E V4.4S, V25.4S
WORD 0x4487c0ce \ //SM4E V4.4S, V26.4S WORD $0x4487c0ce \ //SM4E V4.4S, V26.4S
WORD 0x6586c0ce \ //SM4E V5.4S, V19.4S WORD $0x6586c0ce \ //SM4E V5.4S, V19.4S
WORD 0x8586c0ce \ //SM4E V5.4S, V20.4S WORD $0x8586c0ce \ //SM4E V5.4S, V20.4S
WORD 0xa586c0ce \ //SM4E V5.4S, V21.4S WORD $0xa586c0ce \ //SM4E V5.4S, V21.4S
WORD 0xc586c0ce \ //SM4E V5.4S, V22.4S WORD $0xc586c0ce \ //SM4E V5.4S, V22.4S
WORD 0xe586c0ce \ //SM4E V5.4S, V23.4S WORD $0xe586c0ce \ //SM4E V5.4S, V23.4S
WORD 0x0587c0ce \ //SM4E V5.4S, V24.4S WORD $0x0587c0ce \ //SM4E V5.4S, V24.4S
WORD 0x2587c0ce \ //SM4E V5.4S, V25.4S WORD $0x2587c0ce \ //SM4E V5.4S, V25.4S
WORD 0x4587c0ce \ //SM4E V5.4S, V26.4S WORD $0x4587c0ce \ //SM4E V5.4S, V26.4S
WORD 0x6686c0ce \ //SM4E V6.4S, V19.4S WORD $0x6686c0ce \ //SM4E V6.4S, V19.4S
WORD 0x8686c0ce \ //SM4E V6.4S, V20.4S WORD $0x8686c0ce \ //SM4E V6.4S, V20.4S
WORD 0xa686c0ce \ //SM4E V6.4S, V21.4S WORD $0xa686c0ce \ //SM4E V6.4S, V21.4S
WORD 0xc686c0ce \ //SM4E V6.4S, V22.4S WORD $0xc686c0ce \ //SM4E V6.4S, V22.4S
WORD 0xe686c0ce \ //SM4E V6.4S, V23.4S WORD $0xe686c0ce \ //SM4E V6.4S, V23.4S
WORD 0x0687c0ce \ //SM4E V6.4S, V24.4S WORD $0x0687c0ce \ //SM4E V6.4S, V24.4S
WORD 0x2687c0ce \ //SM4E V6.4S, V25.4S WORD $0x2687c0ce \ //SM4E V6.4S, V25.4S
WORD 0x4687c0ce \ //SM4E V6.4S, V26.4S WORD $0x4687c0ce \ //SM4E V6.4S, V26.4S
WORD 0x6786c0ce \ //SM4E V7.4S, V19.4S WORD $0x6786c0ce \ //SM4E V7.4S, V19.4S
WORD 0x8786c0ce \ //SM4E V7.4S, V20.4S WORD $0x8786c0ce \ //SM4E V7.4S, V20.4S
WORD 0xa786c0ce \ //SM4E V7.4S, V21.4S WORD $0xa786c0ce \ //SM4E V7.4S, V21.4S
WORD 0xc786c0ce \ //SM4E V7.4S, V22.4S WORD $0xc786c0ce \ //SM4E V7.4S, V22.4S
WORD 0xe786c0ce \ //SM4E V7.4S, V23.4S WORD $0xe786c0ce \ //SM4E V7.4S, V23.4S
WORD 0x0787c0ce \ //SM4E V7.4S, V24.4S WORD $0x0787c0ce \ //SM4E V7.4S, V24.4S
WORD 0x2787c0ce \ //SM4E V7.4S, V25.4S WORD $0x2787c0ce \ //SM4E V7.4S, V25.4S
WORD 0x4787c0ce //SM4E V7.4S, V26.4S WORD $0x4787c0ce //SM4E V7.4S, V26.4S
// func gcmSm4niEnc(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32) // func gcmSm4niEnc(productTable *[256]byte, dst, src []byte, ctr, T *[16]byte, rk []uint32)
TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0 TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0
@ -142,6 +142,7 @@ TEXT ·gcmSm4niEnc(SB),NOSPLIT,$0
#define H0 R9 #define H0 R9
#define H1 R10 #define H1 R10
#define pTblSave R11 #define pTblSave R11
#define rkSave R12
MOVD productTable+0(FP), pTbl MOVD productTable+0(FP), pTbl
MOVD dst+8(FP), dstPtr MOVD dst+8(FP), dstPtr
MOVD src_base+32(FP), srcPtr MOVD src_base+32(FP), srcPtr

View File

@ -27,11 +27,11 @@ func sm4ekey(Vd, Vn, Vm byte) uint32 {
} }
// sm4ekeyRound appends one tab-indented assembler line to buf: a WORD
// directive carrying sm4ekey(d, n, m) as eight zero-padded hex digits,
// followed by a trailing comment naming the instruction as
// SM4EKEY V<d>.4S, V<n>.4S, V<m>.4S.
// The second Fprintf on the line below is the changed form, which puts the
// `$` prefix in front of the hex literal.
func sm4ekeyRound(buf *bytes.Buffer, d, n, m byte) { func sm4ekeyRound(buf *bytes.Buffer, d, n, m byte) {
fmt.Fprintf(buf, "\tWORD 0x%08x //SM4EKEY V%d.4S, V%d.4S, V%d.4S\n", sm4ekey(d, n, m), d, n, m) fmt.Fprintf(buf, "\tWORD $0x%08x //SM4EKEY V%d.4S, V%d.4S, V%d.4S\n", sm4ekey(d, n, m), d, n, m)
} }
// sm4eRound appends one tab-indented assembler line to buf: a WORD directive
// carrying sm4e(d, n) as eight zero-padded hex digits, followed by a trailing
// comment naming the instruction as SM4E V<d>.4S, V<n>.4S.
// The second Fprintf on the line below is the changed form, which puts the
// `$` prefix in front of the hex literal.
func sm4eRound(buf *bytes.Buffer, d, n byte) { func sm4eRound(buf *bytes.Buffer, d, n byte) {
fmt.Fprintf(buf, "\tWORD 0x%08x //SM4E V%d.4S, V%d.4S\n", sm4e(d, n), d, n) fmt.Fprintf(buf, "\tWORD $0x%08x //SM4E V%d.4S, V%d.4S\n", sm4e(d, n), d, n)
} }
func main() { func main() {
@ -48,27 +48,27 @@ TEXT ·expandKeySM4E(SB),NOSPLIT,$0
MOVD ck+16(FP), R10 MOVD ck+16(FP), R10
MOVD enc+24(FP), R11 MOVD enc+24(FP), R11
VLD1 (R8), [V8.B16] VLD1 (R8), [V9.B16]
VREV32 V8.B16, V8.B16 VREV32 V9.B16, V9.B16
VLD1 (R9), [V9.S4] VLD1 (R9), [V8.S4]
VEOR V9, V8, V8 VEOR V9, V8, V9
VLD1.P 64(R10), [V0.S4, V1.S4, V2.S4, V3.S4] VLD1.P 64(R10), [V0.S4, V1.S4, V2.S4, V3.S4]
`[1:]) `[1:])
sm4ekeyRound(buf, 9, 8, 0) sm4ekeyRound(buf, 8, 9, 0)
sm4ekeyRound(buf, 8, 9, 1) sm4ekeyRound(buf, 9, 8, 1)
fmt.Fprintf(buf, "\tVST1.P [V9.S4, V8.S4], 32(R11)\n") fmt.Fprintf(buf, "\tVST1.P [V8.S4, V9.S4], 32(R11)\n")
sm4ekeyRound(buf, 9, 8, 2) sm4ekeyRound(buf, 8, 9, 2)
sm4ekeyRound(buf, 8, 9, 3) sm4ekeyRound(buf, 9, 8, 3)
fmt.Fprintf(buf, "\tVST1.P [V9.S4, V8.S4], 32(R11)\n") fmt.Fprintf(buf, "\tVST1.P [V8.S4, V9.S4], 32(R11)\n")
fmt.Fprintf(buf, "\tVLD1.P 64(R10), [V0.S4, V1.S4, V2.S4, V3.S4]\n") fmt.Fprintf(buf, "\tVLD1.P 64(R10), [V0.S4, V1.S4, V2.S4, V3.S4]\n")
sm4ekeyRound(buf, 9, 8, 0) sm4ekeyRound(buf, 8, 9, 0)
sm4ekeyRound(buf, 8, 9, 1) sm4ekeyRound(buf, 9, 8, 1)
fmt.Fprintf(buf, "\tVST1.P [V9.S4, V8.S4], 32(R11)\n") fmt.Fprintf(buf, "\tVST1.P [V8.S4, V9.S4], 32(R11)\n")
sm4ekeyRound(buf, 9, 8, 2) sm4ekeyRound(buf, 8, 9, 2)
sm4ekeyRound(buf, 8, 9, 3) sm4ekeyRound(buf, 9, 8, 3)
fmt.Fprintf(buf, ` fmt.Fprintf(buf, `
VST1.P [V9.S4, V8.S4], 32(R11) VST1.P [V8.S4, V9.S4], 32(R11)
RET RET
`[1:]) `[1:])
fmt.Fprint(buf, ` fmt.Fprint(buf, `