mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 12:16:20 +08:00
sm3: fix rotateleft32 issue
This commit is contained in:
parent
d38216752d
commit
7047aab300
@ -63,17 +63,19 @@ func sm3tt2b(Vd, Vn, Vm, imm2 byte) uint32 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Used v5 as temp register
|
// Used v5 as temp register
|
||||||
func roundA(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
|
func roundA(buf *bytes.Buffer, i, t0, t1, st1, st2, w, wt byte) {
|
||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t, st2), 5, st1, t, st2)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t0, st2), 5, st1, t0, st2)
|
||||||
fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t, t)
|
fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t0, t1)
|
||||||
|
fmt.Fprintf(buf, "\tVSRI $31, V%d.S4, V%d.S4\n", t0, t1)
|
||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT1A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1a(st1, 5, wt, i), st1, 5, wt, i)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT1A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1a(st1, 5, wt, i), st1, 5, wt, i)
|
||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT2A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2a(st2, 5, w, i), st2, 5, w, i)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT2A V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2a(st2, 5, w, i), st2, 5, w, i)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used v5 as temp register
|
// Used v5 as temp register
|
||||||
func roundB(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
|
func roundB(buf *bytes.Buffer, i, t0, t1, st1, st2, w, wt byte) {
|
||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t, st2), 5, st1, t, st2)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3SS1 V%d.4S, V%d.4S, V%d.4S, V%d.4S\n", sm3ss1(5, st1, t0, st2), 5, st1, t0, st2)
|
||||||
fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t, t)
|
fmt.Fprintf(buf, "\tVSHL $1, V%d.S4, V%d.S4\n", t0, t1)
|
||||||
|
fmt.Fprintf(buf, "\tVSRI $31, V%d.S4, V%d.S4\n", t0, t1)
|
||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT1B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1b(st1, 5, wt, i), st1, 5, wt, i)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT1B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt1b(st1, 5, wt, i), st1, 5, wt, i)
|
||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT2B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2b(st2, 5, w, i), st2, 5, w, i)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3TT2B V%dd.4S, V%d.4S, V%d.S, %d\n", sm3tt2b(st2, 5, w, i), st2, 5, w, i)
|
||||||
}
|
}
|
||||||
@ -86,7 +88,7 @@ func roundB(buf *bytes.Buffer, i, t, st1, st2, w, wt byte) {
|
|||||||
// s3, W(4i+12) W(4i+13) W(4i+14) W(4i+15)
|
// s3, W(4i+12) W(4i+13) W(4i+14) W(4i+15)
|
||||||
// t, t constant
|
// t, t constant
|
||||||
// st1, st2, sm3 state
|
// st1, st2, sm3 state
|
||||||
func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
func qroundA(buf *bytes.Buffer, t0, t1, st1, st2, s0, s1, s2, s3, s4 byte) {
|
||||||
fmt.Fprintf(buf, "\t// Extension\n")
|
fmt.Fprintf(buf, "\t// Extension\n")
|
||||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
||||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
|
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s1, s0, 6)
|
||||||
@ -95,15 +97,15 @@ func qroundA(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
|||||||
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6)
|
fmt.Fprintf(buf, "\tWORD $0x%08x //SM3PARTW2 V%d.4S, V%d.4S, V%d.4S\n", sm3partw2(s4, 7, 6), s4, 7, 6)
|
||||||
fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
|
fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
|
||||||
fmt.Fprintf(buf, "\t// Compression\n")
|
fmt.Fprintf(buf, "\t// Compression\n")
|
||||||
roundA(buf, 0, t, st1, st2, s0, 10)
|
roundA(buf, 0, t0, t1, st1, st2, s0, 10)
|
||||||
roundA(buf, 1, t, st1, st2, s0, 10)
|
roundA(buf, 1, t1, t0, st1, st2, s0, 10)
|
||||||
roundA(buf, 2, t, st1, st2, s0, 10)
|
roundA(buf, 2, t0, t1, st1, st2, s0, 10)
|
||||||
roundA(buf, 3, t, st1, st2, s0, 10)
|
roundA(buf, 3, t1, t0, st1, st2, s0, 10)
|
||||||
fmt.Fprintf(buf, "\n")
|
fmt.Fprintf(buf, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used v6, v7, v10 as temp registers
|
// Used v6, v7, v10 as temp registers
|
||||||
func qroundB(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
func qroundB(buf *bytes.Buffer, t0, t1, st1, st2, s0, s1, s2, s3, s4 byte) {
|
||||||
if s4 != 0xff {
|
if s4 != 0xff {
|
||||||
fmt.Fprintf(buf, "\t// Extension\n")
|
fmt.Fprintf(buf, "\t// Extension\n")
|
||||||
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
fmt.Fprintf(buf, "\tVEXT $12, V%d.B16, V%d.B16, V%d.B16\n", s2, s1, s4)
|
||||||
@ -114,10 +116,10 @@ func qroundB(buf *bytes.Buffer, t, st1, st2, s0, s1, s2, s3, s4 byte) {
|
|||||||
}
|
}
|
||||||
fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
|
fmt.Fprintf(buf, "\tVEOR V%d.B16, V%d.B16, V10.B16\n", s1, s0)
|
||||||
fmt.Fprintf(buf, "\t// Compression\n")
|
fmt.Fprintf(buf, "\t// Compression\n")
|
||||||
roundB(buf, 0, t, st1, st2, s0, 10)
|
roundB(buf, 0, t0, t1, st1, st2, s0, 10)
|
||||||
roundB(buf, 1, t, st1, st2, s0, 10)
|
roundB(buf, 1, t1, t0, st1, st2, s0, 10)
|
||||||
roundB(buf, 2, t, st1, st2, s0, 10)
|
roundB(buf, 2, t0, t1, st1, st2, s0, 10)
|
||||||
roundB(buf, 3, t, st1, st2, s0, 10)
|
roundB(buf, 3, t1, t0, st1, st2, s0, 10)
|
||||||
fmt.Fprintf(buf, "\n")
|
fmt.Fprintf(buf, "\n")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -138,6 +140,10 @@ TEXT ·blockSM3NI(SB), 0, $0
|
|||||||
MOVD t_base+48(FP), R2 // t constants first address
|
MOVD t_base+48(FP), R2 // t constants first address
|
||||||
|
|
||||||
VLD1 (R0), [V8.S4, V9.S4] // load h(a,b,c,d,e,f,g,h)
|
VLD1 (R0), [V8.S4, V9.S4] // load h(a,b,c,d,e,f,g,h)
|
||||||
|
VREV64 V8.S4, V8.S4
|
||||||
|
VEXT $8, V8.B16, V8.B16, V8.B16
|
||||||
|
VREV64 V9.S4, V9.S4
|
||||||
|
VEXT $8, V9.B16, V9.B16, V9.B16
|
||||||
LDPW (0*8)(R2), (R5, R6) // load t constants
|
LDPW (0*8)(R2), (R5, R6) // load t constants
|
||||||
|
|
||||||
blockloop:
|
blockloop:
|
||||||
@ -151,25 +157,25 @@ blockloop:
|
|||||||
// first 16 rounds
|
// first 16 rounds
|
||||||
VMOV R5, V11.S[3]
|
VMOV R5, V11.S[3]
|
||||||
`[1:])
|
`[1:])
|
||||||
qroundA(buf, 11, 8, 9, 0, 1, 2, 3, 4)
|
qroundA(buf, 11, 12, 8, 9, 0, 1, 2, 3, 4)
|
||||||
qroundA(buf, 11, 8, 9, 1, 2, 3, 4, 0)
|
qroundA(buf, 11, 12, 8, 9, 1, 2, 3, 4, 0)
|
||||||
qroundA(buf, 11, 8, 9, 2, 3, 4, 0, 1)
|
qroundA(buf, 11, 12, 8, 9, 2, 3, 4, 0, 1)
|
||||||
qroundA(buf, 11, 8, 9, 3, 4, 0, 1, 2)
|
qroundA(buf, 11, 12, 8, 9, 3, 4, 0, 1, 2)
|
||||||
|
|
||||||
fmt.Fprintf(buf, "\t// second 48 rounds\n")
|
fmt.Fprintf(buf, "\t// second 48 rounds\n")
|
||||||
fmt.Fprintf(buf, "\tVMOV R6, V11.S[3]\n")
|
fmt.Fprintf(buf, "\tVMOV R6, V11.S[3]\n")
|
||||||
qroundB(buf, 11, 8, 9, 4, 0, 1, 2, 3)
|
qroundB(buf, 11, 12, 8, 9, 4, 0, 1, 2, 3)
|
||||||
qroundB(buf, 11, 8, 9, 0, 1, 2, 3, 4)
|
qroundB(buf, 11, 12, 8, 9, 0, 1, 2, 3, 4)
|
||||||
qroundB(buf, 11, 8, 9, 1, 2, 3, 4, 0)
|
qroundB(buf, 11, 12, 8, 9, 1, 2, 3, 4, 0)
|
||||||
qroundB(buf, 11, 8, 9, 2, 3, 4, 0, 1)
|
qroundB(buf, 11, 12, 8, 9, 2, 3, 4, 0, 1)
|
||||||
qroundB(buf, 11, 8, 9, 3, 4, 0, 1, 2)
|
qroundB(buf, 11, 12, 8, 9, 3, 4, 0, 1, 2)
|
||||||
qroundB(buf, 11, 8, 9, 4, 0, 1, 2, 3)
|
qroundB(buf, 11, 12, 8, 9, 4, 0, 1, 2, 3)
|
||||||
qroundB(buf, 11, 8, 9, 0, 1, 2, 3, 4)
|
qroundB(buf, 11, 12, 8, 9, 0, 1, 2, 3, 4)
|
||||||
qroundB(buf, 11, 8, 9, 1, 2, 3, 4, 0)
|
qroundB(buf, 11, 12, 8, 9, 1, 2, 3, 4, 0)
|
||||||
qroundB(buf, 11, 8, 9, 2, 3, 4, 0, 1)
|
qroundB(buf, 11, 12, 8, 9, 2, 3, 4, 0, 1)
|
||||||
qroundB(buf, 11, 8, 9, 3, 4, 0xff, 0xff, 0xff)
|
qroundB(buf, 11, 12, 8, 9, 3, 4, 0xff, 0xff, 0xff)
|
||||||
qroundB(buf, 11, 8, 9, 4, 0, 0xff, 0xff, 0xff)
|
qroundB(buf, 11, 12, 8, 9, 4, 0, 0xff, 0xff, 0xff)
|
||||||
qroundB(buf, 11, 8, 9, 0, 1, 0xff, 0xff, 0xff)
|
qroundB(buf, 11, 12, 8, 9, 0, 1, 0xff, 0xff, 0xff)
|
||||||
|
|
||||||
fmt.Fprint(buf, `
|
fmt.Fprint(buf, `
|
||||||
SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes
|
SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes
|
||||||
@ -178,6 +184,10 @@ blockloop:
|
|||||||
CBNZ R3, blockloop
|
CBNZ R3, blockloop
|
||||||
|
|
||||||
sm3ret:
|
sm3ret:
|
||||||
|
VREV64 V8.S4, V8.S4
|
||||||
|
VEXT $8, V8.B16, V8.B16, V8.B16
|
||||||
|
VREV64 V9.S4, V9.S4
|
||||||
|
VEXT $8, V9.B16, V9.B16, V9.B16
|
||||||
VST1 [V8.S4, V9.S4], (R0) // store hash value H
|
VST1 [V8.S4, V9.S4], (R0) // store hash value H
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
@ -37,44 +37,52 @@ blockloop:
|
|||||||
VEOR V1.B16, V0.B16, V10.B16
|
VEOR V1.B16, V0.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce4088a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 0
|
WORD $0xce4088a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce4098a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 1
|
WORD $0xce4098a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce40a8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 2
|
WORD $0xce40a8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce40b8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 3
|
WORD $0xce40b8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 3
|
||||||
|
|
||||||
// Extension
|
// Extension
|
||||||
VEXT $3, V3.B16, V2.B16, V0.B16
|
VEXT $12, V3.B16, V2.B16, V0.B16
|
||||||
VEXT $3, V2.B16, V1.B16, V6.B16
|
VEXT $12, V2.B16, V1.B16, V6.B16
|
||||||
VEXT $2, V4.B16, V3.B16, V7.B16
|
VEXT $8, V4.B16, V3.B16, V7.B16
|
||||||
WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
|
WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
|
||||||
WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
|
WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
|
||||||
VEOR V2.B16, V1.B16, V10.B16
|
VEOR V2.B16, V1.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce4188a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 0
|
WORD $0xce4188a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce4198a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 1
|
WORD $0xce4198a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce41a8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 2
|
WORD $0xce41a8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce41b8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 3
|
WORD $0xce41b8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 3
|
||||||
|
|
||||||
@ -87,19 +95,23 @@ blockloop:
|
|||||||
VEOR V3.B16, V2.B16, V10.B16
|
VEOR V3.B16, V2.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce4288a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 0
|
WORD $0xce4288a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce4298a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 1
|
WORD $0xce4298a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce42a8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 2
|
WORD $0xce42a8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce42b8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 3
|
WORD $0xce42b8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 3
|
||||||
|
|
||||||
@ -112,19 +124,23 @@ blockloop:
|
|||||||
VEOR V4.B16, V3.B16, V10.B16
|
VEOR V4.B16, V3.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce4388a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 0
|
WORD $0xce4388a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce4398a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 1
|
WORD $0xce4398a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce43a8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 2
|
WORD $0xce43a8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce43b8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 3
|
WORD $0xce43b8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 3
|
||||||
|
|
||||||
@ -139,19 +155,23 @@ blockloop:
|
|||||||
VEOR V0.B16, V4.B16, V10.B16
|
VEOR V0.B16, V4.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
|
WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
|
WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
|
WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
||||||
|
|
||||||
@ -164,19 +184,23 @@ blockloop:
|
|||||||
VEOR V1.B16, V0.B16, V10.B16
|
VEOR V1.B16, V0.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
|
WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
|
WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
|
WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
||||||
|
|
||||||
@ -189,19 +213,23 @@ blockloop:
|
|||||||
VEOR V2.B16, V1.B16, V10.B16
|
VEOR V2.B16, V1.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
|
WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
|
WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
|
WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
|
WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
|
||||||
|
|
||||||
@ -214,19 +242,23 @@ blockloop:
|
|||||||
VEOR V3.B16, V2.B16, V10.B16
|
VEOR V3.B16, V2.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
|
WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
|
WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
|
WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
|
WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
|
||||||
|
|
||||||
@ -239,19 +271,23 @@ blockloop:
|
|||||||
VEOR V4.B16, V3.B16, V10.B16
|
VEOR V4.B16, V3.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
|
WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
|
WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
|
WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
|
WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
|
||||||
|
|
||||||
@ -264,19 +300,23 @@ blockloop:
|
|||||||
VEOR V0.B16, V4.B16, V10.B16
|
VEOR V0.B16, V4.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
|
WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
|
WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
|
WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
||||||
|
|
||||||
@ -289,19 +329,23 @@ blockloop:
|
|||||||
VEOR V1.B16, V0.B16, V10.B16
|
VEOR V1.B16, V0.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
|
WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
|
WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
|
WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
||||||
|
|
||||||
@ -314,19 +358,23 @@ blockloop:
|
|||||||
VEOR V2.B16, V1.B16, V10.B16
|
VEOR V2.B16, V1.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
|
WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
|
WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
|
WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
|
WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
|
||||||
|
|
||||||
@ -339,76 +387,92 @@ blockloop:
|
|||||||
VEOR V3.B16, V2.B16, V10.B16
|
VEOR V3.B16, V2.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
|
WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
|
WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
|
WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
|
WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
|
||||||
|
|
||||||
VEOR V4.B16, V3.B16, V10.B16
|
VEOR V4.B16, V3.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
|
WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
|
WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
|
WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
|
WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
|
||||||
|
|
||||||
VEOR V0.B16, V4.B16, V10.B16
|
VEOR V0.B16, V4.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
|
WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
|
WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
|
WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
|
||||||
|
|
||||||
VEOR V1.B16, V0.B16, V10.B16
|
VEOR V1.B16, V0.B16, V10.B16
|
||||||
// Compression
|
// Compression
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
|
||||||
WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
|
WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
|
||||||
WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
|
WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V11.S4, V12.S4
|
||||||
|
VSRI $31, V11.S4, V12.S4
|
||||||
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
|
||||||
WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
|
WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
|
||||||
WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
|
WORD $0xce4c2505 //SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
|
||||||
VSHL $1, V11.S4, V11.S4
|
VSHL $1, V12.S4, V11.S4
|
||||||
|
VSRI $31, V12.S4, V11.S4
|
||||||
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
|
||||||
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
|
||||||
|
|
||||||
@ -424,3 +488,4 @@ sm3ret:
|
|||||||
VEXT $8, V9.B16, V9.B16, V9.B16
|
VEXT $8, V9.B16, V9.B16, V9.B16
|
||||||
VST1 [V8.S4, V9.S4], (R0) // store hash value H
|
VST1 [V8.S4, V9.S4], (R0) // store hash value H
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user