sm3: fix arm sm3 ni issue

This commit is contained in:
Sun Yimin 2023-03-30 13:03:31 +08:00 committed by GitHub
parent b2b56fae32
commit bb3529894c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 237 additions and 228 deletions

View File

@ -43,7 +43,7 @@ jobs:
key: ${{ runner.os }}-${{ env.cache-name }}-${{ hashFiles('**/go.sum') }} key: ${{ runner.os }}-${{ env.cache-name }}-${{ hashFiles('**/go.sum') }}
- name: Test - name: Test
run: go test -v -short ./sm4/... run: go test -v -short ./sm3/...
env: env:
GODEBUG: x509sha1=1 GODEBUG: x509sha1=1
GOARCH: ${{ matrix.arch }} GOARCH: ${{ matrix.arch }}

View File

@ -10,50 +10,56 @@ import (
"bytes" "bytes"
"fmt" "fmt"
"log" "log"
"math/bits"
"os" "os"
) )
// sm3partw1 returns the 32-bit A64 instruction word for
//
//	SM3PARTW1 <Vd>.4S, <Vn>.4S, <Vm>.4S
//
// The base opcode 0xce60c000 is OR-ed with the register numbers: Vd in bits
// 0-4, Vn in bits 5-9 and Vm in bits 16-20. Each register number is masked to
// 5 bits, so values above 31 wrap.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3partw1(4, 0, 3) == 0xce63c004.
func sm3partw1(Vd, Vn, Vm byte) uint32 {
	inst := uint32(0xce60c000) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(Vm&0x1f)<<16
	return inst
}
// sm3partw2 returns the 32-bit A64 instruction word for
//
//	SM3PARTW2 <Vd>.4S, <Vn>.4S, <Vm>.4S
//
// The base opcode 0xce60c400 is OR-ed with the register numbers: Vd in bits
// 0-4, Vn in bits 5-9 and Vm in bits 16-20. Each register number is masked to
// 5 bits, so values above 31 wrap.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3partw2(4, 7, 6) == 0xce66c4e4.
func sm3partw2(Vd, Vn, Vm byte) uint32 {
	inst := uint32(0xce60c400) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(Vm&0x1f)<<16
	return inst
}
// sm3ss1 returns the 32-bit A64 instruction word for
//
//	SM3SS1 <Vd>.4S, <Vn>.4S, <Vm>.4S, <Va>.4S
//
// The base opcode 0xce400000 is OR-ed with the register numbers: Vd in bits
// 0-4, Vn in bits 5-9, Va in bits 10-14 and Vm in bits 16-20. Note that the
// parameter order (Vd, Vn, Vm, Va) follows the assembly operand order, not
// the bit-field order. Each register number is masked to 5 bits.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3ss1(5, 8, 11, 9) == 0xce4b2505.
func sm3ss1(Vd, Vn, Vm, Va byte) uint32 {
	inst := uint32(0xce400000) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(Va&0x1f)<<10 | uint32(Vm&0x1f)<<16
	return inst
}
// sm3tt1a returns the 32-bit A64 instruction word for
//
//	SM3TT1A <Vd>.4S, <Vn>.4S, <Vm>.S[<imm2>]
//
// The base opcode 0xce408000 is OR-ed with Vd in bits 0-4, Vn in bits 5-9,
// the lane index imm2 in bits 12-13 and Vm in bits 16-20. Register numbers
// are masked to 5 bits and imm2 to 2 bits.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3tt1a(8, 5, 10, 0) == 0xce4a80a8.
func sm3tt1a(Vd, Vn, Vm, imm2 byte) uint32 {
	inst := uint32(0xce408000) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16
	return inst
}
// sm3tt1b returns the 32-bit A64 instruction word for
//
//	SM3TT1B <Vd>.4S, <Vn>.4S, <Vm>.S[<imm2>]
//
// The base opcode 0xce408400 is OR-ed with Vd in bits 0-4, Vn in bits 5-9,
// the lane index imm2 in bits 12-13 and Vm in bits 16-20. Register numbers
// are masked to 5 bits and imm2 to 2 bits.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3tt1b(8, 5, 10, 0) == 0xce4a84a8.
func sm3tt1b(Vd, Vn, Vm, imm2 byte) uint32 {
	inst := uint32(0xce408400) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16
	return inst
}
// sm3tt2a returns the 32-bit A64 instruction word for
//
//	SM3TT2A <Vd>.4S, <Vn>.4S, <Vm>.S[<imm2>]
//
// The base opcode 0xce408800 is OR-ed with Vd in bits 0-4, Vn in bits 5-9,
// the lane index imm2 in bits 12-13 and Vm in bits 16-20. Register numbers
// are masked to 5 bits and imm2 to 2 bits.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3tt2a(9, 5, 0, 0) == 0xce4088a9.
func sm3tt2a(Vd, Vn, Vm, imm2 byte) uint32 {
	inst := uint32(0xce408800) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16
	return inst
}
// sm3tt2b returns the 32-bit A64 instruction word for
//
//	SM3TT2B <Vd>.4S, <Vn>.4S, <Vm>.S[<imm2>]
//
// The base opcode 0xce408c00 is OR-ed with Vd in bits 0-4, Vn in bits 5-9,
// the lane index imm2 in bits 12-13 and Vm in bits 16-20. Register numbers
// are masked to 5 bits and imm2 to 2 bits.
//
// The word is returned in its natural numeric form and is NOT byte-reversed,
// e.g. sm3tt2b(9, 5, 4, 0) == 0xce448ca9.
func sm3tt2b(Vd, Vn, Vm, imm2 byte) uint32 {
	inst := uint32(0xce408c00) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16
	return inst
}
// Used v5 as temp register // Used v5 as temp register
@ -119,6 +125,8 @@ func main() {
buf := new(bytes.Buffer) buf := new(bytes.Buffer)
fmt.Fprint(buf, ` fmt.Fprint(buf, `
// Generated by gen_sm3block_ni.go. DO NOT EDIT. // Generated by gen_sm3block_ni.go. DO NOT EDIT.
//go:build arm64 && !purego
// +build arm64,!purego
#include "textflag.h" #include "textflag.h"
@ -172,9 +180,10 @@ blockloop:
sm3ret: sm3ret:
VST1 [V8.S4, V9.S4], (R0) // store hash value H VST1 [V8.S4, V9.S4], (R0) // store hash value H
RET RET
`[1:]) `[1:])
src := buf.Bytes() src := buf.Bytes()
// fmt.Println(string(src)) fmt.Println(string(src))
err := os.WriteFile("sm3blockni_arm64.s", src, 0644) err := os.WriteFile("sm3blockni_arm64.s", src, 0644)
if err != nil { if err != nil {

View File

@ -28,101 +28,101 @@ blockloop:
VEXT $3, V2.B16, V1.B16, V4.B16 VEXT $3, V2.B16, V1.B16, V4.B16
VEXT $3, V1.B16, V0.B16, V6.B16 VEXT $3, V1.B16, V0.B16, V6.B16
VEXT $2, V3.B16, V2.B16, V7.B16 VEXT $2, V3.B16, V2.B16, V7.B16
WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
VEOR V1.B16, V0.B16, V10.B16 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD $0xa98840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 0 WORD $0xce4088a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD $0xa99840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 1 WORD $0xce4098a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD $0xa9a840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 2 WORD $0xce40a8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD $0xa9b840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 3 WORD $0xce40b8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 3
// Extension // Extension
VEXT $3, V3.B16, V2.B16, V0.B16 VEXT $3, V3.B16, V2.B16, V0.B16
VEXT $3, V2.B16, V1.B16, V6.B16 VEXT $3, V2.B16, V1.B16, V6.B16
VEXT $2, V4.B16, V3.B16, V7.B16 VEXT $2, V4.B16, V3.B16, V7.B16
WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
VEOR V2.B16, V1.B16, V10.B16 VEOR V2.B16, V1.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD $0xa98841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 0 WORD $0xce4188a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD $0xa99841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 1 WORD $0xce4198a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD $0xa9a841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 2 WORD $0xce41a8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD $0xa9b841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 3 WORD $0xce41b8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 3
// Extension // Extension
VEXT $3, V4.B16, V3.B16, V1.B16 VEXT $3, V4.B16, V3.B16, V1.B16
VEXT $3, V3.B16, V2.B16, V6.B16 VEXT $3, V3.B16, V2.B16, V6.B16
VEXT $2, V0.B16, V4.B16, V7.B16 VEXT $2, V0.B16, V4.B16, V7.B16
WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
VEOR V3.B16, V2.B16, V10.B16 VEOR V3.B16, V2.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD $0xa98842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 0 WORD $0xce4288a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD $0xa99842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 1 WORD $0xce4298a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD $0xa9a842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 2 WORD $0xce42a8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD $0xa9b842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 3 WORD $0xce42b8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 3
// Extension // Extension
VEXT $3, V0.B16, V4.B16, V2.B16 VEXT $3, V0.B16, V4.B16, V2.B16
VEXT $3, V4.B16, V3.B16, V6.B16 VEXT $3, V4.B16, V3.B16, V6.B16
VEXT $2, V1.B16, V0.B16, V7.B16 VEXT $2, V1.B16, V0.B16, V7.B16
WORD $0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S
WORD $0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S
VEOR V4.B16, V3.B16, V10.B16 VEOR V4.B16, V3.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0
WORD $0xa98843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 0 WORD $0xce4388a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1
WORD $0xa99843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 1 WORD $0xce4398a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2
WORD $0xa9a843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 2 WORD $0xce43a8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3
WORD $0xa9b843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 3 WORD $0xce43b8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 3
// second 48 rounds // second 48 rounds
VMOV R6, V11.S[3] VMOV R6, V11.S[3]
@ -130,283 +130,283 @@ blockloop:
VEXT $3, V1.B16, V0.B16, V3.B16 VEXT $3, V1.B16, V0.B16, V3.B16
VEXT $3, V0.B16, V4.B16, V6.B16 VEXT $3, V0.B16, V4.B16, V6.B16
VEXT $2, V2.B16, V1.B16, V7.B16 VEXT $2, V2.B16, V1.B16, V7.B16
WORD $0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S
WORD $0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S
VEOR V0.B16, V4.B16, V10.B16 VEOR V0.B16, V4.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
// Extension // Extension
VEXT $3, V2.B16, V1.B16, V4.B16 VEXT $3, V2.B16, V1.B16, V4.B16
VEXT $3, V1.B16, V0.B16, V6.B16 VEXT $3, V1.B16, V0.B16, V6.B16
VEXT $2, V3.B16, V2.B16, V7.B16 VEXT $2, V3.B16, V2.B16, V7.B16
WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
VEOR V1.B16, V0.B16, V10.B16 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
// Extension // Extension
VEXT $3, V3.B16, V2.B16, V0.B16 VEXT $3, V3.B16, V2.B16, V0.B16
VEXT $3, V2.B16, V1.B16, V6.B16 VEXT $3, V2.B16, V1.B16, V6.B16
VEXT $2, V4.B16, V3.B16, V7.B16 VEXT $2, V4.B16, V3.B16, V7.B16
WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
VEOR V2.B16, V1.B16, V10.B16 VEOR V2.B16, V1.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0 WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1 WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2 WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3 WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
// Extension // Extension
VEXT $3, V4.B16, V3.B16, V1.B16 VEXT $3, V4.B16, V3.B16, V1.B16
VEXT $3, V3.B16, V2.B16, V6.B16 VEXT $3, V3.B16, V2.B16, V6.B16
VEXT $2, V0.B16, V4.B16, V7.B16 VEXT $2, V0.B16, V4.B16, V7.B16
WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
VEOR V3.B16, V2.B16, V10.B16 VEOR V3.B16, V2.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0 WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1 WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2 WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3 WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
// Extension // Extension
VEXT $3, V0.B16, V4.B16, V2.B16 VEXT $3, V0.B16, V4.B16, V2.B16
VEXT $3, V4.B16, V3.B16, V6.B16 VEXT $3, V4.B16, V3.B16, V6.B16
VEXT $2, V1.B16, V0.B16, V7.B16 VEXT $2, V1.B16, V0.B16, V7.B16
WORD $0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S
WORD $0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S
VEOR V4.B16, V3.B16, V10.B16 VEOR V4.B16, V3.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0 WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1 WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2 WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3 WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
// Extension // Extension
VEXT $3, V1.B16, V0.B16, V3.B16 VEXT $3, V1.B16, V0.B16, V3.B16
VEXT $3, V0.B16, V4.B16, V6.B16 VEXT $3, V0.B16, V4.B16, V6.B16
VEXT $2, V2.B16, V1.B16, V7.B16 VEXT $2, V2.B16, V1.B16, V7.B16
WORD $0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S
WORD $0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S
VEOR V0.B16, V4.B16, V10.B16 VEOR V0.B16, V4.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
// Extension // Extension
VEXT $3, V2.B16, V1.B16, V4.B16 VEXT $3, V2.B16, V1.B16, V4.B16
VEXT $3, V1.B16, V0.B16, V6.B16 VEXT $3, V1.B16, V0.B16, V6.B16
VEXT $2, V3.B16, V2.B16, V7.B16 VEXT $2, V3.B16, V2.B16, V7.B16
WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S
WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S
VEOR V1.B16, V0.B16, V10.B16 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
// Extension // Extension
VEXT $3, V3.B16, V2.B16, V0.B16 VEXT $3, V3.B16, V2.B16, V0.B16
VEXT $3, V2.B16, V1.B16, V6.B16 VEXT $3, V2.B16, V1.B16, V6.B16
VEXT $2, V4.B16, V3.B16, V7.B16 VEXT $2, V4.B16, V3.B16, V7.B16
WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S
WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S
VEOR V2.B16, V1.B16, V10.B16 VEOR V2.B16, V1.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0 WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1 WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2 WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3 WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3
// Extension // Extension
VEXT $3, V4.B16, V3.B16, V1.B16 VEXT $3, V4.B16, V3.B16, V1.B16
VEXT $3, V3.B16, V2.B16, V6.B16 VEXT $3, V3.B16, V2.B16, V6.B16
VEXT $2, V0.B16, V4.B16, V7.B16 VEXT $2, V0.B16, V4.B16, V7.B16
WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S
WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S
VEOR V3.B16, V2.B16, V10.B16 VEOR V3.B16, V2.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0 WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1 WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2 WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3 WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3
VEOR V4.B16, V3.B16, V10.B16 VEOR V4.B16, V3.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0 WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1 WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2 WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3 WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3
VEOR V0.B16, V4.B16, V10.B16 VEOR V0.B16, V4.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3
VEOR V1.B16, V0.B16, V10.B16 VEOR V1.B16, V0.B16, V10.B16
// Compression // Compression
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0
WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1
WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2
WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2
WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
VSHL $1, V11.S4, V11.S4 VSHL $1, V11.S4, V11.S4
WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3
WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3
SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes
VEOR V8.B16, V15.B16, V8.B16 VEOR V8.B16, V15.B16, V8.B16