From bb3529894c8884a4e26ffec14355d76525edec29 Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Thu, 30 Mar 2023 13:03:31 +0800 Subject: [PATCH] sm3: fix arm sm3 ni issue --- .github/workflows/test_sm_ni.yml | 2 +- sm3/gen_sm3block_ni.go | 27 +- sm3/sm3blockni_arm64.s | 436 +++++++++++++++---------------- 3 files changed, 237 insertions(+), 228 deletions(-) diff --git a/.github/workflows/test_sm_ni.yml b/.github/workflows/test_sm_ni.yml index 49cdf2a..d0583ec 100644 --- a/.github/workflows/test_sm_ni.yml +++ b/.github/workflows/test_sm_ni.yml @@ -43,7 +43,7 @@ jobs: key: ${{ runner.os }}-${{ env.cache-name }}-${{ hashFiles('**/go.sum') }} - name: Test - run: go test -v -short ./sm4/... + run: go test -v -short ./sm3/... env: GODEBUG: x509sha1=1 GOARCH: ${{ matrix.arch }} diff --git a/sm3/gen_sm3block_ni.go b/sm3/gen_sm3block_ni.go index 64d2e37..e090105 100644 --- a/sm3/gen_sm3block_ni.go +++ b/sm3/gen_sm3block_ni.go @@ -10,50 +10,56 @@ import ( "bytes" "fmt" "log" - "math/bits" "os" ) //SM3PARTW1 .4S, .4S, .4S func sm3partw1(Vd, Vn, Vm byte) uint32 { inst := uint32(0xce60c000) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | (uint32(Vm&0x1f) << 16) - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } //SM3PARTW2 .4S, .4S, .4S func sm3partw2(Vd, Vn, Vm byte) uint32 { inst := uint32(0xce60c400) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | (uint32(Vm&0x1f) << 16) - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } //SM3SS1 .4S, .4S, .4S, .4S func sm3ss1(Vd, Vn, Vm, Va byte) uint32 { inst := uint32(0xce400000) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(Va&0x1f)<<10 | uint32(Vm&0x1f)<<16 - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } //SM3TT1A .4S, .4S, .S[] func sm3tt1a(Vd, Vn, Vm, imm2 byte) uint32 { inst := uint32(0xce408000) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16 - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } //SM3TT1B .4S, .4S, .S[] func sm3tt1b(Vd, Vn, Vm, imm2 byte) uint32 { inst := uint32(0xce408400) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16 - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } //SM3TT2A .4S, .4S, .S[] func sm3tt2a(Vd, Vn, Vm, imm2 byte) uint32 { inst := uint32(0xce408800) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16 - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } //SM3TT2B .4S, .4S, .S[] func sm3tt2b(Vd, Vn, Vm, imm2 byte) uint32 { inst := uint32(0xce408c00) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | uint32(imm2&0x3)<<12 | uint32(Vm&0x1f)<<16 - return bits.ReverseBytes32(inst) + // return bits.ReverseBytes32(inst) + return inst } // Used v5 as temp register @@ -119,6 +125,8 @@ func main() { buf := new(bytes.Buffer) fmt.Fprint(buf, ` // Generated by gen_sm3block_ni.go. DO NOT EDIT. +//go:build arm64 && !purego +// +build arm64,!purego #include "textflag.h" @@ -172,9 +180,10 @@ blockloop: sm3ret: VST1 [V8.S4, V9.S4], (R0) // store hash value H RET + `[1:]) src := buf.Bytes() - // fmt.Println(string(src)) + fmt.Println(string(src)) err := os.WriteFile("sm3blockni_arm64.s", src, 0644) if err != nil { diff --git a/sm3/sm3blockni_arm64.s b/sm3/sm3blockni_arm64.s index 49b9f72..d59ca39 100644 --- a/sm3/sm3blockni_arm64.s +++ b/sm3/sm3blockni_arm64.s @@ -28,101 +28,101 @@ blockloop: VEXT $3, V2.B16, V1.B16, V4.B16 VEXT $3, V1.B16, V0.B16, V6.B16 VEXT $2, V3.B16, V2.B16, V7.B16 - WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S - WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S + WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S + WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S VEOR V1.B16, V0.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0 + WORD $0xce4088a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1 + WORD $0xce4098a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9a840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2 + WORD $0xce40a8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9b840ce //SM3TT2A V9d.4S, V5.4S, V0.S, 3 + WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3 + WORD $0xce40b8a9 //SM3TT2A V9d.4S, V5.4S, V0.S, 3 // Extension VEXT $3, V3.B16, V2.B16, V0.B16 VEXT $3, V2.B16, V1.B16, V6.B16 VEXT $2, V4.B16, V3.B16, V7.B16 - WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S - WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S + WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S + WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S VEOR V2.B16, V1.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0 + WORD $0xce4188a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1 + WORD $0xce4198a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9a841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2 + WORD $0xce41a8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9b841ce //SM3TT2A V9d.4S, V5.4S, V1.S, 3 + WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3 + WORD $0xce41b8a9 //SM3TT2A V9d.4S, V5.4S, V1.S, 3 // Extension VEXT $3, V4.B16, V3.B16, V1.B16 VEXT $3, V3.B16, V2.B16, V6.B16 VEXT $2, V0.B16, V4.B16, V7.B16 - WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S - WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S + WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S + WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S VEOR V3.B16, V2.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0 + WORD $0xce4288a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1 + WORD $0xce4298a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9a842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2 + WORD $0xce42a8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9b842ce //SM3TT2A V9d.4S, V5.4S, V2.S, 3 + WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3 + WORD $0xce42b8a9 //SM3TT2A V9d.4S, V5.4S, V2.S, 3 // Extension VEXT $3, V0.B16, V4.B16, V2.B16 VEXT $3, V4.B16, V3.B16, V6.B16 VEXT $2, V1.B16, V0.B16, V7.B16 - WORD $0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S - WORD $0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S + WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S + WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S VEOR V4.B16, V3.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8804ace //SM3TT1A V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a80a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 0 + WORD $0xce4388a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8904ace //SM3TT1A V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a90a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 1 + WORD $0xce4398a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9a843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 2 + WORD $0xce43a8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b04ace //SM3TT1A V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9b843ce //SM3TT2A V9d.4S, V5.4S, V3.S, 3 + WORD $0xce4ab0a8 //SM3TT1A V8d.4S, V5.4S, V10.S, 3 + WORD $0xce43b8a9 //SM3TT2A V9d.4S, V5.4S, V3.S, 3 // second 48 rounds VMOV R6, V11.S[3] @@ -130,283 +130,283 @@ blockloop: VEXT $3, V1.B16, V0.B16, V3.B16 VEXT $3, V0.B16, V4.B16, V6.B16 VEXT $2, V2.B16, V1.B16, V7.B16 - WORD $0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S - WORD $0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S + WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S + WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S VEOR V0.B16, V4.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3 // Extension VEXT $3, V2.B16, V1.B16, V4.B16 VEXT $3, V1.B16, V0.B16, V6.B16 VEXT $2, V3.B16, V2.B16, V7.B16 - WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S - WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S + WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S + WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S VEOR V1.B16, V0.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3 // Extension VEXT $3, V3.B16, V2.B16, V0.B16 VEXT $3, V2.B16, V1.B16, V6.B16 VEXT $2, V4.B16, V3.B16, V7.B16 - WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S - WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S + WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S + WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S VEOR V2.B16, V1.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3 // Extension VEXT $3, V4.B16, V3.B16, V1.B16 VEXT $3, V3.B16, V2.B16, V6.B16 VEXT $2, V0.B16, V4.B16, V7.B16 - WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S - WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S + WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S + WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S VEOR V3.B16, V2.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3 // Extension VEXT $3, V0.B16, V4.B16, V2.B16 VEXT $3, V4.B16, V3.B16, V6.B16 VEXT $2, V1.B16, V0.B16, V7.B16 - WORD $0x62c061ce //SM3PARTW1 V2.4S, V3.4S, V1.4S - WORD $0xe2c466ce //SM3PARTW2 V2.4S, V7.4S, V6.4S + WORD $0xce61c062 //SM3PARTW1 V2.4S, V3.4S, V1.4S + WORD $0xce66c4e2 //SM3PARTW2 V2.4S, V7.4S, V6.4S VEOR V4.B16, V3.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3 // Extension VEXT $3, V1.B16, V0.B16, V3.B16 VEXT $3, V0.B16, V4.B16, V6.B16 VEXT $2, V2.B16, V1.B16, V7.B16 - WORD $0x83c062ce //SM3PARTW1 V3.4S, V4.4S, V2.4S - WORD $0xe3c466ce //SM3PARTW2 V3.4S, V7.4S, V6.4S + WORD $0xce62c083 //SM3PARTW1 V3.4S, V4.4S, V2.4S + WORD $0xce66c4e3 //SM3PARTW2 V3.4S, V7.4S, V6.4S VEOR V0.B16, V4.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3 // Extension VEXT $3, V2.B16, V1.B16, V4.B16 VEXT $3, V1.B16, V0.B16, V6.B16 VEXT $2, V3.B16, V2.B16, V7.B16 - WORD $0x04c063ce //SM3PARTW1 V4.4S, V0.4S, V3.4S - WORD $0xe4c466ce //SM3PARTW2 V4.4S, V7.4S, V6.4S + WORD $0xce63c004 //SM3PARTW1 V4.4S, V0.4S, V3.4S + WORD $0xce66c4e4 //SM3PARTW2 V4.4S, V7.4S, V6.4S VEOR V1.B16, V0.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3 // Extension VEXT $3, V3.B16, V2.B16, V0.B16 VEXT $3, V2.B16, V1.B16, V6.B16 VEXT $2, V4.B16, V3.B16, V7.B16 - WORD $0x20c064ce //SM3PARTW1 V0.4S, V1.4S, V4.4S - WORD $0xe0c466ce //SM3PARTW2 V0.4S, V7.4S, V6.4S + WORD $0xce64c020 //SM3PARTW1 V0.4S, V1.4S, V4.4S + WORD $0xce66c4e0 //SM3PARTW2 V0.4S, V7.4S, V6.4S VEOR V2.B16, V1.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce418ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce419ca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce41aca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc41ce //SM3TT2B V9d.4S, V5.4S, V1.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce41bca9 //SM3TT2B V9d.4S, V5.4S, V1.S, 3 // Extension VEXT $3, V4.B16, V3.B16, V1.B16 VEXT $3, V3.B16, V2.B16, V6.B16 VEXT $2, V0.B16, V4.B16, V7.B16 - WORD $0x41c060ce //SM3PARTW1 V1.4S, V2.4S, V0.4S - WORD $0xe1c466ce //SM3PARTW2 V1.4S, V7.4S, V6.4S + WORD $0xce60c041 //SM3PARTW1 V1.4S, V2.4S, V0.4S + WORD $0xce66c4e1 //SM3PARTW2 V1.4S, V7.4S, V6.4S VEOR V3.B16, V2.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce428ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce429ca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce42aca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc42ce //SM3TT2B V9d.4S, V5.4S, V2.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce42bca9 //SM3TT2B V9d.4S, V5.4S, V2.S, 3 VEOR V4.B16, V3.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce438ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce439ca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce43aca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc43ce //SM3TT2B V9d.4S, V5.4S, V3.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce43bca9 //SM3TT2B V9d.4S, V5.4S, V3.S, 3 VEOR V0.B16, V4.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce448ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce449ca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce44aca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc44ce //SM3TT2B V9d.4S, V5.4S, V4.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce44bca9 //SM3TT2B V9d.4S, V5.4S, V4.S, 3 VEOR V1.B16, V0.B16, V10.B16 // Compression - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8844ace //SM3TT1B V8d.4S, V5.4S, V10.S, 0 - WORD $0xa98c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 0 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a84a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 0 + WORD $0xce408ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 0 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8944ace //SM3TT1B V8d.4S, V5.4S, V10.S, 1 - WORD $0xa99c40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 1 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4a94a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 1 + WORD $0xce409ca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 1 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8a44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 2 - WORD $0xa9ac40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 2 - WORD $0x05254bce //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S + WORD $0xce4aa4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 2 + WORD $0xce40aca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 2 + WORD $0xce4b2505 //SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S VSHL $1, V11.S4, V11.S4 - WORD $0xa8b44ace //SM3TT1B V8d.4S, V5.4S, V10.S, 3 - WORD $0xa9bc40ce //SM3TT2B V9d.4S, V5.4S, V0.S, 3 + WORD $0xce4ab4a8 //SM3TT1B V8d.4S, V5.4S, V10.S, 3 + WORD $0xce40bca9 //SM3TT2B V9d.4S, V5.4S, V0.S, 3 SUB $64, R3, R3 // message length - 64bytes, then compare with 64bytes VEOR V8.B16, V15.B16, V8.B16