// File: gmsm/sm3/sm3blockni_arm64.s (Go ARM64 assembly; listing header reported 419 lines, 18 KiB)

// Generated by gen_sm3block_ni.go. DO NOT EDIT.
//go:build arm64 && !purego
// +build arm64,!purego
#include "textflag.h"
// func blockSM3NI(h []uint32, p []byte, t []uint32)
//
// blockSM3NI runs the SM3 compression function over every complete 64-byte
// block of p using the ARMv8 SM3 instructions and updates the eight state
// words in h in place.
//
// Arguments (read from the Go argument frame):
//   h  eight 32-bit state words, stored in memory as a,b,c,d,e,f,g,h
//   p  message bytes; only whole 64-byte blocks are consumed, and nothing is
//      done when fewer than 64 bytes are supplied
//   t  two 32-bit round constants: t[0] seeds rounds 0-15, t[1] seeds rounds
//      16-63. NOTE(review): t[1] is presumably already rotated left by 16
//      (T16 <<< 16), since it is inserted as-is at round 16 - confirm
//      against the Go caller.
//
// Register use:
//   R0 = &h[0], R1 = message cursor, R3 = bytes remaining, R2 = &t[0]
//   R5/R6   = t[0] / t[1]
//   V0-V4   = rolling window of message-schedule words (4 words each)
//   V6, V7  = VEXT temporaries for SM3PARTW2
//   V10     = W[j] ^ W[j+4] for the current group of four rounds
//   V11/V12 = round constant in lane 3, rotated left by one per round
//   V5      = SS1 result
//   V8, V9  = working state with a (resp. e) in lane 3
//   V15/V16 = copy of the state at the start of the block
//
// The SM3 instructions are hand-encoded with WORD. WORD takes the 32-bit
// instruction value itself (0xce......), not its byte-swapped form.
//
// This file is generated: mirror any change here in gen_sm3block_ni.go.
TEXT ·blockSM3NI(SB), 0, $0
	MOVD h_base+0(FP), R0 // hash state address
	MOVD p_base+24(FP), R1 // message address
	MOVD p_len+32(FP), R3 // message length in bytes
	MOVD t_base+48(FP), R2 // round-constant table address
	CMP $64, R3
	BLO sm3done // no complete block: leave h untouched
	VLD1 (R0), [V8.S4, V9.S4] // load h (a,b,c,d | e,f,g,h in lanes 0..3)
	// SM3SS1/SM3TT* read a and e from lane 3, so reverse the lane order.
	VREV64 V8.S4, V8.S4
	VEXT $8, V8.B16, V8.B16, V8.B16
	VREV64 V9.S4, V9.S4
	VEXT $8, V9.B16, V9.B16, V9.B16
	LDPW (0*8)(R2), (R5, R6) // R5 = t[0], R6 = t[1]
blockloop:
	VLD1.P 64(R1), [V0.B16, V1.B16, V2.B16, V3.B16] // load one 64-byte block, advance cursor
	VMOV V8.B16, V15.B16 // keep the incoming state for the final XOR
	VMOV V9.B16, V16.B16
	VREV32 V0.B16, V0.B16 // message words are big-endian in memory
	VREV32 V1.B16, V1.B16
	VREV32 V2.B16, V2.B16
	VREV32 V3.B16, V3.B16
	// rounds 0-15 (TT1A/TT2A)
	VMOV R5, V11.S[3]
	// rounds 0-3, extension produces W16..W19 in V4
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004 // SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4 // SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4 // V12 = V11 rotated left by 1 (shift + insert of bit 31)
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4088a9 // SM3TT2A V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4098a9 // SM3TT2A V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce40a8a9 // SM3TT2A V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce40b8a9 // SM3TT2A V9.4S, V5.4S, V0.S[3]
	// rounds 4-7, extension produces W20..W23 in V0
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020 // SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0 // SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4188a9 // SM3TT2A V9.4S, V5.4S, V1.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4198a9 // SM3TT2A V9.4S, V5.4S, V1.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce41a8a9 // SM3TT2A V9.4S, V5.4S, V1.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce41b8a9 // SM3TT2A V9.4S, V5.4S, V1.S[3]
	// rounds 8-11, extension produces W24..W27 in V1
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041 // SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1 // SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4288a9 // SM3TT2A V9.4S, V5.4S, V2.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4298a9 // SM3TT2A V9.4S, V5.4S, V2.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce42a8a9 // SM3TT2A V9.4S, V5.4S, V2.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce42b8a9 // SM3TT2A V9.4S, V5.4S, V2.S[3]
	// rounds 12-15, extension produces W28..W31 in V2
	VEXT $12, V0.B16, V4.B16, V2.B16
	VEXT $12, V4.B16, V3.B16, V6.B16
	VEXT $8, V1.B16, V0.B16, V7.B16
	WORD $0xce61c062 // SM3PARTW1 V2.4S, V3.4S, V1.4S
	WORD $0xce66c4e2 // SM3PARTW2 V2.4S, V7.4S, V6.4S
	VEOR V4.B16, V3.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a80a8 // SM3TT1A V8.4S, V5.4S, V10.S[0]
	WORD $0xce4388a9 // SM3TT2A V9.4S, V5.4S, V3.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a90a8 // SM3TT1A V8.4S, V5.4S, V10.S[1]
	WORD $0xce4398a9 // SM3TT2A V9.4S, V5.4S, V3.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa0a8 // SM3TT1A V8.4S, V5.4S, V10.S[2]
	WORD $0xce43a8a9 // SM3TT2A V9.4S, V5.4S, V3.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab0a8 // SM3TT1A V8.4S, V5.4S, V10.S[3]
	WORD $0xce43b8a9 // SM3TT2A V9.4S, V5.4S, V3.S[3]
	// rounds 16-63 (TT1B/TT2B): restart the constant from t[1]
	VMOV R6, V11.S[3]
	// rounds 16-19, extension produces W32..W35 in V3
	VEXT $12, V1.B16, V0.B16, V3.B16
	VEXT $12, V0.B16, V4.B16, V6.B16
	VEXT $8, V2.B16, V1.B16, V7.B16
	WORD $0xce62c083 // SM3PARTW1 V3.4S, V4.4S, V2.4S
	WORD $0xce66c4e3 // SM3PARTW2 V3.4S, V7.4S, V6.4S
	VEOR V0.B16, V4.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce448ca9 // SM3TT2B V9.4S, V5.4S, V4.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce449ca9 // SM3TT2B V9.4S, V5.4S, V4.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce44aca9 // SM3TT2B V9.4S, V5.4S, V4.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce44bca9 // SM3TT2B V9.4S, V5.4S, V4.S[3]
	// rounds 20-23, extension produces W36..W39 in V4
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004 // SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4 // SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce408ca9 // SM3TT2B V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce409ca9 // SM3TT2B V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce40aca9 // SM3TT2B V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce40bca9 // SM3TT2B V9.4S, V5.4S, V0.S[3]
	// rounds 24-27, extension produces W40..W43 in V0
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020 // SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0 // SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce418ca9 // SM3TT2B V9.4S, V5.4S, V1.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce419ca9 // SM3TT2B V9.4S, V5.4S, V1.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce41aca9 // SM3TT2B V9.4S, V5.4S, V1.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce41bca9 // SM3TT2B V9.4S, V5.4S, V1.S[3]
	// rounds 28-31, extension produces W44..W47 in V1
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041 // SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1 // SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce428ca9 // SM3TT2B V9.4S, V5.4S, V2.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce429ca9 // SM3TT2B V9.4S, V5.4S, V2.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce42aca9 // SM3TT2B V9.4S, V5.4S, V2.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce42bca9 // SM3TT2B V9.4S, V5.4S, V2.S[3]
	// rounds 32-35, extension produces W48..W51 in V2
	VEXT $12, V0.B16, V4.B16, V2.B16
	VEXT $12, V4.B16, V3.B16, V6.B16
	VEXT $8, V1.B16, V0.B16, V7.B16
	WORD $0xce61c062 // SM3PARTW1 V2.4S, V3.4S, V1.4S
	WORD $0xce66c4e2 // SM3PARTW2 V2.4S, V7.4S, V6.4S
	VEOR V4.B16, V3.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce438ca9 // SM3TT2B V9.4S, V5.4S, V3.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce439ca9 // SM3TT2B V9.4S, V5.4S, V3.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce43aca9 // SM3TT2B V9.4S, V5.4S, V3.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce43bca9 // SM3TT2B V9.4S, V5.4S, V3.S[3]
	// rounds 36-39, extension produces W52..W55 in V3
	VEXT $12, V1.B16, V0.B16, V3.B16
	VEXT $12, V0.B16, V4.B16, V6.B16
	VEXT $8, V2.B16, V1.B16, V7.B16
	WORD $0xce62c083 // SM3PARTW1 V3.4S, V4.4S, V2.4S
	WORD $0xce66c4e3 // SM3PARTW2 V3.4S, V7.4S, V6.4S
	VEOR V0.B16, V4.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce448ca9 // SM3TT2B V9.4S, V5.4S, V4.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce449ca9 // SM3TT2B V9.4S, V5.4S, V4.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce44aca9 // SM3TT2B V9.4S, V5.4S, V4.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce44bca9 // SM3TT2B V9.4S, V5.4S, V4.S[3]
	// rounds 40-43, extension produces W56..W59 in V4
	VEXT $12, V2.B16, V1.B16, V4.B16
	VEXT $12, V1.B16, V0.B16, V6.B16
	VEXT $8, V3.B16, V2.B16, V7.B16
	WORD $0xce63c004 // SM3PARTW1 V4.4S, V0.4S, V3.4S
	WORD $0xce66c4e4 // SM3PARTW2 V4.4S, V7.4S, V6.4S
	VEOR V1.B16, V0.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce408ca9 // SM3TT2B V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce409ca9 // SM3TT2B V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce40aca9 // SM3TT2B V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce40bca9 // SM3TT2B V9.4S, V5.4S, V0.S[3]
	// rounds 44-47, extension produces W60..W63 in V0
	VEXT $12, V3.B16, V2.B16, V0.B16
	VEXT $12, V2.B16, V1.B16, V6.B16
	VEXT $8, V4.B16, V3.B16, V7.B16
	WORD $0xce64c020 // SM3PARTW1 V0.4S, V1.4S, V4.4S
	WORD $0xce66c4e0 // SM3PARTW2 V0.4S, V7.4S, V6.4S
	VEOR V2.B16, V1.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce418ca9 // SM3TT2B V9.4S, V5.4S, V1.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce419ca9 // SM3TT2B V9.4S, V5.4S, V1.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce41aca9 // SM3TT2B V9.4S, V5.4S, V1.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce41bca9 // SM3TT2B V9.4S, V5.4S, V1.S[3]
	// rounds 48-51, extension produces W64..W67 in V1
	VEXT $12, V4.B16, V3.B16, V1.B16
	VEXT $12, V3.B16, V2.B16, V6.B16
	VEXT $8, V0.B16, V4.B16, V7.B16
	WORD $0xce60c041 // SM3PARTW1 V1.4S, V2.4S, V0.4S
	WORD $0xce66c4e1 // SM3PARTW2 V1.4S, V7.4S, V6.4S
	VEOR V3.B16, V2.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce428ca9 // SM3TT2B V9.4S, V5.4S, V2.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce429ca9 // SM3TT2B V9.4S, V5.4S, V2.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce42aca9 // SM3TT2B V9.4S, V5.4S, V2.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce42bca9 // SM3TT2B V9.4S, V5.4S, V2.S[3]
	// rounds 52-55 (no further schedule words are needed)
	VEOR V4.B16, V3.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce438ca9 // SM3TT2B V9.4S, V5.4S, V3.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce439ca9 // SM3TT2B V9.4S, V5.4S, V3.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce43aca9 // SM3TT2B V9.4S, V5.4S, V3.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce43bca9 // SM3TT2B V9.4S, V5.4S, V3.S[3]
	// rounds 56-59
	VEOR V0.B16, V4.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce448ca9 // SM3TT2B V9.4S, V5.4S, V4.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce449ca9 // SM3TT2B V9.4S, V5.4S, V4.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce44aca9 // SM3TT2B V9.4S, V5.4S, V4.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce44bca9 // SM3TT2B V9.4S, V5.4S, V4.S[3]
	// rounds 60-63
	VEOR V1.B16, V0.B16, V10.B16
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4a84a8 // SM3TT1B V8.4S, V5.4S, V10.S[0]
	WORD $0xce408ca9 // SM3TT2B V9.4S, V5.4S, V0.S[0]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4a94a8 // SM3TT1B V8.4S, V5.4S, V10.S[1]
	WORD $0xce409ca9 // SM3TT2B V9.4S, V5.4S, V0.S[1]
	WORD $0xce4b2505 // SM3SS1 V5.4S, V8.4S, V11.4S, V9.4S
	VSHL $1, V11.S4, V12.S4
	VSRI $31, V11.S4, V12.S4
	WORD $0xce4aa4a8 // SM3TT1B V8.4S, V5.4S, V10.S[2]
	WORD $0xce40aca9 // SM3TT2B V9.4S, V5.4S, V0.S[2]
	WORD $0xce4c2505 // SM3SS1 V5.4S, V8.4S, V12.4S, V9.4S
	VSHL $1, V12.S4, V11.S4
	VSRI $31, V12.S4, V11.S4
	WORD $0xce4ab4a8 // SM3TT1B V8.4S, V5.4S, V10.S[3]
	WORD $0xce40bca9 // SM3TT2B V9.4S, V5.4S, V0.S[3]
	// fold the block result into the state: new = rounds output XOR old
	VEOR V8.B16, V15.B16, V8.B16
	VEOR V9.B16, V16.B16, V9.B16
	SUB $64, R3, R3 // one block consumed
	CMP $64, R3
	BHS blockloop // continue while a complete block remains
sm3ret:
	// restore the a,b,c,d / e,f,g,h lane order used in memory
	VREV64 V8.S4, V8.S4
	VEXT $8, V8.B16, V8.B16, V8.B16
	VREV64 V9.S4, V9.S4
	VEXT $8, V9.B16, V9.B16, V9.B16
	VST1 [V8.S4, V9.S4], (R0) // store updated hash state
sm3done:
	RET