mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
sm4 use sm4e sm4ekey, without gcm integration part
This commit is contained in:
parent
697d1dbd35
commit
0450200249
@ -290,7 +290,7 @@ GLOBL fk_mask<>(SB), RODATA, $16
|
|||||||
AVX_SM4_TAO_L1(x, y); \
|
AVX_SM4_TAO_L1(x, y); \
|
||||||
VPXOR x, t0, t0
|
VPXOR x, t0, t0
|
||||||
|
|
||||||
// func expandKeyAsm(key *byte, ck, enc, dec *uint32)
|
// func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
|
||||||
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
|
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
|
||||||
MOVQ key+0(FP), AX
|
MOVQ key+0(FP), AX
|
||||||
MOVQ ck+8(FP), BX
|
MOVQ ck+8(FP), BX
|
||||||
@ -321,7 +321,7 @@ loop:
|
|||||||
expand_end:
|
expand_end:
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func encryptBlocksAsm(xk *uint32, dst, src []byte)
|
// func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
|
||||||
TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
|
TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
|
||||||
MOVQ xk+0(FP), AX
|
MOVQ xk+0(FP), AX
|
||||||
MOVQ dst+8(FP), BX
|
MOVQ dst+8(FP), BX
|
||||||
@ -497,7 +497,7 @@ avx2_sm4_done:
|
|||||||
VZEROUPPER
|
VZEROUPPER
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func encryptBlockAsm(xk *uint32, dst, src *byte)
|
// func encryptBlockAsm(xk *uint32, dst, src *byte, inst int)
|
||||||
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
|
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
|
||||||
MOVQ xk+0(FP), AX
|
MOVQ xk+0(FP), AX
|
||||||
MOVQ dst+8(FP), BX
|
MOVQ dst+8(FP), BX
|
||||||
|
100
sm4/asm_arm64.s
100
sm4/asm_arm64.s
@ -164,13 +164,43 @@ GLOBL fk_mask<>(SB), (NOPTR+RODATA), $16
|
|||||||
VMOV R0, R24_MASK.D[0] \
|
VMOV R0, R24_MASK.D[0] \
|
||||||
VMOV R1, R24_MASK.D[1]
|
VMOV R1, R24_MASK.D[1]
|
||||||
|
|
||||||
// func expandKeyAsm(key *byte, ck, enc, dec *uint32)
|
#define SM4EKEY_EXPORT_KEYS() \
|
||||||
|
VMOV V8.S[3], V10.S[0] \
|
||||||
|
VMOV V8.S[2], V10.S[1] \
|
||||||
|
VMOV V8.S[1], V10.S[2] \
|
||||||
|
VMOV V8.S[0], V10.S[3] \
|
||||||
|
VMOV V9.S[3], V11.S[0] \
|
||||||
|
VMOV V9.S[2], V11.S[1] \
|
||||||
|
VMOV V9.S[1], V11.S[2] \
|
||||||
|
VMOV V9.S[0], V11.S[3] \
|
||||||
|
VST1.P [V9.S4, V8.S4], 32(R10) \
|
||||||
|
VST1.P [V10.S4, V11.S4], -32(R11)
|
||||||
|
|
||||||
|
#define SM4E_ROUND() \
|
||||||
|
VLD1.P 16(R10), [V8.B16] \
|
||||||
|
VREV32 V8.B16, V8.B16 \
|
||||||
|
WORD 0x0884c0ce \
|
||||||
|
WORD 0x2884c0ce \
|
||||||
|
WORD 0x4884c0ce \
|
||||||
|
WORD 0x6884c0ce \
|
||||||
|
WORD 0x8884c0ce \
|
||||||
|
WORD 0xa884c0ce \
|
||||||
|
WORD 0xc884c0ce \
|
||||||
|
WORD 0xe884c0ce \
|
||||||
|
VREV32 V8.B16, V8.B16 \
|
||||||
|
VST1.P [V8.B16], 16(R9)
|
||||||
|
|
||||||
|
// func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
|
||||||
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
|
TEXT ·expandKeyAsm(SB),NOSPLIT,$0
|
||||||
MOVD key+0(FP), R8
|
MOVD key+0(FP), R8
|
||||||
MOVD ck+8(FP), R9
|
MOVD ck+8(FP), R9
|
||||||
MOVD enc+16(FP), R10
|
MOVD enc+16(FP), R10
|
||||||
MOVD dec+24(FP), R11
|
MOVD dec+24(FP), R11
|
||||||
|
MOVD inst+32(FP), R12
|
||||||
|
|
||||||
|
CMP $1, R12
|
||||||
|
BEQ sm4ekey
|
||||||
|
|
||||||
load_global_data_1()
|
load_global_data_1()
|
||||||
|
|
||||||
VLD1 (R8), [t0.B16]
|
VLD1 (R8), [t0.B16]
|
||||||
@ -193,14 +223,45 @@ ksLoop:
|
|||||||
ADD $16, R0
|
ADD $16, R0
|
||||||
CMP $128, R0
|
CMP $128, R0
|
||||||
BNE ksLoop
|
BNE ksLoop
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
// func encryptBlocksAsm(xk *uint32, dst, src []byte)
|
sm4ekey:
|
||||||
|
LDP fk_mask<>(SB), (R0, R1)
|
||||||
|
VMOV R0, FK_MASK.D[0]
|
||||||
|
VMOV R1, FK_MASK.D[1]
|
||||||
|
VLD1 (R8), [V8.B16]
|
||||||
|
VREV32 V8.B16, V8.B16
|
||||||
|
VEOR FK_MASK, V8, V8
|
||||||
|
ADD $96, R11
|
||||||
|
|
||||||
|
VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
WORD 0x09c960ce //SM4EKEY V9.4S, V8.4S, V0.4S
|
||||||
|
WORD 0x28c961ce //SM4EKEY V8.4S, V9.4S, V1.4S
|
||||||
|
SM4EKEY_EXPORT_KEYS()
|
||||||
|
|
||||||
|
WORD 0x09c962ce //SM4EKEY V9.4S, V8.4S, V2.4S
|
||||||
|
WORD 0x28c963ce //SM4EKEY V8.4S, V9.4S, V3.4S
|
||||||
|
SM4EKEY_EXPORT_KEYS()
|
||||||
|
|
||||||
|
VLD1.P 64(R9), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
WORD 0x09c960ce //SM4EKEY V9.4S, V8.4S, V0.4S
|
||||||
|
WORD 0x28c961ce //SM4EKEY V8.4S, V9.4S, V1.4S
|
||||||
|
SM4EKEY_EXPORT_KEYS()
|
||||||
|
|
||||||
|
WORD 0x09c962ce //SM4EKEY V9.4S, V8.4S, V2.4S
|
||||||
|
WORD 0x28c963ce //SM4EKEY V8.4S, V9.4S, V3.4S
|
||||||
|
SM4EKEY_EXPORT_KEYS()
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
|
||||||
TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
|
TEXT ·encryptBlocksAsm(SB),NOSPLIT,$0
|
||||||
MOVD xk+0(FP), R8
|
MOVD xk+0(FP), R8
|
||||||
MOVD dst+8(FP), R9
|
MOVD dst+8(FP), R9
|
||||||
MOVD src+32(FP), R10
|
MOVD src+32(FP), R10
|
||||||
|
MOVD inst+24(FP), R11
|
||||||
|
|
||||||
|
CMP $1, R11
|
||||||
|
BEQ sm4niblocks
|
||||||
|
|
||||||
VLD1 (R10), [V5.S4, V6.S4, V7.S4, V8.S4]
|
VLD1 (R10), [V5.S4, V6.S4, V7.S4, V8.S4]
|
||||||
VMOV V5.S[0], t0.S[0]
|
VMOV V5.S[0], t0.S[0]
|
||||||
@ -271,15 +332,26 @@ encryptBlocksLoop:
|
|||||||
VMOV t1.S[3], V8.S[2]
|
VMOV t1.S[3], V8.S[2]
|
||||||
VMOV t0.S[3], V8.S[3]
|
VMOV t0.S[3], V8.S[3]
|
||||||
VST1 [V8.B16], (R9)
|
VST1 [V8.B16], (R9)
|
||||||
|
|
||||||
RET
|
RET
|
||||||
|
|
||||||
|
sm4niblocks:
|
||||||
|
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
VLD1.P 64(R8), [V4.S4, V5.S4, V6.S4, V7.S4]
|
||||||
|
SM4E_ROUND()
|
||||||
|
SM4E_ROUND()
|
||||||
|
SM4E_ROUND()
|
||||||
|
SM4E_ROUND()
|
||||||
|
RET
|
||||||
|
|
||||||
// func encryptBlockAsm(xk *uint32, dst, src *byte)
|
// func encryptBlockAsm(xk *uint32, dst, src *byte, inst int)
|
||||||
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
|
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
|
||||||
MOVD xk+0(FP), R8
|
MOVD xk+0(FP), R8
|
||||||
MOVD dst+8(FP), R9
|
MOVD dst+8(FP), R9
|
||||||
MOVD src+16(FP), R10
|
MOVD src+16(FP), R10
|
||||||
|
MOVD inst+24(FP), R11
|
||||||
|
|
||||||
|
CMP $1, R11
|
||||||
|
BEQ sm4niblock
|
||||||
|
|
||||||
VLD1 (R10), [t0.S4]
|
VLD1 (R10), [t0.S4]
|
||||||
VREV32 t0.B16, t0.B16
|
VREV32 t0.B16, t0.B16
|
||||||
@ -312,5 +384,21 @@ encryptBlockLoop:
|
|||||||
VMOV t1.S[0], V8.S[2]
|
VMOV t1.S[0], V8.S[2]
|
||||||
VMOV t0.S[0], V8.S[3]
|
VMOV t0.S[0], V8.S[3]
|
||||||
VST1 [V8.B16], (R9)
|
VST1 [V8.B16], (R9)
|
||||||
|
RET
|
||||||
|
|
||||||
|
sm4niblock:
|
||||||
|
VLD1 (R10), [V8.B16]
|
||||||
|
VREV32 V8.B16, V8.B16
|
||||||
|
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
WORD 0x0884c0ce //SM4E V8.4S, V0.4S
|
||||||
|
WORD 0x2884c0ce //SM4E V8.4S, V1.4S
|
||||||
|
WORD 0x4884c0ce //SM4E V8.4S, V2.4S
|
||||||
|
WORD 0x6884c0ce //SM4E V8.4S, V3.4S
|
||||||
|
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
WORD 0x0884c0ce //SM4E V8.4S, V0.4S
|
||||||
|
WORD 0x2884c0ce //SM4E V8.4S, V1.4S
|
||||||
|
WORD 0x4884c0ce //SM4E V8.4S, V2.4S
|
||||||
|
WORD 0x6884c0ce //SM4E V8.4S, V3.4S
|
||||||
|
VREV32 V8.B16, V8.B16
|
||||||
|
VST1 [V8.B16], (R9)
|
||||||
RET
|
RET
|
||||||
|
@ -16,13 +16,13 @@ var supportsGFMUL = cpu.X86.HasPCLMULQDQ || cpu.ARM64.HasPMULL
|
|||||||
var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
|
var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func encryptBlocksAsm(xk *uint32, dst, src []byte)
|
func encryptBlocksAsm(xk *uint32, dst, src []byte, inst int)
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func encryptBlockAsm(xk *uint32, dst, src *byte)
|
func encryptBlockAsm(xk *uint32, dst, src *byte, inst int)
|
||||||
|
|
||||||
//go:noescape
|
//go:noescape
|
||||||
func expandKeyAsm(key *byte, ck, enc, dec *uint32)
|
func expandKeyAsm(key *byte, ck, enc, dec *uint32, inst int)
|
||||||
|
|
||||||
type sm4CipherAsm struct {
|
type sm4CipherAsm struct {
|
||||||
sm4Cipher
|
sm4Cipher
|
||||||
@ -31,7 +31,7 @@ type sm4CipherAsm struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func newCipher(key []byte) (cipher.Block, error) {
|
func newCipher(key []byte) (cipher.Block, error) {
|
||||||
if !supportsAES {
|
if !(supportsAES || supportSM4) {
|
||||||
return newCipherGeneric(key)
|
return newCipherGeneric(key)
|
||||||
}
|
}
|
||||||
blocks := 4
|
blocks := 4
|
||||||
@ -39,7 +39,11 @@ func newCipher(key []byte) (cipher.Block, error) {
|
|||||||
blocks = 8
|
blocks = 8
|
||||||
}
|
}
|
||||||
c := sm4CipherAsm{sm4Cipher{make([]uint32, rounds), make([]uint32, rounds)}, blocks, blocks * BlockSize}
|
c := sm4CipherAsm{sm4Cipher{make([]uint32, rounds), make([]uint32, rounds)}, blocks, blocks * BlockSize}
|
||||||
expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0])
|
if supportSM4 {
|
||||||
|
expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0], 1)
|
||||||
|
} else {
|
||||||
|
expandKeyAsm(&key[0], &ck[0], &c.enc[0], &c.dec[0], 0)
|
||||||
|
}
|
||||||
if supportsAES && supportsGFMUL {
|
if supportsAES && supportsGFMUL {
|
||||||
return &sm4CipherGCM{c}, nil
|
return &sm4CipherGCM{c}, nil
|
||||||
}
|
}
|
||||||
@ -50,6 +54,22 @@ func (c *sm4CipherAsm) BlockSize() int { return BlockSize }
|
|||||||
|
|
||||||
func (c *sm4CipherAsm) Concurrency() int { return c.batchBlocks }
|
func (c *sm4CipherAsm) Concurrency() int { return c.batchBlocks }
|
||||||
|
|
||||||
|
func encryptBlockAsmInst(xk *uint32, dst, src *byte) {
|
||||||
|
if supportSM4 {
|
||||||
|
encryptBlockAsm(xk, dst, src, 1)
|
||||||
|
} else {
|
||||||
|
encryptBlockAsm(xk, dst, src, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func encryptBlocksAsmInst(xk *uint32, dst, src []byte) {
|
||||||
|
if supportSM4 {
|
||||||
|
encryptBlocksAsm(xk, dst, src, 1)
|
||||||
|
} else {
|
||||||
|
encryptBlocksAsm(xk, dst, src, 0)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func (c *sm4CipherAsm) Encrypt(dst, src []byte) {
|
func (c *sm4CipherAsm) Encrypt(dst, src []byte) {
|
||||||
if len(src) < BlockSize {
|
if len(src) < BlockSize {
|
||||||
panic("sm4: input not full block")
|
panic("sm4: input not full block")
|
||||||
@ -60,7 +80,7 @@ func (c *sm4CipherAsm) Encrypt(dst, src []byte) {
|
|||||||
if subtle.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
|
if subtle.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
|
||||||
panic("sm4: invalid buffer overlap")
|
panic("sm4: invalid buffer overlap")
|
||||||
}
|
}
|
||||||
encryptBlockAsm(&c.enc[0], &dst[0], &src[0])
|
encryptBlockAsmInst(&c.enc[0], &dst[0], &src[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *sm4CipherAsm) EncryptBlocks(dst, src []byte) {
|
func (c *sm4CipherAsm) EncryptBlocks(dst, src []byte) {
|
||||||
@ -73,7 +93,7 @@ func (c *sm4CipherAsm) EncryptBlocks(dst, src []byte) {
|
|||||||
if subtle.InexactOverlap(dst[:c.blocksSize], src[:c.blocksSize]) {
|
if subtle.InexactOverlap(dst[:c.blocksSize], src[:c.blocksSize]) {
|
||||||
panic("sm4: invalid buffer overlap")
|
panic("sm4: invalid buffer overlap")
|
||||||
}
|
}
|
||||||
encryptBlocksAsm(&c.enc[0], dst, src)
|
encryptBlocksAsmInst(&c.enc[0], dst, src)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *sm4CipherAsm) Decrypt(dst, src []byte) {
|
func (c *sm4CipherAsm) Decrypt(dst, src []byte) {
|
||||||
@ -86,7 +106,7 @@ func (c *sm4CipherAsm) Decrypt(dst, src []byte) {
|
|||||||
if subtle.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
|
if subtle.InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
|
||||||
panic("sm4: invalid buffer overlap")
|
panic("sm4: invalid buffer overlap")
|
||||||
}
|
}
|
||||||
encryptBlockAsm(&c.dec[0], &dst[0], &src[0])
|
encryptBlockAsmInst(&c.dec[0], &dst[0], &src[0])
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *sm4CipherAsm) DecryptBlocks(dst, src []byte) {
|
func (c *sm4CipherAsm) DecryptBlocks(dst, src []byte) {
|
||||||
@ -99,14 +119,16 @@ func (c *sm4CipherAsm) DecryptBlocks(dst, src []byte) {
|
|||||||
if subtle.InexactOverlap(dst[:c.blocksSize], src[:c.blocksSize]) {
|
if subtle.InexactOverlap(dst[:c.blocksSize], src[:c.blocksSize]) {
|
||||||
panic("sm4: invalid buffer overlap")
|
panic("sm4: invalid buffer overlap")
|
||||||
}
|
}
|
||||||
encryptBlocksAsm(&c.dec[0], dst, src)
|
encryptBlocksAsmInst(&c.dec[0], dst, src)
|
||||||
}
|
}
|
||||||
|
|
||||||
// expandKey is used by BenchmarkExpand to ensure that the asm implementation
|
// expandKey is used by BenchmarkExpand to ensure that the asm implementation
|
||||||
// of key expansion is used for the benchmark when it is available.
|
// of key expansion is used for the benchmark when it is available.
|
||||||
func expandKey(key []byte, enc, dec []uint32) {
|
func expandKey(key []byte, enc, dec []uint32) {
|
||||||
if supportsAES {
|
if supportSM4 {
|
||||||
expandKeyAsm(&key[0], &ck[0], &enc[0], &dec[0])
|
expandKeyAsm(&key[0], &ck[0], &enc[0], &dec[0], 1)
|
||||||
|
} else if supportsAES {
|
||||||
|
expandKeyAsm(&key[0], &ck[0], &enc[0], &dec[0], 0)
|
||||||
} else {
|
} else {
|
||||||
expandKeyGo(key, enc, dec)
|
expandKeyGo(key, enc, dec)
|
||||||
}
|
}
|
||||||
|
@ -34,7 +34,7 @@ func TestExpandKey(t *testing.T) {
|
|||||||
}
|
}
|
||||||
io.ReadFull(rand.Reader, key)
|
io.ReadFull(rand.Reader, key)
|
||||||
expandKeyGo(key, encRes1, decRes1)
|
expandKeyGo(key, encRes1, decRes1)
|
||||||
expandKeyAsm(&key[0], &ck[0], &encRes2[0], &decRes2[0])
|
expandKey(key, encRes2, decRes2)
|
||||||
if !reflect.DeepEqual(encRes1, encRes2) {
|
if !reflect.DeepEqual(encRes1, encRes2) {
|
||||||
t.Errorf("expected=%v, result=%v\n", encRes1, encRes2)
|
t.Errorf("expected=%v, result=%v\n", encRes1, encRes2)
|
||||||
}
|
}
|
||||||
|
137
sm4/gen_arm64_ni.go
Normal file
137
sm4/gen_arm64_ni.go
Normal file
@ -0,0 +1,137 @@
|
|||||||
|
// Not used yet!!!
|
||||||
|
// go run gen_arm64_ni.go
|
||||||
|
|
||||||
|
//go:build ignore
|
||||||
|
// +build ignore
|
||||||
|
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"fmt"
|
||||||
|
"log"
|
||||||
|
"math/bits"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
//SM4E <Vd>.4S, <Vn>.4S
|
||||||
|
func sm4e(Vd, Vn byte) uint32 {
|
||||||
|
inst := uint32(0xcec08400) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5
|
||||||
|
return bits.ReverseBytes32(inst)
|
||||||
|
}
|
||||||
|
|
||||||
|
//SM4EKEY <Vd>.4S, <Vn>.4S, <Vm>.4S
|
||||||
|
func sm4ekey(Vd, Vn, Vm byte) uint32 {
|
||||||
|
inst := uint32(0xce60c800) | uint32(Vd&0x1f) | uint32(Vn&0x1f)<<5 | (uint32(Vm&0x1f) << 16)
|
||||||
|
return bits.ReverseBytes32(inst)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sm4ekeyRound(buf *bytes.Buffer, d, n, m byte) {
|
||||||
|
fmt.Fprintf(buf, "\tWORD 0x%08x //SM4EKEY V%d.4S, V%d.4S, V%d.4S\n", sm4ekey(d, n, m), d, n, m)
|
||||||
|
}
|
||||||
|
|
||||||
|
func sm4eRound(buf *bytes.Buffer, d, n byte) {
|
||||||
|
fmt.Fprintf(buf, "\tWORD 0x%08x //SM4E V%d.4S, V%d.4S\n", sm4e(d, n), d, n)
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
buf := new(bytes.Buffer)
|
||||||
|
fmt.Fprint(buf, `
|
||||||
|
// Generated by gen_arm64_ni.go. DO NOT EDIT.
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// func expandKeySM4E(key *byte, fk, ck, enc *uint32)
|
||||||
|
TEXT ·expandKeySM4E(SB),NOSPLIT,$0
|
||||||
|
MOVD key+0(FP), R8
|
||||||
|
MOVD fk+8(FP), R9
|
||||||
|
MOVD ck+16(FP), R10
|
||||||
|
MOVD enc+24(FP), R11
|
||||||
|
|
||||||
|
VLD1 (R8), [V8.B16]
|
||||||
|
VREV32 V8.B16, V8.B16
|
||||||
|
VLD1 (R9), [V9.S4]
|
||||||
|
VEOR V9, V8, V8
|
||||||
|
VLD1.P 64(R10), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
`[1:])
|
||||||
|
|
||||||
|
sm4ekeyRound(buf, 9, 8, 0)
|
||||||
|
sm4ekeyRound(buf, 8, 9, 1)
|
||||||
|
fmt.Fprintf(buf, "\tVST1.P [V9.S4, V8.S4], 32(R11)\n")
|
||||||
|
sm4ekeyRound(buf, 9, 8, 2)
|
||||||
|
sm4ekeyRound(buf, 8, 9, 3)
|
||||||
|
fmt.Fprintf(buf, "\tVST1.P [V9.S4, V8.S4], 32(R11)\n")
|
||||||
|
fmt.Fprintf(buf, "\tVLD1.P 64(R10), [V0.S4, V1.S4, V2.S4, V3.S4]\n")
|
||||||
|
sm4ekeyRound(buf, 9, 8, 0)
|
||||||
|
sm4ekeyRound(buf, 8, 9, 1)
|
||||||
|
fmt.Fprintf(buf, "\tVST1.P [V9.S4, V8.S4], 32(R11)\n")
|
||||||
|
sm4ekeyRound(buf, 9, 8, 2)
|
||||||
|
sm4ekeyRound(buf, 8, 9, 3)
|
||||||
|
fmt.Fprintf(buf, `
|
||||||
|
VST1.P [V9.S4, V8.S4], 32(R11)
|
||||||
|
RET
|
||||||
|
`[1:])
|
||||||
|
fmt.Fprint(buf, `
|
||||||
|
|
||||||
|
// func encryptBlockSM4E(xk *uint32, dst, src *byte)
|
||||||
|
TEXT ·encryptBlockSM4E(SB),NOSPLIT,$0
|
||||||
|
MOVD xk+0(FP), R8
|
||||||
|
MOVD dst+8(FP), R9
|
||||||
|
MOVD src+16(FP), R10
|
||||||
|
|
||||||
|
VLD1 (R10), [V8.B16]
|
||||||
|
VREV32 V8.B16, V8.B16
|
||||||
|
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
`[1:])
|
||||||
|
sm4eRound(buf, 8, 0)
|
||||||
|
sm4eRound(buf, 8, 1)
|
||||||
|
sm4eRound(buf, 8, 2)
|
||||||
|
sm4eRound(buf, 8, 3)
|
||||||
|
fmt.Fprintf(buf, "\tVLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]\n")
|
||||||
|
sm4eRound(buf, 8, 0)
|
||||||
|
sm4eRound(buf, 8, 1)
|
||||||
|
sm4eRound(buf, 8, 2)
|
||||||
|
sm4eRound(buf, 8, 3)
|
||||||
|
fmt.Fprintf(buf, `
|
||||||
|
VREV32 V8.B16, V8.B16
|
||||||
|
VST1 [V8.B16], (R9)
|
||||||
|
RET
|
||||||
|
`[1:])
|
||||||
|
|
||||||
|
fmt.Fprint(buf, `
|
||||||
|
|
||||||
|
// func encryptBlocksSM4E(xk *uint32, dst, src *byte)
|
||||||
|
TEXT ·encryptBlocksSM4E(SB),NOSPLIT,$0
|
||||||
|
MOVD xk+0(FP), R8
|
||||||
|
MOVD dst+8(FP), R9
|
||||||
|
MOVD src+16(FP), R10
|
||||||
|
|
||||||
|
VLD1.P 64(R8), [V0.S4, V1.S4, V2.S4, V3.S4]
|
||||||
|
VLD1.P 64(R8), [V4.S4, V5.S4, V6.S4, V7.S4]
|
||||||
|
|
||||||
|
`[1:])
|
||||||
|
for i := 0; i < 4; i++ {
|
||||||
|
fmt.Fprintf(buf, "\tVLD1.P 16(R10), [V8.B16]\n")
|
||||||
|
fmt.Fprintf(buf, "\tVREV32 V8.B16, V8.B16\n")
|
||||||
|
sm4eRound(buf, 8, 0)
|
||||||
|
sm4eRound(buf, 8, 1)
|
||||||
|
sm4eRound(buf, 8, 2)
|
||||||
|
sm4eRound(buf, 8, 3)
|
||||||
|
sm4eRound(buf, 8, 4)
|
||||||
|
sm4eRound(buf, 8, 5)
|
||||||
|
sm4eRound(buf, 8, 6)
|
||||||
|
sm4eRound(buf, 8, 7)
|
||||||
|
fmt.Fprintf(buf, "\tVREV32 V8.B16, V8.B16\n")
|
||||||
|
fmt.Fprintf(buf, "\tVST1.P [V8.B16], 16(R9)\n\n")
|
||||||
|
}
|
||||||
|
fmt.Fprintf(buf, `
|
||||||
|
RET
|
||||||
|
`[1:])
|
||||||
|
|
||||||
|
src := buf.Bytes()
|
||||||
|
// fmt.Println(string(src))
|
||||||
|
err := os.WriteFile("sm4e_arm64.s", src, 0644)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user