MAGIC - continue tuning

This commit is contained in:
emmansun 2021-03-21 19:20:23 +08:00
parent 4443db3f4d
commit b8d89ee417
3 changed files with 226 additions and 45 deletions

View File

@ -321,6 +321,74 @@ loop:
done_sm4:
RET
// func encryptBlockAsm(xk *uint32, dst, src *byte)
//
// Transforms a single 16-byte block: four 32-bit words are read from src,
// updated over 32 steps using the 32 uint32 words at xk (8 loop iterations,
// 4 steps each), and written to dst in reverse word order.
// t0-t3, x, y, SM4_TAO_L1 and flip_mask are defined outside this listing.
TEXT ·encryptBlockAsm(SB),NOSPLIT,$0
// AX = xk, BX = dst, DX = src.
MOVQ xk+0(FP), AX
MOVQ dst+8(FP), BX
MOVQ src+16(FP), DX
// Put each 32-bit input word into lane 0 of t0..t3 and shuffle its bytes with
// flip_mask (presumably a per-word byte-order swap; the mask contents are not
// shown here — confirm).
PINSRD $0, 0(DX), t0
PSHUFB flip_mask<>(SB), t0
PINSRD $0, 4(DX), t1
PSHUFB flip_mask<>(SB), t1
PINSRD $0, 8(DX), t2
PSHUFB flip_mask<>(SB), t2
PINSRD $0, 12(DX), t3
PSHUFB flip_mask<>(SB), t3
// CX is the byte offset into xk; it advances by 16 bytes (4 words) per iteration.
XORL CX, CX
loop:
// Step 0: x = xk word ^ t1 ^ t2 ^ t3, run SM4_TAO_L1 on it, fold into t0.
// NOTE(review): SM4_TAO_L1 is assumed to transform x in place using y as
// scratch — confirm against the macro definition.
PINSRD $0, 0(AX)(CX*1), x
PXOR t1, x
PXOR t2, x
PXOR t3, x
SM4_TAO_L1(x, y)
PXOR x, t0
// Step 1: same pattern with the next xk word, folding into t1.
PINSRD $0, 4(AX)(CX*1), x
PXOR t0, x
PXOR t2, x
PXOR t3, x
SM4_TAO_L1(x, y)
PXOR x, t1
// Step 2: folding into t2.
PINSRD $0, 8(AX)(CX*1), x
PXOR t0, x
PXOR t1, x
PXOR t3, x
SM4_TAO_L1(x, y)
PXOR x, t2
// Step 3: folding into t3.
PINSRD $0, 12(AX)(CX*1), x
PXOR t0, x
PXOR t1, x
PXOR t2, x
SM4_TAO_L1(x, y)
PXOR x, t3
// Continue until all 4*32 = 128 bytes of xk have been consumed.
ADDL $16, CX
CMPL CX, $4*32
JB loop
// Apply the same flip_mask shuffle to every state word before storing.
PSHUFB flip_mask<>(SB), t3
PSHUFB flip_mask<>(SB), t2
PSHUFB flip_mask<>(SB), t1
PSHUFB flip_mask<>(SB), t0
// Output order is t3, t2, t1, t0. MOVUPS stores all 16 bytes of t3 at dst;
// the upper lanes of t3 are never explicitly loaded in this function, and
// bytes 4..15 are overwritten by the three MOVL stores that follow.
MOVUPS t3, 0(BX)
PEXTRD $0, t2, R8
MOVL R8, 4(BX)
PEXTRD $0, t1, R8
MOVL R8, 8(BX)
PEXTRD $0, t0, R8
MOVL R8, 12(BX)
// No jump in this function targets done_sm4; execution simply falls through.
done_sm4:
RET
// func xorBytesSSE2(dst, a, b *byte, n int)
TEXT ·xorBytesSSE2(SB), NOSPLIT, $0
MOVQ dst+0(FP), BX

View File

@ -11,6 +11,9 @@ import (
// encryptBlocksAsm is declared without a Go body; its implementation is not
// part of this listing.
// NOTE(review): the earlier Encrypt/Decrypt call sites passed
// FourBlocksSize-byte buffers for dst and src, so it presumably processes four
// blocks per call using the key schedule at xk — confirm against the assembly.
//
//go:noescape
func encryptBlocksAsm(xk *uint32, dst, src *byte)
// encryptBlockAsm is the Go declaration of the assembly routine
// ·encryptBlockAsm. It reads 16 bytes at src, consumes 32 uint32 words starting
// at xk, and writes 16 bytes at dst. Callers pass either the encryption or the
// decryption schedule as xk.
//
//go:noescape
func encryptBlockAsm(xk *uint32, dst, src *byte)
// expandKeyAsm is declared without a Go body; its implementation is not part of
// this listing.
// NOTE(review): judging by the parameter names it presumably derives the enc
// and dec schedules from key using the constants at ck — confirm against the
// assembly.
//
//go:noescape
func expandKeyAsm(key *byte, ck, enc, dec *uint32)
@ -47,11 +50,7 @@ func (c *sm4CipherAsm) Encrypt(dst, src []byte) {
if InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
panic("sm4: invalid buffer overlap")
}
var src64 []byte = make([]byte, FourBlocksSize)
var dst64 []byte = make([]byte, FourBlocksSize)
copy(src64, src)
encryptBlocksAsm(&c.enc[0], &dst64[0], &src64[0])
copy(dst, dst64[:BlockSize])
encryptBlockAsm(&c.enc[0], &dst[0], &src[0])
}
func (c *sm4CipherAsm) Decrypt(dst, src []byte) {
@ -64,9 +63,5 @@ func (c *sm4CipherAsm) Decrypt(dst, src []byte) {
if InexactOverlap(dst[:BlockSize], src[:BlockSize]) {
panic("sm4: invalid buffer overlap")
}
var src64 []byte = make([]byte, FourBlocksSize)
var dst64 []byte = make([]byte, FourBlocksSize)
copy(src64, src)
encryptBlocksAsm(&c.dec[0], &dst64[0], &src64[0])
copy(dst, dst64[:BlockSize])
encryptBlockAsm(&c.dec[0], &dst[0], &src[0])
}

View File

@ -1,50 +1,87 @@
package sm4_test
import (
"crypto/aes"
"crypto/cipher"
"testing"
"github.com/emmansun/gmsm/sm4"
)
func BenchmarkSM4CBCEncrypt1K(b *testing.B) {
// benchmarkCBCEncrypt1K times in-place CBC encryption of a 1024-byte buffer
// with the supplied block cipher and an all-zero 16-byte IV, reporting the
// buffer size through b.SetBytes.
// NOTE(review): the key declaration, the sm4.NewCipher call and the first cbc
// assignment below look like pre-refactor lines interleaved by the diff
// rendering — confirm against the actual file.
func benchmarkCBCEncrypt1K(b *testing.B, block cipher.Block) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
c, _ := sm4.NewCipher(key[:])
cbc := cipher.NewCBCEncrypter(c, iv[:])
cbc := cipher.NewCBCEncrypter(block, iv[:])
for i := 0; i < b.N; i++ {
cbc.CryptBlocks(buf, buf)
}
}
func BenchmarkSM4CBCDecrypt1K(b *testing.B) {
// BenchmarkAESCBCEncrypt1K runs the shared CBC encryption benchmark with an
// AES cipher keyed by 16 zero bytes.
func BenchmarkAESCBCEncrypt1K(b *testing.B) {
	var key [16]byte
	c, err := aes.NewCipher(key[:])
	if err != nil {
		// Fail with the real cause instead of a nil dereference later on.
		b.Fatal(err)
	}
	benchmarkCBCEncrypt1K(b, c)
}
// BenchmarkSM4CBCEncrypt1K runs the shared CBC encryption benchmark with an
// SM4 cipher keyed by 16 zero bytes.
func BenchmarkSM4CBCEncrypt1K(b *testing.B) {
	var key [16]byte
	c, err := sm4.NewCipher(key[:])
	if err != nil {
		// Fail with the real cause instead of a nil dereference later on.
		b.Fatal(err)
	}
	benchmarkCBCEncrypt1K(b, c)
}
// benchmarkSM4CBCDecrypt1K times in-place CBC decryption of a 1024-byte buffer
// with the supplied block cipher and an all-zero 16-byte IV. Despite the "SM4"
// in its name it works on any cipher.Block and is also called by the AES
// benchmark.
// NOTE(review): the key declaration, the sm4.NewCipher call and the first cbc
// assignment below look like pre-refactor lines interleaved by the diff
// rendering — confirm against the actual file.
func benchmarkSM4CBCDecrypt1K(b *testing.B, block cipher.Block) {
buf := make([]byte, 1024)
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
c, _ := sm4.NewCipher(key[:])
cbc := cipher.NewCBCDecrypter(c, iv[:])
cbc := cipher.NewCBCDecrypter(block, iv[:])
for i := 0; i < b.N; i++ {
cbc.CryptBlocks(buf, buf)
}
}
// BenchmarkAESCBCDecrypt1K runs the shared CBC decryption benchmark with an
// AES cipher keyed by 16 zero bytes.
func BenchmarkAESCBCDecrypt1K(b *testing.B) {
	var key [16]byte
	c, err := aes.NewCipher(key[:])
	if err != nil {
		// Fail with the real cause instead of a nil dereference later on.
		b.Fatal(err)
	}
	// The helper is cipher-agnostic despite the "SM4" in its name.
	benchmarkSM4CBCDecrypt1K(b, c)
}
// BenchmarkSM4CBCDecrypt1K runs the shared CBC decryption benchmark with an
// SM4 cipher keyed by 16 zero bytes.
func BenchmarkSM4CBCDecrypt1K(b *testing.B) {
	var key [16]byte
	c, err := sm4.NewCipher(key[:])
	if err != nil {
		// Fail with the real cause instead of a nil dereference later on.
		b.Fatal(err)
	}
	benchmarkSM4CBCDecrypt1K(b, c)
}
// benchmarkStream times repeated in-place XORKeyStream calls over buf.
// The stream is created once, before the timer is reset, by applying mode to
// block with an all-zero 16-byte IV; len(buf) is reported via b.SetBytes.
func benchmarkStream(b *testing.B, block cipher.Block, mode func(cipher.Block, []byte) cipher.Stream, buf []byte) {
	b.SetBytes(int64(len(buf)))

	zeroIV := make([]byte, 16)
	ks := mode(block, zeroIV)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		ks.XORKeyStream(buf, buf)
	}
}
// benchmarkSM4Stream creates an SM4 cipher from a 16-byte zero key and passes
// it, together with mode and buf, to benchmarkStream.
// NOTE(review): the iv declaration, the stream assignment and the timer/loop
// lines after the first closing brace look like pre-refactor lines interleaved
// by the diff rendering — confirm against the actual file.
func benchmarkSM4Stream(b *testing.B, mode func(cipher.Block, []byte) cipher.Stream, buf []byte) {
b.SetBytes(int64(len(buf)))
var key [16]byte
var iv [16]byte
c, _ := sm4.NewCipher(key[:])
stream := mode(c, iv[:])
benchmarkStream(b, c, mode, buf)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
stream.XORKeyStream(buf, buf)
}
// benchmarkAESStream creates an AES cipher from a 16-byte zero key and passes
// it, together with mode and buf, to benchmarkStream.
func benchmarkAESStream(b *testing.B, mode func(cipher.Block, []byte) cipher.Stream, buf []byte) {
	b.SetBytes(int64(len(buf)))

	var key [16]byte
	c, err := aes.NewCipher(key[:])
	if err != nil {
		// Fail with the real cause instead of a nil dereference later on.
		b.Fatal(err)
	}
	benchmarkStream(b, c, mode, buf)
}
// If we test exactly 1K blocks, we would generate exact multiples of
@ -54,101 +91,182 @@ func benchmarkSM4Stream(b *testing.B, mode func(cipher.Block, []byte) cipher.Str
const almost1K = 1024 - 5
const almost8K = 8*1024 - 5
func BenchmarkAESCFBEncrypt1K(b *testing.B) {
benchmarkAESStream(b, cipher.NewCFBEncrypter, make([]byte, almost1K))
}
func BenchmarkSM4CFBEncrypt1K(b *testing.B) {
benchmarkSM4Stream(b, cipher.NewCFBEncrypter, make([]byte, almost1K))
}
func BenchmarkAESCFBDecrypt1K(b *testing.B) {
benchmarkAESStream(b, cipher.NewCFBDecrypter, make([]byte, almost1K))
}
func BenchmarkSM4CFBDecrypt1K(b *testing.B) {
benchmarkSM4Stream(b, cipher.NewCFBDecrypter, make([]byte, almost1K))
}
func BenchmarkAESCFBDecrypt8K(b *testing.B) {
benchmarkAESStream(b, cipher.NewCFBDecrypter, make([]byte, almost8K))
}
func BenchmarkSM4CFBDecrypt8K(b *testing.B) {
benchmarkSM4Stream(b, cipher.NewCFBDecrypter, make([]byte, almost8K))
}
func BenchmarkAESOFB1K(b *testing.B) {
benchmarkAESStream(b, cipher.NewOFB, make([]byte, almost1K))
}
func BenchmarkSM4OFB1K(b *testing.B) {
benchmarkSM4Stream(b, cipher.NewOFB, make([]byte, almost1K))
}
func BenchmarkAESCTR1K(b *testing.B) {
benchmarkAESStream(b, cipher.NewCTR, make([]byte, almost1K))
}
func BenchmarkSM4CTR1K(b *testing.B) {
benchmarkSM4Stream(b, cipher.NewCTR, make([]byte, almost1K))
}
func BenchmarkAESCTR8K(b *testing.B) {
benchmarkAESStream(b, cipher.NewCTR, make([]byte, almost8K))
}
func BenchmarkSM4CTR8K(b *testing.B) {
benchmarkSM4Stream(b, cipher.NewCTR, make([]byte, almost8K))
}
func benchmarkSM4GCMSign(b *testing.B, buf []byte) {
// benchmarkGCMSign times aead.Seal called with a nil plaintext and buf as the
// additional data, using an all-zero 12-byte nonce and reusing the output
// slice between iterations. len(buf) is reported via b.SetBytes.
// NOTE(review): the key declaration, the sm4.NewCipher/NewGCM calls and the
// sm4gcm.Seal line below look like pre-refactor lines interleaved by the diff
// rendering — confirm against the actual file.
func benchmarkGCMSign(b *testing.B, aead cipher.AEAD, buf []byte) {
b.SetBytes(int64(len(buf)))
var key [16]byte
var nonce [12]byte
c, _ := sm4.NewCipher(key[:])
sm4gcm, _ := cipher.NewGCM(c)
var out []byte
b.ResetTimer()
for i := 0; i < b.N; i++ {
out = sm4gcm.Seal(out[:0], nonce[:], nil, buf)
out = aead.Seal(out[:0], nonce[:], nil, buf)
}
}
// benchmarkAESGCMSign wraps an AES cipher (16-byte zero key) in GCM and passes
// the resulting AEAD and buf to benchmarkGCMSign.
func benchmarkAESGCMSign(b *testing.B, buf []byte) {
	var key [16]byte
	c, err := aes.NewCipher(key[:])
	if err != nil {
		b.Fatal(err)
	}
	aesgcm, err := cipher.NewGCM(c)
	if err != nil {
		b.Fatal(err)
	}
	benchmarkGCMSign(b, aesgcm, buf)
}
// benchmarkSM4GCMSign wraps an SM4 cipher (16-byte zero key) in GCM and passes
// the resulting AEAD and buf to benchmarkGCMSign.
func benchmarkSM4GCMSign(b *testing.B, buf []byte) {
	var key [16]byte
	c, err := sm4.NewCipher(key[:])
	if err != nil {
		b.Fatal(err)
	}
	sm4gcm, err := cipher.NewGCM(c)
	if err != nil {
		b.Fatal(err)
	}
	benchmarkGCMSign(b, sm4gcm, buf)
}
// benchmarkGCMSeal times aead.Seal over buf as plaintext, with an all-zero
// 12-byte nonce and 13 zero bytes of additional data. The output slice is
// truncated and reused on every iteration; len(buf) is reported via b.SetBytes.
func benchmarkGCMSeal(b *testing.B, aead cipher.AEAD, buf []byte) {
	var (
		nonce  [12]byte
		ad     [13]byte
		sealed []byte
	)
	b.SetBytes(int64(len(buf)))

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		sealed = aead.Seal(sealed[:0], nonce[:], buf, ad[:])
	}
}
// benchmarkAESGCMSeal wraps an AES cipher (16-byte zero key) in GCM and passes
// the resulting AEAD and buf to benchmarkGCMSeal.
func benchmarkAESGCMSeal(b *testing.B, buf []byte) {
	var key [16]byte
	c, err := aes.NewCipher(key[:])
	if err != nil {
		b.Fatal(err)
	}
	// Named aesgcm (not sm4gcm) to match the cipher actually in use.
	aesgcm, err := cipher.NewGCM(c)
	if err != nil {
		b.Fatal(err)
	}
	benchmarkGCMSeal(b, aesgcm, buf)
}
// benchmarkSM4GCMSeal wraps an SM4 cipher (16-byte zero key) in GCM and passes
// the resulting AEAD and buf to benchmarkGCMSeal.
// NOTE(review): the SetBytes call, the nonce/ad/out declarations and the
// timer/loop lines below look like pre-refactor lines interleaved by the diff
// rendering — confirm against the actual file.
func benchmarkSM4GCMSeal(b *testing.B, buf []byte) {
b.SetBytes(int64(len(buf)))
var key [16]byte
var nonce [12]byte
var ad [13]byte
c, _ := sm4.NewCipher(key[:])
sm4gcm, _ := cipher.NewGCM(c)
var out []byte
b.ResetTimer()
for i := 0; i < b.N; i++ {
out = sm4gcm.Seal(out[:0], nonce[:], buf, ad[:])
}
benchmarkGCMSeal(b, sm4gcm, buf)
}
func benchmarkSM4GCMOpen(b *testing.B, buf []byte) {
// benchmarkGCMOpen seals buf once (all-zero 12-byte nonce, 13 zero bytes of
// additional data) before the timer is reset, then times aead.Open on that
// ciphertext, writing the plaintext back into buf's storage. An Open error is
// reported with b.Errorf, which does not stop the loop.
// NOTE(review): the key declaration, the sm4.NewCipher/NewGCM calls and the
// sm4gcm.Seal/sm4gcm.Open lines below look like pre-refactor lines interleaved
// by the diff rendering — confirm against the actual file.
func benchmarkGCMOpen(b *testing.B, aead cipher.AEAD, buf []byte) {
b.SetBytes(int64(len(buf)))
var key [16]byte
var nonce [12]byte
var ad [13]byte
c, _ := sm4.NewCipher(key[:])
sm4gcm, _ := cipher.NewGCM(c)
var out []byte
out = sm4gcm.Seal(out[:0], nonce[:], buf, ad[:])
out = aead.Seal(out[:0], nonce[:], buf, ad[:])
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := sm4gcm.Open(buf[:0], nonce[:], out, ad[:])
_, err := aead.Open(buf[:0], nonce[:], out, ad[:])
if err != nil {
b.Errorf("Open: %v", err)
}
}
}
// benchmarkAESGCMOpen wraps an AES cipher (16-byte zero key) in GCM and passes
// the resulting AEAD and buf to benchmarkGCMOpen.
func benchmarkAESGCMOpen(b *testing.B, buf []byte) {
	var key [16]byte
	c, err := aes.NewCipher(key[:])
	if err != nil {
		b.Fatal(err)
	}
	// Named aesgcm (not sm4gcm) to match the cipher actually in use.
	aesgcm, err := cipher.NewGCM(c)
	if err != nil {
		b.Fatal(err)
	}
	benchmarkGCMOpen(b, aesgcm, buf)
}
// benchmarkSM4GCMOpen wraps an SM4 cipher (16-byte zero key) in GCM and passes
// the resulting AEAD and buf to benchmarkGCMOpen.
func benchmarkSM4GCMOpen(b *testing.B, buf []byte) {
	var key [16]byte
	c, err := sm4.NewCipher(key[:])
	if err != nil {
		b.Fatal(err)
	}
	sm4gcm, err := cipher.NewGCM(c)
	if err != nil {
		b.Fatal(err)
	}
	benchmarkGCMOpen(b, sm4gcm, buf)
}
// BenchmarkAESGCMSeal1K benchmarks AES-GCM Seal on a 1024-byte buffer.
func BenchmarkAESGCMSeal1K(b *testing.B) {
	data := make([]byte, 1024)
	benchmarkAESGCMSeal(b, data)
}
// BenchmarkSM4GCMSeal1K benchmarks SM4-GCM Seal on a 1024-byte buffer.
func BenchmarkSM4GCMSeal1K(b *testing.B) {
	data := make([]byte, 1024)
	benchmarkSM4GCMSeal(b, data)
}
// BenchmarkAESGCMOpen1K benchmarks AES-GCM Open on a 1024-byte buffer.
func BenchmarkAESGCMOpen1K(b *testing.B) {
	data := make([]byte, 1024)
	benchmarkAESGCMOpen(b, data)
}
// BenchmarkSM4GCMOpen1K benchmarks SM4-GCM Open on a 1024-byte buffer.
func BenchmarkSM4GCMOpen1K(b *testing.B) {
	data := make([]byte, 1024)
	benchmarkSM4GCMOpen(b, data)
}
// BenchmarkAESGCMSign1K benchmarks the AES-GCM sign helper on a 1024-byte buffer.
func BenchmarkAESGCMSign1K(b *testing.B) {
	data := make([]byte, 1024)
	benchmarkAESGCMSign(b, data)
}
// BenchmarkSM4GCMSign1K benchmarks the SM4-GCM sign helper on a 1024-byte buffer.
func BenchmarkSM4GCMSign1K(b *testing.B) {
	data := make([]byte, 1024)
	benchmarkSM4GCMSign(b, data)
}
// BenchmarkAESGCMSign8K benchmarks the AES-GCM sign helper on an 8*1024-byte buffer.
func BenchmarkAESGCMSign8K(b *testing.B) {
	data := make([]byte, 8*1024)
	benchmarkAESGCMSign(b, data)
}
// BenchmarkSM4GCMSign8K benchmarks the SM4-GCM sign helper on an 8*1024-byte buffer.
func BenchmarkSM4GCMSign8K(b *testing.B) {
	data := make([]byte, 8*1024)
	benchmarkSM4GCMSign(b, data)
}
// BenchmarkAESGCMSeal8K benchmarks AES-GCM Seal on an 8*1024-byte buffer.
func BenchmarkAESGCMSeal8K(b *testing.B) {
	data := make([]byte, 8*1024)
	benchmarkAESGCMSeal(b, data)
}
// BenchmarkSM4GCMSeal8K benchmarks SM4-GCM Seal on an 8*1024-byte buffer.
func BenchmarkSM4GCMSeal8K(b *testing.B) {
	data := make([]byte, 8*1024)
	benchmarkSM4GCMSeal(b, data)
}
// BenchmarkAESGCMOpen8K benchmarks AES-GCM Open on an 8*1024-byte buffer.
func BenchmarkAESGCMOpen8K(b *testing.B) {
	data := make([]byte, 8*1024)
	benchmarkAESGCMOpen(b, data)
}
// BenchmarkSM4GCMOpen8K benchmarks SM4-GCM Open on an 8*1024-byte buffer.
func BenchmarkSM4GCMOpen8K(b *testing.B) {
	data := make([]byte, 8*1024)
	benchmarkSM4GCMOpen(b, data)
}