mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
cipher: implement double tweak amd64 asm #149
This commit is contained in:
parent
0eaad02df4
commit
4eacdccbf6
@ -19,8 +19,7 @@ type concurrentBlocks interface {
|
|||||||
DecryptBlocks(dst, src []byte)
|
DecryptBlocks(dst, src []byte)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Cipher contains an expanded key structure. It is safe for concurrent use if
|
// Cipher contains an expanded key structure. It is unsafe for concurrent use.
|
||||||
// the underlying block cipher is safe for concurrent use.
|
|
||||||
type xts struct {
|
type xts struct {
|
||||||
b _cipher.Block
|
b _cipher.Block
|
||||||
tweak [blockSize]byte
|
tweak [blockSize]byte
|
||||||
@ -198,12 +197,8 @@ func (c *xtsEncrypter) CryptBlocks(ciphertext, plaintext []byte) {
|
|||||||
if concCipher, ok := c.b.(concurrentBlocks); ok {
|
if concCipher, ok := c.b.(concurrentBlocks); ok {
|
||||||
batchSize := concCipher.Concurrency() * blockSize
|
batchSize := concCipher.Concurrency() * blockSize
|
||||||
var tweaks []byte = make([]byte, batchSize)
|
var tweaks []byte = make([]byte, batchSize)
|
||||||
|
|
||||||
for len(plaintext) >= batchSize {
|
for len(plaintext) >= batchSize {
|
||||||
for i := 0; i < concCipher.Concurrency(); i++ {
|
doubleTweaks(&c.tweak, tweaks, c.isGB)
|
||||||
copy(tweaks[blockSize*i:], c.tweak[:])
|
|
||||||
mul2(&c.tweak, c.isGB)
|
|
||||||
}
|
|
||||||
subtle.XORBytes(ciphertext, plaintext, tweaks)
|
subtle.XORBytes(ciphertext, plaintext, tweaks)
|
||||||
concCipher.EncryptBlocks(ciphertext, ciphertext)
|
concCipher.EncryptBlocks(ciphertext, ciphertext)
|
||||||
subtle.XORBytes(ciphertext, ciphertext, tweaks)
|
subtle.XORBytes(ciphertext, ciphertext, tweaks)
|
||||||
@ -212,6 +207,7 @@ func (c *xtsEncrypter) CryptBlocks(ciphertext, plaintext []byte) {
|
|||||||
ciphertext = ciphertext[batchSize:]
|
ciphertext = ciphertext[batchSize:]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for len(plaintext) >= blockSize {
|
for len(plaintext) >= blockSize {
|
||||||
subtle.XORBytes(ciphertext, plaintext, c.tweak[:])
|
subtle.XORBytes(ciphertext, plaintext, c.tweak[:])
|
||||||
c.b.Encrypt(ciphertext, ciphertext)
|
c.b.Encrypt(ciphertext, ciphertext)
|
||||||
@ -262,10 +258,7 @@ func (c *xtsDecrypter) CryptBlocks(plaintext, ciphertext []byte) {
|
|||||||
var tweaks []byte = make([]byte, batchSize)
|
var tweaks []byte = make([]byte, batchSize)
|
||||||
|
|
||||||
for len(ciphertext) >= batchSize {
|
for len(ciphertext) >= batchSize {
|
||||||
for i := 0; i < concCipher.Concurrency(); i++ {
|
doubleTweaks(&c.tweak, tweaks, c.isGB)
|
||||||
copy(tweaks[blockSize*i:], c.tweak[:])
|
|
||||||
mul2(&c.tweak, c.isGB)
|
|
||||||
}
|
|
||||||
subtle.XORBytes(plaintext, ciphertext, tweaks)
|
subtle.XORBytes(plaintext, ciphertext, tweaks)
|
||||||
concCipher.DecryptBlocks(plaintext, plaintext)
|
concCipher.DecryptBlocks(plaintext, plaintext)
|
||||||
subtle.XORBytes(plaintext, plaintext, tweaks)
|
subtle.XORBytes(plaintext, plaintext, tweaks)
|
||||||
@ -313,9 +306,9 @@ func (c *xtsDecrypter) CryptBlocks(plaintext, ciphertext []byte) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// mul2 multiplies tweak by 2 in GF(2¹²⁸) with an irreducible polynomial of
|
// mul2Generic multiplies tweak by 2 in GF(2¹²⁸) with an irreducible polynomial of
|
||||||
// x¹²⁸ + x⁷ + x² + x + 1.
|
// x¹²⁸ + x⁷ + x² + x + 1.
|
||||||
func mul2(tweak *[blockSize]byte, isGB bool) {
|
func mul2Generic(tweak *[blockSize]byte, isGB bool) {
|
||||||
var carryIn byte
|
var carryIn byte
|
||||||
if !isGB {
|
if !isGB {
|
||||||
// tweak[0] represents the coefficients of {x^7, x^6, ..., x^0}
|
// tweak[0] represents the coefficients of {x^7, x^6, ..., x^0}
|
||||||
|
157
cipher/xts_amd64.s
Normal file
157
cipher/xts_amd64.s
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
//go:build amd64 && !purego
|
||||||
|
// +build amd64,!purego
|
||||||
|
|
||||||
|
#include "textflag.h"
|
||||||
|
|
||||||
|
// bswapMask is a PSHUFB control mask; its index bytes run 15 down to 0 in
// memory, so shuffling with it reverses the order of all 16 bytes.
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
|
||||||
|
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
|
||||||
|
|
||||||
|
// gcmPoly holds 0x87 in its lowest byte, i.e. the x^7 + x^2 + x + 1 terms that
// are XORed into the tweak when a left shift carries out of the top bit.
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000087
|
||||||
|
DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000
|
||||||
|
|
||||||
|
// gbGcmPoly holds 0xe1 (0x87 with its bits reversed) in its highest byte; it is
// the reduction constant for the isGB paths, which shift the byte-reversed
// tweak right instead of left.
DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000
|
||||||
|
DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000
|
||||||
|
|
||||||
|
// one has only bit 0 set; the isGB paths AND with it to isolate the bit that a
// right shift drops.
DATA one<>+0x00(SB)/8, $0x0000000000000001
|
||||||
|
DATA one<>+0x08(SB)/8, $0x0000000000000000
|
||||||
|
|
||||||
|
// All four constants are 16-byte, pointer-free, read-only symbols local to
// this file.
GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
|
||||||
|
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
|
||||||
|
GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16
|
||||||
|
GLOBL one<>(SB), (NOPTR+RODATA), $16
|
||||||
|
|
||||||
|
|
||||||
|
// XMM register aliases used by mul2 and doubleTweaks.
// POLY holds the reduction constant (gcmPoly or gbGcmPoly).
#define POLY X0
|
||||||
|
// BSWAP holds bswapMask (isGB paths only).
#define BSWAP X1
|
||||||
|
// ONE holds the constant one (isGB paths only).
#define ONE X2
|
||||||
|
// B0 holds the tweak that is being doubled.
#define B0 X3
|
||||||
|
// T0 and T1 are scratch registers for carry and reduction terms.
#define T0 X4
|
||||||
|
#define T1 X5
|
||||||
|
|
||||||
|
// func mul2(tweak *[blockSize]byte, isGB bool)
//
// mul2 doubles the 16-byte tweak at *tweak in place.
// Unless isGB equals 1, the tweak is shifted left by one bit as four 32-bit
// lanes, the bit carried out of each lane is moved into the next higher lane,
// and gcmPoly is XORed in when the top bit of the tweak was set.
// When isGB equals 1, the tweak is byte-reversed with bswapMask, shifted right
// by one bit, gbGcmPoly is XORed in when the dropped low bit was set, and the
// result is byte-reversed again before it is stored.
// NOTE(review): the isGB path uses PSHUFB; presumably callers only run on
// CPUs that support it (SSSE3) - confirm.
|
||||||
|
TEXT ·mul2(SB),NOSPLIT,$0
|
||||||
|
MOVQ tweak+0(FP), DI // DI = pointer to the tweak
|
||||||
|
MOVB isGB+8(FP), AX // AX = isGB flag
|
||||||
|
|
||||||
|
MOVOU (0*16)(DI), B0 // B0 = current tweak (unaligned load)
|
||||||
|
|
||||||
|
CMPB AX, $1
|
||||||
|
JE gb_alg // isGB == 1: use the byte-reversed variant below
|
||||||
|
|
||||||
|
MOVOU gcmPoly<>(SB), POLY
|
||||||
|
|
||||||
|
// B0 * 2: shift the 128-bit value left by one bit using 32-bit lanes
|
||||||
|
PSHUFD $0xff, B0, T0 // broadcast the highest 32-bit lane to all lanes
|
||||||
|
MOVOU B0, T1
|
||||||
|
PSRAL $31, T0 // T0 for reduction: all-ones if the tweak's top bit is set
|
||||||
|
PAND POLY, T0 // T0 = POLY if the top bit was set, else 0
|
||||||
|
PSRLL $31, T1 // move the top bit of every lane down to bit 0
|
||||||
|
PSLLDQ $4, T1 // shift by one lane so each carry lines up with the next lane
|
||||||
|
PSLLL $1, B0 // shift every lane left by one bit
|
||||||
|
PXOR T0, B0 // fold in the reduction term
|
||||||
|
PXOR T1, B0 // fold in the inter-lane carries
|
||||||
|
|
||||||
|
MOVOU B0, (0*16)(DI) // write the doubled tweak back
|
||||||
|
|
||||||
|
RET
|
||||||
|
|
||||||
|
gb_alg:
|
||||||
|
MOVOU bswapMask<>(SB), BSWAP
|
||||||
|
MOVOU gbGcmPoly<>(SB), POLY
|
||||||
|
MOVOU one<>(SB), ONE
|
||||||
|
PXOR X6, X6 // X6 = 0, the comparison operand for PCMPEQL below
|
||||||
|
|
||||||
|
PSHUFB BSWAP, B0 // reverse the byte order of the tweak
|
||||||
|
|
||||||
|
// B0 * 2: shift the 128-bit value right by one bit using 64-bit lanes
|
||||||
|
MOVOU B0, T0
|
||||||
|
MOVOU B0, T1 // keep the unshifted value for the reduction test
|
||||||
|
PSRLQ $1, B0 // shift both 64-bit lanes right by one bit
|
||||||
|
PSLLQ $63, T0 // move bit 0 of each lane up to bit 63
|
||||||
|
PSRLDQ $8, T0 // high lane's dropped bit lands in bit 63 of the low lane
|
||||||
|
POR T0, B0
|
||||||
|
|
||||||
|
// reduction: XOR in POLY when bit 0 of the unshifted value was set
|
||||||
|
PAND ONE, T1 // isolate bit 0
|
||||||
|
PSHUFD $0, T1, T1 // broadcast the lowest 32-bit lane to all lanes
|
||||||
|
PCMPEQL X6, T1 // all-ones if the bit was clear, zero if it was set
|
||||||
|
PANDN POLY, T1 // T1 = POLY if the bit was set, else 0
|
||||||
|
PXOR T1, B0
|
||||||
|
|
||||||
|
PSHUFB BSWAP, B0 // restore the original byte order
|
||||||
|
MOVOU B0, (0*16)(DI) // write the doubled tweak back
|
||||||
|
RET
|
||||||
|
|
||||||
|
// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// doubleTweaks fills every whole 16-byte slot of tweaks with successive tweak
// values: the current value of *tweak is stored into a slot and then doubled,
// once per slot. After the last slot the next (not yet stored) value is
// written back to *tweak. A trailing partial slot is left untouched.
// If tweaks holds no whole slot the function returns without writing
// anything, matching the pure Go implementation.
// Unless isGB equals 1 the doubling is a 128-bit left shift reduced with
// gcmPoly; when isGB equals 1 the tweak is byte-reversed, shifted right,
// reduced with gbGcmPoly and byte-reversed back.
// NOTE(review): the isGB path uses PSHUFB; presumably callers only run on
// CPUs that support it (SSSE3) - confirm.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVQ tweak+0(FP), DI       // DI = pointer to the running tweak
	MOVQ tweaks+8(FP), AX      // AX = write cursor into tweaks
	MOVQ tweaks_len+16(FP), BX // BX = len(tweaks)
	MOVB isGB+32(FP), CX       // CX = isGB flag

	MOVOU (0*16)(DI), B0       // B0 = current tweak (unaligned load)

	SHRQ $4, BX                // BX = number of whole 16-byte slots

	// Both loops below store before they test the counter, so a slice
	// without a single whole slot must be rejected here to avoid writing
	// past its end.
	TESTQ BX, BX
	JEQ dt_done

	XORQ DX, DX                // DX = slots written so far

	CMPB CX, $1
	JE dt_gb_alg               // isGB == 1: use the byte-reversed variant

	MOVOU gcmPoly<>(SB), POLY

loop:
	MOVOU B0, (0*16)(AX)       // emit the current tweak
	LEAQ 16(AX), AX            // advance to the next slot

	// B0 * 2: shift the 128-bit value left by one bit using 32-bit lanes
	PSHUFD $0xff, B0, T0       // broadcast the highest 32-bit lane
	MOVOU B0, T1
	PSRAL $31, T0              // all-ones if the tweak's top bit is set
	PAND POLY, T0              // T0 = POLY if the top bit was set, else 0
	PSRLL $31, T1              // top bit of every lane moved down to bit 0
	PSLLDQ $4, T1              // line each carry up with the next lane
	PSLLL $1, B0               // shift every lane left by one bit
	PXOR T0, B0                // fold in the reduction term
	PXOR T1, B0                // fold in the inter-lane carries

	ADDQ $1, DX
	CMPQ DX, BX
	JB loop

	MOVOU B0, (0*16)(DI)       // save the next tweak for the caller
	RET

dt_gb_alg:
	MOVOU bswapMask<>(SB), BSWAP
	MOVOU gbGcmPoly<>(SB), POLY
	MOVOU one<>(SB), ONE
	PXOR X6, X6                // X6 = 0, comparison operand for PCMPEQL

gb_loop:
	MOVOU B0, (0*16)(AX)       // emit the current tweak
	LEAQ 16(AX), AX            // advance to the next slot

	PSHUFB BSWAP, B0           // reverse the byte order of the tweak

	// B0 * 2: shift the 128-bit value right by one bit using 64-bit lanes
	MOVOU B0, T0
	MOVOU B0, T1               // keep the unshifted value for the reduction test
	PSRLQ $1, B0               // shift both 64-bit lanes right by one bit
	PSLLQ $63, T0              // move bit 0 of each lane up to bit 63
	PSRLDQ $8, T0              // high lane's dropped bit -> bit 63 of low lane
	POR T0, B0

	// reduction: XOR in POLY when bit 0 of the unshifted value was set
	PAND ONE, T1               // isolate bit 0
	PSHUFD $0, T1, T1          // broadcast the lowest 32-bit lane
	PCMPEQL X6, T1             // all-ones if the bit was clear
	PANDN POLY, T1             // T1 = POLY if the bit was set, else 0
	PXOR T1, B0

	PSHUFB BSWAP, B0           // restore the original byte order
	ADDQ $1, DX
	CMPQ DX, BX
	JB gb_loop

	MOVOU B0, (0*16)(DI)       // save the next tweak for the caller
	RET

dt_done:
	RET
|
10
cipher/xts_asm.go
Normal file
10
cipher/xts_asm.go
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
//go:build amd64 && !purego
|
||||||
|
// +build amd64,!purego
|
||||||
|
|
||||||
|
package cipher
|
||||||
|
|
||||||
|
// mul2 doubles the 16-byte tweak in place. isGB selects the variant that
// operates on the byte-reversed tweak. It is implemented in assembly in
// xts_amd64.s.
//
//go:noescape
|
||||||
|
func mul2(tweak *[blockSize]byte, isGB bool)
|
||||||
|
|
||||||
|
// doubleTweaks writes successive tweak values into the 16-byte slots of
// tweaks, doubling tweak after each one, and leaves the next value in tweak.
// isGB selects the variant that operates on the byte-reversed tweak. It is
// implemented in assembly in xts_amd64.s.
//
//go:noescape
|
||||||
|
func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
|
79
cipher/xts_asm_test.go
Normal file
79
cipher/xts_asm_test.go
Normal file
@ -0,0 +1,79 @@
|
|||||||
|
//go:build amd64 && !purego
|
||||||
|
// +build amd64,!purego
|
||||||
|
|
||||||
|
package cipher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/hex"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
// testTweakVector lists hex-encoded 16-byte tweaks that the tests below decode
// and feed to both the assembly and the generic doubling routines.
var testTweakVector = []string{
|
||||||
|
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF",
|
||||||
|
"66e94bd4ef8a2c3b884cfa59ca342b2e",
|
||||||
|
"3f803bcd0d7fd2b37558419f59d5cda6",
|
||||||
|
"6dcfba212f5d82bf525ee9793cfa505a",
|
||||||
|
"c172964cd58be2b8d8e09d9c5e9cfe36",
|
||||||
|
"1a267577a90caad6ae988e22714a2b8b",
|
||||||
|
"33fab707493702e77ff8d66ba9e6c6fe",
|
||||||
|
"23fb188b0f87f6ee2ec0803a99771341",
|
||||||
|
"e8de0a4188b7efbc1ac3979eb906cf36",
|
||||||
|
}
|
||||||
|
|
||||||
|
func testDoubleTweak(t *testing.T, isGB bool) {
|
||||||
|
for _, tk := range testTweakVector {
|
||||||
|
tweak, _ := hex.DecodeString(tk)
|
||||||
|
|
||||||
|
var t1, t2 [16]byte
|
||||||
|
copy(t1[:], tweak)
|
||||||
|
copy(t2[:], tweak)
|
||||||
|
mul2(&t1, isGB)
|
||||||
|
mul2Generic(&t2, isGB)
|
||||||
|
|
||||||
|
if !bytes.Equal(t1[:], t2[:]) {
|
||||||
|
t.Errorf("tweak %v, expected %x, got %x", tk, t2[:], t1[:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDoubleTweak(t *testing.T) {
|
||||||
|
testDoubleTweak(t, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDoubleTweakGB(t *testing.T) {
|
||||||
|
testDoubleTweak(t, true)
|
||||||
|
}
|
||||||
|
|
||||||
|
func testDoubleTweaks(t *testing.T, isGB bool) {
|
||||||
|
for _, tk := range testTweakVector {
|
||||||
|
tweak, _ := hex.DecodeString(tk)
|
||||||
|
|
||||||
|
var t1, t2 [16]byte
|
||||||
|
var t11, t12 [128]byte
|
||||||
|
copy(t1[:], tweak)
|
||||||
|
copy(t2[:], tweak)
|
||||||
|
|
||||||
|
for i := 0; i < 8; i++ {
|
||||||
|
copy(t12[16*i:], t2[:])
|
||||||
|
mul2Generic(&t2, isGB)
|
||||||
|
}
|
||||||
|
|
||||||
|
doubleTweaks(&t1, t11[:], isGB)
|
||||||
|
|
||||||
|
if !bytes.Equal(t1[:], t2[:]) {
|
||||||
|
t.Errorf("1 tweak %v, expected %x, got %x", tk, t2[:], t1[:])
|
||||||
|
}
|
||||||
|
if !bytes.Equal(t11[:], t12[:]) {
|
||||||
|
t.Errorf("2 tweak %v, expected %x, got %x", tk, t12[:], t11[:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDoubleTweaks(t *testing.T) {
|
||||||
|
testDoubleTweaks(t, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDoubleTweaksGB(t *testing.T) {
|
||||||
|
testDoubleTweaks(t, true)
|
||||||
|
}
|
16
cipher/xts_generic.go
Normal file
16
cipher/xts_generic.go
Normal file
@ -0,0 +1,16 @@
|
|||||||
|
//go:build !amd64 || purego
|
||||||
|
// +build !amd64 purego
|
||||||
|
|
||||||
|
package cipher
|
||||||
|
|
||||||
|
func mul2(tweak *[blockSize]byte, isGB bool) {
|
||||||
|
mul2Generic(tweak, isGB)
|
||||||
|
}
|
||||||
|
|
||||||
|
func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool) {
|
||||||
|
count := len(tweaks) >> 4
|
||||||
|
for i := 0; i < count; i++ {
|
||||||
|
copy(tweaks[blockSize*i:], tweak[:])
|
||||||
|
mul2(tweak, isGB)
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user