cipher: implement double tweak amd64 asm #149

This commit is contained in:
Sun Yimin 2023-08-18 17:49:57 +08:00 committed by GitHub
parent 0eaad02df4
commit 4eacdccbf6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 268 additions and 13 deletions

View File

@ -19,8 +19,7 @@ type concurrentBlocks interface {
DecryptBlocks(dst, src []byte) DecryptBlocks(dst, src []byte)
} }
// Cipher contains an expanded key structure. It is safe for concurrent use if // Cipher contains an expanded key structure. It is unsafe for concurrent use.
// the underlying block cipher is safe for concurrent use.
type xts struct { type xts struct {
b _cipher.Block b _cipher.Block
tweak [blockSize]byte tweak [blockSize]byte
@ -198,12 +197,8 @@ func (c *xtsEncrypter) CryptBlocks(ciphertext, plaintext []byte) {
if concCipher, ok := c.b.(concurrentBlocks); ok { if concCipher, ok := c.b.(concurrentBlocks); ok {
batchSize := concCipher.Concurrency() * blockSize batchSize := concCipher.Concurrency() * blockSize
var tweaks []byte = make([]byte, batchSize) var tweaks []byte = make([]byte, batchSize)
for len(plaintext) >= batchSize { for len(plaintext) >= batchSize {
for i := 0; i < concCipher.Concurrency(); i++ { doubleTweaks(&c.tweak, tweaks, c.isGB)
copy(tweaks[blockSize*i:], c.tweak[:])
mul2(&c.tweak, c.isGB)
}
subtle.XORBytes(ciphertext, plaintext, tweaks) subtle.XORBytes(ciphertext, plaintext, tweaks)
concCipher.EncryptBlocks(ciphertext, ciphertext) concCipher.EncryptBlocks(ciphertext, ciphertext)
subtle.XORBytes(ciphertext, ciphertext, tweaks) subtle.XORBytes(ciphertext, ciphertext, tweaks)
@ -212,6 +207,7 @@ func (c *xtsEncrypter) CryptBlocks(ciphertext, plaintext []byte) {
ciphertext = ciphertext[batchSize:] ciphertext = ciphertext[batchSize:]
} }
} }
for len(plaintext) >= blockSize { for len(plaintext) >= blockSize {
subtle.XORBytes(ciphertext, plaintext, c.tweak[:]) subtle.XORBytes(ciphertext, plaintext, c.tweak[:])
c.b.Encrypt(ciphertext, ciphertext) c.b.Encrypt(ciphertext, ciphertext)
@ -262,10 +258,7 @@ func (c *xtsDecrypter) CryptBlocks(plaintext, ciphertext []byte) {
var tweaks []byte = make([]byte, batchSize) var tweaks []byte = make([]byte, batchSize)
for len(ciphertext) >= batchSize { for len(ciphertext) >= batchSize {
for i := 0; i < concCipher.Concurrency(); i++ { doubleTweaks(&c.tweak, tweaks, c.isGB)
copy(tweaks[blockSize*i:], c.tweak[:])
mul2(&c.tweak, c.isGB)
}
subtle.XORBytes(plaintext, ciphertext, tweaks) subtle.XORBytes(plaintext, ciphertext, tweaks)
concCipher.DecryptBlocks(plaintext, plaintext) concCipher.DecryptBlocks(plaintext, plaintext)
subtle.XORBytes(plaintext, plaintext, tweaks) subtle.XORBytes(plaintext, plaintext, tweaks)
@ -313,9 +306,9 @@ func (c *xtsDecrypter) CryptBlocks(plaintext, ciphertext []byte) {
} }
} }
// mul2 multiplies tweak by 2 in GF(2¹²⁸) with an irreducible polynomial of // mul2Generic multiplies tweak by 2 in GF(2¹²⁸) with an irreducible polynomial of
// x¹²⁸ + x⁷ + x² + x + 1. // x¹²⁸ + x⁷ + x² + x + 1.
func mul2(tweak *[blockSize]byte, isGB bool) { func mul2Generic(tweak *[blockSize]byte, isGB bool) {
var carryIn byte var carryIn byte
if !isGB { if !isGB {
// tweak[0] represents the coefficients of {x^7, x^6, ..., x^0} // tweak[0] represents the coefficients of {x^7, x^6, ..., x^0}

157
cipher/xts_amd64.s Normal file
View File

@ -0,0 +1,157 @@
//go:build amd64 && !purego
// +build amd64,!purego
#include "textflag.h"
// bswapMask is a PSHUFB control mask that reverses the order of the 16 bytes
// of an XMM register (destination byte i is taken from source byte 15-i).
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
// gcmPoly holds 0x87 in its lowest byte; it is XORed into the result on the
// default (non-GB) path when the bit shifted out of the tweak is set.
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000087
DATA gcmPoly<>+0x08(SB)/8, $0x0000000000000000
// gbGcmPoly holds 0xe1 in its highest byte; it is the reduction constant used
// on the GB path, which works on the byte-reversed tweak.
DATA gbGcmPoly<>+0x00(SB)/8, $0x0000000000000000
DATA gbGcmPoly<>+0x08(SB)/8, $0xe100000000000000
// one is the 128-bit value 1, used as a mask to isolate the lowest bit.
DATA one<>+0x00(SB)/8, $0x0000000000000001
DATA one<>+0x08(SB)/8, $0x0000000000000000
// All four constants are 16-byte, read-only, pointer-free data.
GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
GLOBL gbGcmPoly<>(SB), (NOPTR+RODATA), $16
GLOBL one<>(SB), (NOPTR+RODATA), $16
// Register aliases shared by mul2 and doubleTweaks.
// POLY: reduction constant for the selected path.
#define POLY X0
// BSWAP: byte-reversal shuffle mask (GB path only).
#define BSWAP X1
// ONE: lowest-bit mask (GB path only).
#define ONE X2
// B0: the tweak being doubled.
#define B0 X3
// T0, T1: scratch registers for carry and reduction terms.
#define T0 X4
#define T1 X5
// func mul2(tweak *[blockSize]byte, isGB bool)
//
// mul2 doubles the 16-byte tweak in place. The tweak pointer is read from
// 0(FP) and the isGB flag from 8(FP). When isGB is 1 the gb_alg path runs;
// otherwise the default path directly below is used.
TEXT ·mul2(SB),NOSPLIT,$0
MOVQ tweak+0(FP), DI
MOVB isGB+8(FP), AX
MOVOU (0*16)(DI), B0
CMPB AX, $1
JE gb_alg
MOVOU gcmPoly<>(SB), POLY
// B0 * 2: a 128-bit left shift by one bit, built from 32-bit lane shifts.
// T0: broadcast the highest dword and arithmetic-shift it by 31, so every
// lane is all ones when the top bit of the tweak (the bit shifted out) is set.
PSHUFD $0xff, B0, T0
MOVOU B0, T1
PSRAL $31, T0 // T0 for reduction
// Keep the 0x87 reduction constant only when that top bit was set.
PAND POLY, T0
// T1: move the top bit of each dword to bit 0, then shift the register up by
// one dword so each bit becomes the carry into the next higher lane.
PSRLL $31, T1
PSLLDQ $4, T1
// Shift every lane left by one, then merge the reduction and the carries.
PSLLL $1, B0
PXOR T0, B0
PXOR T1, B0
MOVOU B0, (0*16)(DI)
RET
gb_alg:
MOVOU bswapMask<>(SB), BSWAP
MOVOU gbGcmPoly<>(SB), POLY
MOVOU one<>(SB), ONE
// X6 = 0; it is the comparison operand for PCMPEQL below.
PXOR X6, X6
// Reverse the byte order of the tweak before shifting.
PSHUFB BSWAP, B0
// B0 * 2: on this path doubling is a 128-bit right shift by one bit.
MOVOU B0, T0
MOVOU B0, T1
PSRLQ $1, B0
// Carry bit 0 of the high qword into bit 63 of the low qword.
PSLLQ $63, T0
PSRLDQ $8, T0
POR T0, B0
// reduction: isolate bit 0 of the pre-shift value, broadcast it, and turn it
// into a mask so that T1 ends up as POLY when the bit was set and 0 otherwise.
PAND ONE, T1
PSHUFD $0, T1, T1
PCMPEQL X6, T1
PANDN POLY, T1
PXOR T1, B0
// Restore the original byte order and write the result back.
PSHUFB BSWAP, B0
MOVOU B0, (0*16)(DI)
RET
// func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)
//
// doubleTweaks fills tweaks with successive tweak values. For each complete
// 16-byte slot of tweaks it stores the current tweak into the slot and then
// doubles the tweak; on return *tweak holds the value that follows the last
// stored one. isGB selects the byte-reversed (GB) doubling variant.
//
// If tweaks holds no complete 16-byte slot the function returns without
// touching tweaks or *tweak, matching the pure-Go implementation. Without
// this check the do-while shaped loops below would write 16 bytes past the
// end of a short slice.
TEXT ·doubleTweaks(SB),NOSPLIT,$0
	MOVQ tweak+0(FP), DI
	MOVQ tweaks+8(FP), AX
	MOVQ tweaks_len+16(FP), BX
	MOVB isGB+32(FP), CX

	MOVOU (0*16)(DI), B0

	// BX = number of complete 16-byte slots in tweaks.
	SHRQ $4, BX
	// Nothing to do for an empty or too-short slice.
	TESTQ BX, BX
	JZ dt_done

	// DX counts the slots written so far.
	XORQ DX, DX

	CMPB CX, $1
	JE dt_gb_alg

	MOVOU gcmPoly<>(SB), POLY

loop:
	// Emit the current tweak and advance the output pointer.
	MOVOU B0, (0*16)(AX)
	LEAQ 16(AX), AX

	// B0 * 2: 128-bit left shift by one bit, built from 32-bit lane shifts.
	// T0 becomes 0x87 in the lowest byte when the top bit of B0 is set.
	PSHUFD $0xff, B0, T0
	MOVOU B0, T1
	PSRAL $31, T0 // T0 for reduction
	PAND POLY, T0
	// T1 carries the top bit of each dword into the next higher dword.
	PSRLL $31, T1
	PSLLDQ $4, T1
	PSLLL $1, B0
	PXOR T0, B0
	PXOR T1, B0

	ADDQ $1, DX
	CMPQ DX, BX
	JB loop

	// Hand the next tweak back to the caller.
	MOVOU B0, (0*16)(DI)
	RET

dt_gb_alg:
	MOVOU bswapMask<>(SB), BSWAP
	MOVOU gbGcmPoly<>(SB), POLY
	MOVOU one<>(SB), ONE
	// X6 = 0; comparison operand for PCMPEQL in the loop.
	PXOR X6, X6

gb_loop:
	// Emit the current tweak and advance the output pointer.
	MOVOU B0, (0*16)(AX)
	LEAQ 16(AX), AX

	// Work on the byte-reversed tweak.
	PSHUFB BSWAP, B0

	// B0 * 2: 128-bit right shift by one bit; bit 0 of the high qword is
	// carried into bit 63 of the low qword.
	MOVOU B0, T0
	MOVOU B0, T1
	PSRLQ $1, B0
	PSLLQ $63, T0
	PSRLDQ $8, T0
	POR T0, B0

	// reduction: T1 becomes POLY when bit 0 of the pre-shift value was set,
	// and 0 otherwise.
	PAND ONE, T1
	PSHUFD $0, T1, T1
	PCMPEQL X6, T1
	PANDN POLY, T1
	PXOR T1, B0

	// Back to the original byte order.
	PSHUFB BSWAP, B0

	ADDQ $1, DX
	CMPQ DX, BX
	JB gb_loop

	// Hand the next tweak back to the caller.
	MOVOU B0, (0*16)(DI)
	RET

dt_done:
	RET

10
cipher/xts_asm.go Normal file
View File

@ -0,0 +1,10 @@
//go:build amd64 && !purego
// +build amd64,!purego
package cipher
// mul2 doubles the 16-byte tweak in place; isGB selects the GB variant of the
// doubling. On amd64 builds without the purego tag the body is provided by
// the assembly routine ·mul2 in xts_amd64.s.
//
//go:noescape
func mul2(tweak *[blockSize]byte, isGB bool)
// doubleTweaks writes successive tweak values into tweaks, one per 16-byte
// slot: each slot receives the current tweak, which is then doubled. On
// return *tweak holds the value following the last one stored. isGB selects
// the GB variant of the doubling. The body is provided by the assembly
// routine ·doubleTweaks in xts_amd64.s.
//
// NOTE(review): the visible callers pass a buffer of Concurrency()*blockSize
// bytes; the assembly appears to assume at least one full slot — confirm
// before calling it with a shorter slice.
//
//go:noescape
func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool)

79
cipher/xts_asm_test.go Normal file
View File

@ -0,0 +1,79 @@
//go:build amd64 && !purego
// +build amd64,!purego
package cipher
import (
"bytes"
"encoding/hex"
"testing"
)
// testTweakVector lists hex-encoded 16-byte values used as starting tweaks by
// the tests in this file. Each entry is decoded with hex.DecodeString and
// copied into a [16]byte before being doubled.
var testTweakVector = []string{
"F0F1F2F3F4F5F6F7F8F9FAFBFCFDFEFF",
"66e94bd4ef8a2c3b884cfa59ca342b2e",
"3f803bcd0d7fd2b37558419f59d5cda6",
"6dcfba212f5d82bf525ee9793cfa505a",
"c172964cd58be2b8d8e09d9c5e9cfe36",
"1a267577a90caad6ae988e22714a2b8b",
"33fab707493702e77ff8d66ba9e6c6fe",
"23fb188b0f87f6ee2ec0803a99771341",
"e8de0a4188b7efbc1ac3979eb906cf36",
}
// testDoubleTweak checks, for every entry of testTweakVector, that mul2 (the
// assembly implementation under this file's build tags) produces the same
// result as the portable mul2Generic. isGB is forwarded to both.
func testDoubleTweak(t *testing.T, isGB bool) {
	t.Helper()
	for _, tk := range testTweakVector {
		tweak, err := hex.DecodeString(tk)
		if err != nil {
			t.Fatalf("invalid test vector %q: %v", tk, err)
		}
		var got, want [16]byte
		// A short vector would be silently zero-padded by copy; reject it.
		if len(tweak) != len(got) {
			t.Fatalf("test vector %q decodes to %d bytes, want %d", tk, len(tweak), len(got))
		}
		copy(got[:], tweak)
		copy(want[:], tweak)

		mul2(&got, isGB)
		mul2Generic(&want, isGB)

		if !bytes.Equal(got[:], want[:]) {
			t.Errorf("tweak %v, expected %x, got %x", tk, want[:], got[:])
		}
	}
}
// TestDoubleTweak compares mul2 against mul2Generic with isGB disabled.
func TestDoubleTweak(t *testing.T) {
	const isGB = false
	testDoubleTweak(t, isGB)
}
// TestDoubleTweakGB compares mul2 against mul2Generic with isGB enabled.
func TestDoubleTweakGB(t *testing.T) {
	const isGB = true
	testDoubleTweak(t, isGB)
}
// testDoubleTweaks checks doubleTweaks against a reference built with
// mul2Generic. For every entry of testTweakVector it expands the tweak into
// eight consecutive 16-byte slots both ways and verifies that the slot
// contents and the final tweak value agree. isGB is forwarded to both.
func testDoubleTweaks(t *testing.T, isGB bool) {
	t.Helper()
	const (
		tweakLen = 16 // size of one tweak in bytes
		slots    = 8  // number of tweaks generated per call
	)
	for _, tk := range testTweakVector {
		tweak, err := hex.DecodeString(tk)
		if err != nil {
			t.Fatalf("invalid test vector %q: %v", tk, err)
		}
		// A short vector would be silently zero-padded by copy; reject it.
		if len(tweak) != tweakLen {
			t.Fatalf("test vector %q decodes to %d bytes, want %d", tk, len(tweak), tweakLen)
		}

		var got, want [tweakLen]byte
		var gotTweaks, wantTweaks [slots * tweakLen]byte
		copy(got[:], tweak)
		copy(want[:], tweak)

		// Reference: store the current tweak, then double it, slot by slot.
		for i := 0; i < slots; i++ {
			copy(wantTweaks[tweakLen*i:], want[:])
			mul2Generic(&want, isGB)
		}

		doubleTweaks(&got, gotTweaks[:], isGB)

		if !bytes.Equal(got[:], want[:]) {
			t.Errorf("1 tweak %v, expected %x, got %x", tk, want[:], got[:])
		}
		if !bytes.Equal(gotTweaks[:], wantTweaks[:]) {
			t.Errorf("2 tweak %v, expected %x, got %x", tk, wantTweaks[:], gotTweaks[:])
		}
	}
}
// TestDoubleTweaks compares doubleTweaks against the mul2Generic reference
// with isGB disabled.
func TestDoubleTweaks(t *testing.T) {
	const isGB = false
	testDoubleTweaks(t, isGB)
}
// TestDoubleTweaksGB compares doubleTweaks against the mul2Generic reference
// with isGB enabled.
func TestDoubleTweaksGB(t *testing.T) {
	const isGB = true
	testDoubleTweaks(t, isGB)
}

16
cipher/xts_generic.go Normal file
View File

@ -0,0 +1,16 @@
//go:build !amd64 || purego
// +build !amd64 purego
package cipher
// mul2 doubles tweak in place. On builds without the amd64 assembly
// (non-amd64 targets or the purego tag) it simply forwards to mul2Generic;
// isGB is passed through unchanged.
func mul2(tweak *[blockSize]byte, isGB bool) {
mul2Generic(tweak, isGB)
}
// doubleTweaks is the portable counterpart of the assembly routine of the
// same name. It walks tweaks one complete block at a time: each block
// receives a copy of the current tweak, after which the tweak is doubled via
// mul2. Trailing bytes that do not form a complete block are left untouched,
// and an empty or too-short slice results in no work at all. On return *tweak
// holds the value following the last one stored.
func doubleTweaks(tweak *[blockSize]byte, tweaks []byte, isGB bool) {
	for off := 0; off+blockSize <= len(tweaks); off += blockSize {
		copy(tweaks[off:], tweak[:])
		mul2(tweak, isGB)
	}
}