diff --git a/cipher/benchmark_test.go b/cipher/benchmark_test.go index 9d79b2e..79d3c62 100644 --- a/cipher/benchmark_test.go +++ b/cipher/benchmark_test.go @@ -3,12 +3,28 @@ package cipher_test import ( "crypto/aes" "crypto/cipher" + "crypto/rand" + "io" "testing" smcipher "github.com/emmansun/gmsm/cipher" "github.com/emmansun/gmsm/sm4" ) +func BenchmarkSM4HCTREncrypt1K(b *testing.B) { + var key [16]byte + var tweak [32]byte + c, _ := sm4.NewCipher(key[:]) + io.ReadFull(rand.Reader, tweak[:]) + hctr, _ := smcipher.NewHCTR(c, tweak[:16], tweak[16:]) + buf := make([]byte, 1024) + b.SetBytes(int64(len(buf))) + b.ResetTimer() + for i := 0; i < b.N; i++ { + hctr.Encrypt(buf, buf) + } +} + func benchmarkECBEncrypt1K(b *testing.B, block cipher.Block) { buf := make([]byte, 1024) b.SetBytes(int64(len(buf))) diff --git a/cipher/hctr.go b/cipher/hctr.go index f8ac95d..7f18c97 100644 --- a/cipher/hctr.go +++ b/cipher/hctr.go @@ -37,6 +37,58 @@ type LengthPreservingMode interface { Decrypt(dst, src []byte) } +// hctrFieldElement represents a value in GF(2¹²⁸). In order to reflect the HCTR +// standard and make binary.BigEndian suitable for marshaling these values, the +// bits are stored in big endian order. For example: +// the coefficient of x⁰ can be obtained by v.low >> 63. +// the coefficient of x⁶³ can be obtained by v.low & 1. +// the coefficient of x⁶⁴ can be obtained by v.high >> 63. +// the coefficient of x¹²⁷ can be obtained by v.high & 1. +type hctrFieldElement struct { + low, high uint64 +} + +// reverseBits reverses the order of the bits of 4-bit number in i. +func reverseBits(i int) int { + i = ((i << 2) & 0xc) | ((i >> 2) & 0x3) + i = ((i << 1) & 0xa) | ((i >> 1) & 0x5) + return i +} + +// hctrAdd adds two elements of GF(2¹²⁸) and returns the sum. +func hctrAdd(x, y *hctrFieldElement) hctrFieldElement { + // Addition in a characteristic 2 field is just XOR. 
+ return hctrFieldElement{x.low ^ y.low, x.high ^ y.high} +} + +// hctrDouble returns the result of doubling an element of GF(2¹²⁸). +func hctrDouble(x *hctrFieldElement) (double hctrFieldElement) { + msbSet := x.high&1 == 1 + + // Because of the bit-ordering, doubling is actually a right shift. + double.high = x.high >> 1 + double.high |= x.low << 63 + double.low = x.low >> 1 + + // If the most-significant bit was set before shifting then it, + // conceptually, becomes a term of x^128. This is greater than the + // irreducible polynomial so the result has to be reduced. The + // irreducible polynomial is 1+x+x^2+x^7+x^128. We can subtract that to + // eliminate the term at x^128 which also means subtracting the other + // four terms. In characteristic 2 fields, subtraction == addition == + // XOR. + if msbSet { + double.low ^= 0xe100000000000000 + } + + return +} + +var hctrReductionTable = []uint16{ + 0x0000, 0x1c20, 0x3840, 0x2460, 0x7080, 0x6ca0, 0x48c0, 0x54e0, + 0xe100, 0xfd20, 0xd940, 0xc560, 0x9180, 0x8da0, 0xa9c0, 0xb5e0, +} + // hctr represents a Varaible-Input-Length enciphering mode with a specific block cipher, // and specific tweak and a hash key. See // https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.470.5288 @@ -44,7 +96,9 @@ type LengthPreservingMode interface { type hctr struct { cipher _cipher.Block tweak [blockSize]byte - hkey [blockSize]byte + // productTable contains the first sixteen multiples of the hash key. + // However, they are indexed in bit reversed order. + productTable [16]hctrFieldElement } // NewHCTR returns a [LengthPreservingMode] which encrypts/decrypts useing the given [Block] @@ -55,72 +109,95 @@ func NewHCTR(cipher _cipher.Block, tweak, hkey []byte) (LengthPreservingMode, er } c := &hctr{} c.cipher = cipher - copy(c.hkey[:], hkey) copy(c.tweak[:], tweak) + // We precompute 16 multiples of |key|. 
However, when we do lookups + // into this table we'll be using bits from a field element and + // therefore the bits will be in the reverse order. So normally one + // would expect, say, 4*key to be in index 4 of the table but due to + // this bit ordering it will actually be in index 0010 (base 2) = 2. + x := hctrFieldElement{ + binary.BigEndian.Uint64(hkey[:8]), + binary.BigEndian.Uint64(hkey[8:blockSize]), + } + c.productTable[reverseBits(1)] = x + + for i := 2; i < 16; i += 2 { + c.productTable[reverseBits(i)] = hctrDouble(&c.productTable[reverseBits(i/2)]) + c.productTable[reverseBits(i+1)] = hctrAdd(&c.productTable[reverseBits(i)], &x) + } return c, nil } -func _mul2(v *[blockSize]byte) { - var carryIn byte - for j := range v { - carryOut := (v[j] << 7) & 0x80 - v[j] = (v[j] >> 1) + carryIn - carryIn = carryOut - } - if carryIn != 0 { - v[0] ^= 0xE1 // 1<<7 | 1<<6 | 1<<5 | 1 - } -} +// mul sets y to y*H, where H is the hash key, fixed during NewHCTR. +func (h *hctr) mul(y *hctrFieldElement) { + var z hctrFieldElement -// mul sets y to y*hkey. -func (h *hctr) mul(y *[blockSize]byte) { - var z [blockSize]byte - for _, i := range h.hkey { - for k := 0; k < 8; k++ { - if (i>>(7-k))&1 == 1 { - subtle.XORBytes(z[:], z[:], y[:]) - } - _mul2(y) + for i := 0; i < 2; i++ { + word := y.high + if i == 1 { + word = y.low + } + + // Multiplication works by multiplying z by 16 and adding in + // one of the precomputed multiples of the hash key. + for j := 0; j < 64; j += 4 { + msw := z.high & 0xf + z.high >>= 4 + z.high |= z.low << 60 + z.low >>= 4 + z.low ^= uint64(hctrReductionTable[msw]) << 48 + + // the values in |productTable| are ordered for + // little-endian bit positions. See the comment + // in NewHCTR. 
+ t := &h.productTable[word&0xf] + + z.low ^= t.low + z.high ^= t.high + word >>= 4 } } - copy(y[:], z[:]) + + *y = z +} + +func (h *hctr) updateBlock(block []byte, y *hctrFieldElement) { + y.low ^= binary.BigEndian.Uint64(block) + y.high ^= binary.BigEndian.Uint64(block[8:blockSize]) + h.mul(y) } // Universal Hash Function. // Chapter 3.3 in https://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.470.5288. -func (h *hctr) uhash(m []byte, dst *[blockSize]byte) { - for k := 0; k < blockSize; k++ { - dst[k] = 0 - } +func (h *hctr) uhash(m []byte, out *[blockSize]byte) { + var y hctrFieldElement msg := m + // update blocks for len(msg) >= blockSize { - subtle.XORBytes(dst[:], dst[:], msg[:blockSize]) - h.mul(dst) + h.updateBlock(msg, &y) msg = msg[blockSize:] } - var v [blockSize]byte + // update partial block & tweak: the hash input is M||T zero-padded, so the tweak's first blockSize-len(msg) bytes complete the partial block and its last len(msg) bytes start the next one if len(msg) > 0 { - copy(v[:], msg) - copy(v[len(msg):], h.tweak[:]) - subtle.XORBytes(dst[:], dst[:], v[:]) - h.mul(dst) - copy(v[:], h.tweak[len(msg):]) + var partialBlock [blockSize]byte + copy(partialBlock[:], msg) + copy(partialBlock[len(msg):], h.tweak[:]) + h.updateBlock(partialBlock[:], &y) + + copy(partialBlock[:], h.tweak[blockSize-len(msg):]) for i := len(msg); i < blockSize; i++ { - v[i] = 0 - } - subtle.XORBytes(dst[:], dst[:], v[:]) - h.mul(dst) - for i := 0; i < len(msg); i++ { - v[i] = 0 + partialBlock[i] = 0 } + h.updateBlock(partialBlock[:], &y) } else { - subtle.XORBytes(dst[:], dst[:], h.tweak[:]) - h.mul(dst) + h.updateBlock(h.tweak[:], &y) } - // (|M|)₂ - binary.BigEndian.PutUint64(v[8:], uint64(len(m)+blockSize)<<3) - subtle.XORBytes(dst[:], dst[:], v[:]) - h.mul(dst) + // update bit string length (|M|)₂ + y.high ^= uint64(len(m)+blockSize) * 8 + h.mul(&y) + // output result + binary.BigEndian.PutUint64(out[:], y.low) + binary.BigEndian.PutUint64(out[8:], y.high) } func (h *hctr) Encrypt(ciphertext, plaintext []byte) { @@ -135,7 +212,6 @@ func (h *hctr) Encrypt(ciphertext, plaintext []byte) { } var z1, z2 [blockSize]byte - // a) z1 
generation h.uhash(plaintext[blockSize:], &z1) subtle.XORBytes(z1[:], z1[:], plaintext[:blockSize])