From f5bc1d657e1e049d41781a67378d2be981940800 Mon Sep 17 00:00:00 2001
From: Sun Yimin
Date: Tue, 21 Jun 2022 15:58:47 +0800
Subject: [PATCH] zuc: performance improvement 1

---
 zuc/README.md | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 zuc/core.go   | 35 ++++++++++++++++++++---------------
 2 files changed, 69 insertions(+), 15 deletions(-)
 create mode 100644 zuc/README.md

diff --git a/zuc/README.md b/zuc/README.md
new file mode 100644
index 0000000..8d0b22d
--- /dev/null
+++ b/zuc/README.md
@@ -0,0 +1,49 @@
+## ZUC original performance:
+
+    goos: windows
+    goarch: amd64
+    pkg: github.com/emmansun/gmsm/zuc
+    cpu: Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz
+    BenchmarkEncrypt1K-6    30052    39131 ns/op    26.04 MB/s
+    BenchmarkEncrypt8K-6    3853    310722 ns/op    26.35 MB/s
+
+## Performance after delay mod & lfsr array copy:
+
+    goos: windows
+    goarch: amd64
+    pkg: github.com/emmansun/gmsm/zuc
+    cpu: Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz
+    BenchmarkEncrypt1K-6    41754    26916 ns/op    37.86 MB/s
+    BenchmarkEncrypt8K-6    5290    215252 ns/op    38.03 MB/s
+
+## Performance after delay mod & lfsr array copy & merge sbox0/sbox1 (sbox size grows from 0.5k to 128k, so I did not commit it):
+    goos: windows
+    goarch: amd64
+    pkg: github.com/emmansun/gmsm/zuc
+    cpu: Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz
+    BenchmarkEncrypt1K-6    49195    23710 ns/op    42.98 MB/s
+    BenchmarkEncrypt8K-6    6000    191255 ns/op    42.81 MB/s
+
+```go
+func (s *zucState32) f32(x0, x1, x2 uint32) uint32 {
+	w := s.r1 ^ x0 + s.r2
+	w1 := s.r1 + x1
+	w2 := s.r2 ^ x2
+	u := l1((w1 << 16) | (w2 >> 16))
+	v := l2((w2 << 16) | (w1 >> 16))
+	s.r1 = uint32(bigSbox[u>>16])<<16 | uint32(bigSbox[u&0xFFFF])
+	s.r2 = uint32(bigSbox[v>>16])<<16 | uint32(bigSbox[v&0xFFFF])
+	return w
+}
+
+// bigSbox is generated by
+	for i := 0; i < 256; i++ {
+		for j := 0; j < 256; j++ {
+			if (j > 0 || i > 0) && j%16 == 0 {
+				fmt.Println()
+			}
+			fmt.Printf("0x%04x,", uint16(sbox0[i])<<8|uint16(sbox1[j]))
+		}
+	}
+	fmt.Println()
+```
diff --git a/zuc/core.go
b/zuc/core.go
index 0640bca..3b65cf0 100644
--- a/zuc/core.go
+++ b/zuc/core.go
@@ -116,24 +116,30 @@ func add31(x, y uint32) uint32 {
 }
 
 func (s *zucState32) enterInitMode(w uint32) {
-	v := s.lfsr[0]
-	v = add31(v, rotateLeft31(s.lfsr[0], 8))
-	v = add31(v, rotateLeft31(s.lfsr[4], 20))
-	v = add31(v, rotateLeft31(s.lfsr[10], 21))
-	v = add31(v, rotateLeft31(s.lfsr[13], 17))
-	v = add31(v, rotateLeft31(s.lfsr[15], 15))
-	v = add31(v, w)
-	if v == 0 {
-		v = 0x7FFFFFFF
+	v := uint64(s.lfsr[15])<<15 + uint64(s.lfsr[13])<<17 + uint64(s.lfsr[10])<<21 + uint64(s.lfsr[4])<<20 + uint64(s.lfsr[0])<<8 + uint64(s.lfsr[0])
+	v = (v & 0x7FFFFFFF) + (v >> 31) // first fold mod 2^31-1; the sum of the two halves can still reach bit 31
+	v = (v & 0x7FFFFFFF) + (v >> 31) // second fold: v now fits in 31 bits, so the uint32 add inside add31 cannot wrap
+	t := add31(uint32(v), w)
+	if t == 0 {
+		t = 0x7FFFFFFF
 	}
-	for i := 0; i < 15; i++ {
-		s.lfsr[i] = s.lfsr[i+1]
-	}
-	s.lfsr[15] = v
+	var temp [16]uint32
+	copy(temp[:], s.lfsr[1:])
+	copy(s.lfsr[:], temp[:])
+	s.lfsr[15] = t
 }
 
 func (s *zucState32) enterWorkMode() {
-	s.enterInitMode(0)
+	v := uint64(s.lfsr[15])<<15 + uint64(s.lfsr[13])<<17 + uint64(s.lfsr[10])<<21 + uint64(s.lfsr[4])<<20 + uint64(s.lfsr[0])<<8 + uint64(s.lfsr[0])
+	v = (v & 0x7FFFFFFF) + (v >> 31) // first fold mod 2^31-1; the sum of the two halves can still reach bit 31
+	v = (v & 0x7FFFFFFF) + (v >> 31) // second fold (previously done by add31(v, 0)): keeps lfsr[15] a 31-bit value
+	if v == 0 {
+		v = 0x7FFFFFFF
+	}
+	var temp [16]uint32
+	copy(temp[:], s.lfsr[1:])
+	copy(s.lfsr[:], temp[:])
+	s.lfsr[15] = uint32(v)
 }
 
 func makeFieldValue3(a, b, c uint32) uint32 {
@@ -201,7 +207,6 @@ func newZUCState(key, iv []byte) (*zucState32, error) {
 		x := state.bitReconstruction()
 		w := state.f32(x[0], x[1], x[2])
 		state.enterInitMode(w >> 1)
-
 	}
 
 	// work state