diff --git a/zuc/README.md b/zuc/README.md
index e82b00d..41d4d11 100644
--- a/zuc/README.md
+++ b/zuc/README.md
@@ -63,3 +63,8 @@ func (s *zucState32) f32(x0, x1, x2 uint32) uint32 {
     cpu: Intel(R) Core(TM) i5-9500 CPU @ 3.00GHz
     BenchmarkHash1K-6 317750 3833 ns/op 267.13 MB/s
     BenchmarkHash8K-6 40460 28921 ns/op 283.26 MB/s
+    BenchmarkHash1K_Tag64-6 302163 3979 ns/op 257.34 MB/s
+    BenchmarkHash8K_Tag64-6 39210 30859 ns/op 265.46 MB/s
+    BenchmarkHash1K_Tag128-6 279069 4134 ns/op 247.70 MB/s
+    BenchmarkHash8K_Tag128-6 38238 31395 ns/op 260.93 MB/s
+
diff --git a/zuc/eia256.go b/zuc/eia256.go
index fe55b40..123af21 100644
--- a/zuc/eia256.go
+++ b/zuc/eia256.go
@@ -83,7 +83,7 @@ func (m *ZUC256Mac) Reset() {
 	m.genKeywords(m.k0[:4])
 }
 
-func (m *ZUC256Mac) block(p []byte) {
+func block256Generic(m *ZUC256Mac, p []byte) { // portable block routine; previously the method (*ZUC256Mac).block
 	var k64, t64 uint64
 	if m.tagSize == 4 {
 		t64 = uint64(m.t[0]) << 32
@@ -138,14 +138,14 @@ func (m *ZUC256Mac) Write(p []byte) (nn int, err error) {
 		n := copy(m.x[m.nx:], p)
 		m.nx += n
 		if m.nx == chunk {
-			m.block(m.x[:])
+			block256(m, m.x[:]) // buffer holds a full chunk: hand it to the build-selected block routine
 			m.nx = 0
 		}
 		p = p[n:]
 	}
 	if len(p) >= chunk {
 		n := len(p) &^ (chunk - 1)
-		m.block(p[:n])
+		block256(m, p[:n]) // n is len(p) with the bits of (chunk - 1) cleared
 		p = p[n:]
 	}
 	if len(p) > 0 {
diff --git a/zuc/eia256_asm.go b/zuc/eia256_asm.go
new file mode 100644
index 0000000..09c3ede
--- /dev/null
+++ b/zuc/eia256_asm.go
@@ -0,0 +1,29 @@
+//go:build (amd64 && !generic)
+// +build amd64,!generic
+
+package zuc
+// eia256RoundTag8 (assembly, eia256_asm_amd64.s) reads 16 bytes at p, XORs a 64-bit value into the memory at t and copies keyStream[4:8] over keyStream[0:4].
+//go:noescape
+func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
+// eia256RoundTag16 (assembly, eia256_asm_amd64.s) does the same as eia256RoundTag8 but updates 128 bits at t.
+//go:noescape
+func eia256RoundTag16(t *uint32, keyStream *uint32, p *byte)
+// block256 processes p one chunk at a time with the assembly rounds when supportsGFMUL is set, otherwise it defers to block256Generic.
+func block256(m *ZUC256Mac, p []byte) {
+	if supportsGFMUL {
+		for len(p) >= chunk { // a tail shorter than chunk is left untouched; Write only passes whole chunks
+			m.genKeywords(m.k0[4:]) // helper defined elsewhere; presumably refills the upper key words before each round
+			switch m.tagSize {
+			case 8:
+				eia256RoundTag8(&m.t[0], &m.k0[0], &p[0])
+			case 16:
+				eia256RoundTag16(&m.t[0], &m.k0[0], &p[0])
+			default: // every other tag size is delegated to the shared eia3Round16B routine
+				eia3Round16B(&m.t[0], &m.k0[0], &p[0], m.tagSize)
+			}
+			p = p[chunk:] // advance past the chunk just consumed
+		}
+	} else {
+		block256Generic(m, p)
+	}
+}
diff --git a/zuc/eia256_asm_amd64.s
b/zuc/eia256_asm_amd64.s
new file mode 100644
index 0000000..e830d55
--- /dev/null
+++ b/zuc/eia256_asm_amd64.s
@@ -0,0 +1,448 @@
+// Referenced https://github.com/intel/intel-ipsec-mb/
+//go:build amd64 && !generic
+// +build amd64,!generic
+
+#include "textflag.h"
+
+DATA bit_reverse_table_l<>+0x00(SB)/8, $0x0e060a020c040800
+DATA bit_reverse_table_l<>+0x08(SB)/8, $0x0f070b030d050901
+GLOBL bit_reverse_table_l<>(SB), RODATA, $16 // entry i = bit-reversed nibble i, placed in the low nibble
+
+DATA bit_reverse_table_h<>+0x00(SB)/8, $0xe060a020c0408000
+DATA bit_reverse_table_h<>+0x08(SB)/8, $0xf070b030d0509010
+GLOBL bit_reverse_table_h<>(SB), RODATA, $16 // entry i = bit-reversed nibble i, placed in the high nibble
+
+DATA bit_reverse_and_table<>+0x00(SB)/8, $0x0f0f0f0f0f0f0f0f
+DATA bit_reverse_and_table<>+0x08(SB)/8, $0x0f0f0f0f0f0f0f0f
+GLOBL bit_reverse_and_table<>(SB), RODATA, $16 // mask selecting the low nibble of every byte
+
+DATA shuf_mask_dw0_0_dw1_0<>+0x00(SB)/8, $0xffffffff03020100
+DATA shuf_mask_dw0_0_dw1_0<>+0x08(SB)/8, $0xffffffff07060504
+GLOBL shuf_mask_dw0_0_dw1_0<>(SB), RODATA, $16
+
+DATA shuf_mask_0_0_dw1_0<>+0x00(SB)/8, $0xffffffffffffffff
+DATA shuf_mask_0_0_dw1_0<>+0x08(SB)/8, $0xffffffff07060504
+GLOBL shuf_mask_0_0_dw1_0<>(SB), RODATA, $16
+
+DATA shuf_mask_0_0_0_dw1<>+0x00(SB)/8, $0xffffffffffffffff
+DATA shuf_mask_0_0_0_dw1<>+0x08(SB)/8, $0x07060504ffffffff
+GLOBL shuf_mask_0_0_0_dw1<>(SB), RODATA, $16
+
+DATA shuf_mask_dw2_0_dw3_0<>+0x00(SB)/8, $0xffffffff0b0a0908
+DATA shuf_mask_dw2_0_dw3_0<>+0x08(SB)/8, $0xffffffff0f0e0d0c
+GLOBL shuf_mask_dw2_0_dw3_0<>(SB), RODATA, $16
+
+DATA bits_32_63<>+0x00(SB)/8, $0xffffffff00000000
+DATA bits_32_63<>+0x08(SB)/8, $0x0000000000000000
+GLOBL bits_32_63<>(SB), RODATA, $16 // mask keeping only bits 63:32 of a 128-bit value
+
+// Register aliases shared by eia256RoundTag8 and eia256RoundTag16
+#define XTMP1 X1
+#define XTMP2 X2
+#define XTMP3 X3
+#define XTMP4 X4
+#define XTMP5 X5
+#define XTMP6 X6
+#define XDATA X7
+#define XDIGEST X8
+#define KS_L X9
+#define KS_M1 X10
+#define KS_M2 X11
+#define KS_H X12
+// eia256RoundTag8 folds the 16 bytes at p into the 64-bit tag at t, then moves key stream words 4..7 to 0..3.
+// func eia256RoundTag8(t *uint32, keyStream *uint32, p *byte)
+TEXT ·eia256RoundTag8(SB),NOSPLIT,$0
+	MOVQ t+0(FP), AX
+	MOVQ keyStream+8(FP), BX // name matches the Go prototype so go vet (asmdecl) accepts it
+	MOVQ p+16(FP), CX
+
+	CMPB ·useAVX(SB), $1
+	JE avx
+
+	// Reverse data bytes
+	MOVUPS (0)(CX), XDATA
+	MOVOU bit_reverse_and_table<>(SB), XTMP4
+	MOVOU XDATA, XTMP2
+	PAND XTMP4, XTMP2 // XTMP2 - low nibbles of the data
+
+	PANDN XDATA, XTMP4
+	PSRLQ $4, XTMP4 // XTMP4 - high nibbles of the data, moved to the low position
+
+	MOVOU bit_reverse_table_h<>(SB), XTMP3
+	PSHUFB XTMP2, XTMP3
+
+	MOVOU bit_reverse_table_l<>(SB), XTMP1
+	PSHUFB XTMP4, XTMP1
+
+	PXOR XTMP1, XTMP3 // XTMP3 - bit reverse data bytes
+
+	// ZUC authentication part, 4x32 data bits
+	// setup KS
+	MOVUPS (0*4)(BX), XTMP1
+	MOVUPS (2*4)(BX), XTMP2
+	MOVUPS (4*4)(BX), XTMP4
+	PSHUFD $0x61, XTMP1, KS_L // KS bits [63:32 31:0 95:64 63:32]
+	PSHUFD $0x61, XTMP2, KS_M1 // KS bits [127:96 95:64 159:128 127:96]
+	PSHUFD $0x61, XTMP4, KS_M2 // KS bits [191:160 159:128 223:192 191:160]
+
+	// setup DATA
+	MOVOU XTMP3, XTMP1
+	PSHUFB shuf_mask_dw0_0_dw1_0<>(SB), XTMP1
+	MOVOU XTMP1, XTMP2 // XTMP1/2 - Data bits [31:0 0s 63:32 0s]
+
+	PSHUFB shuf_mask_dw2_0_dw3_0<>(SB), XTMP3
+	MOVOU XTMP3, XDIGEST // XDIGEST/XTMP3 - Data bits [95:64 0s 127:96 0s]
+
+	// clmul
+	// xor the results from 4 32-bit words together
+	// Save data for following products
+	MOVOU XTMP2, XTMP5 // Data bits [31:0 0s 63:32 0s]
+	MOVOU XTMP3, XTMP6 // Data bits [95:64 0s 127:96 0s]
+
+	// Calculate lower 32 bits of tag
+	PCLMULQDQ $0x00, KS_L, XTMP1
+	PCLMULQDQ $0x11, KS_L, XTMP2
+	PCLMULQDQ $0x00, KS_M1, XDIGEST
+	PCLMULQDQ $0x11, KS_M1, XTMP3
+
+	// XOR all products and move bits 63-32 to lower 32 bits
+	PXOR XTMP1, XTMP2
+	PXOR XTMP3, XDIGEST
+	PXOR XTMP2, XDIGEST
+	MOVQ XDIGEST, XDIGEST // Clear top 64 bits
+	PSRLDQ $4, XDIGEST
+
+	// Prepare data and calculate bits 63-32 of tag
+	MOVOU XTMP5, XTMP1
+	MOVOU XTMP5, XTMP2
+	MOVOU XTMP6, XTMP3
+	MOVOU XTMP6, XTMP4
+
+	PCLMULQDQ $0x10, KS_L, XTMP1
+	PCLMULQDQ $0x01, KS_M1, XTMP2
+	PCLMULQDQ $0x10, KS_M1, XTMP3
+	PCLMULQDQ $0x01, KS_M2, XTMP4
+
+	// XOR all the products and keep only bits 63-32
+	PXOR XTMP2, XTMP1
+	PXOR XTMP4, XTMP3
+	PXOR XTMP3, XTMP1
+	PAND bits_32_63<>(SB), XTMP1
+
+	// OR with lower 32 bits, to construct 64 bits of tag
+	POR XTMP1, XDIGEST
+
+	// Update tag
+	MOVQ XDIGEST, R10
+	XORQ R10, (AX)
+
+	// Copy last 16 bytes of KS to the front
+	MOVUPS (4*4)(BX), XTMP1
+	MOVUPS XTMP1, (0*4)(BX)
+
+	RET
+
+avx:
+	VMOVDQU (0)(CX), XDATA
+
+	// Reverse data bytes
+	VMOVDQU bit_reverse_and_table<>(SB), XTMP1
+	VPAND XTMP1, XDATA, XTMP2
+	VPANDN XDATA, XTMP1, XTMP3
+	VPSRLD $4, XTMP3, XTMP3
+
+	VMOVDQU bit_reverse_table_h<>(SB), XTMP1
+	VPSHUFB XTMP2, XTMP1, XTMP4
+	VMOVDQU bit_reverse_table_l<>(SB), XTMP1
+	VPSHUFB XTMP3, XTMP1, XTMP1
+	VPOR XTMP1, XTMP4, XTMP4
+
+	// ZUC authentication part, 4x32 data bits
+	// setup KS
+	VPSHUFD $0x61, (0*4)(BX), KS_L // KS bits [63:32 31:0 95:64 63:32]
+	VPSHUFD $0x61, (2*4)(BX), KS_M1 // KS bits [127:96 95:64 159:128 127:96]
+	VPSHUFD $0x61, (4*4)(BX), KS_M2 // KS bits [191:160 159:128 223:192 191:160]
+
+	// setup DATA
+	// Data bytes [31:0 0s 63:32 0s]
+	VPSHUFB shuf_mask_dw0_0_dw1_0<>(SB), XTMP4, XTMP1
+	// Data bytes [95:64 0s 127:96 0s]
+	VPSHUFB shuf_mask_dw2_0_dw3_0<>(SB), XTMP4, XTMP2
+
+
+	// clmul
+	// xor the results from 4 32-bit words together
+	// Calculate lower 32 bits of tag
+	VPCLMULQDQ $0x00, KS_L, XTMP1, XTMP3
+	VPCLMULQDQ $0x11, KS_L, XTMP1, XTMP4
+	VPCLMULQDQ $0x00, KS_M1, XTMP2, XTMP5
+	VPCLMULQDQ $0x11, KS_M1, XTMP2, XTMP6
+
+	VPXOR XTMP3, XTMP4, XTMP3
+	VPXOR XTMP5, XTMP6, XTMP5
+	VPXOR XTMP3, XTMP5, XTMP3
+
+	// Move bits 63-32 of the combined product to the low 32 bits of the digest
+	VMOVQ XTMP3, XTMP3 // Clear top 64 bits
+	VPSRLDQ $4, XTMP3, XDIGEST
+
+	VPCLMULQDQ $0x10, KS_L, XTMP1, XTMP3
+	VPCLMULQDQ $0x01, KS_M1, XTMP1, XTMP4
+	VPCLMULQDQ $0x10, KS_M1, XTMP2, XTMP5
+	VPCLMULQDQ $0x01, KS_M2, XTMP2, XTMP6
+
+	// XOR all the products and keep only 32-63 bits
+	VPXOR XTMP4, XTMP3, XTMP3
+	VPXOR XTMP6, XTMP5, XTMP5
+	VPXOR XTMP5, XTMP3, XTMP3
+	VPAND bits_32_63<>(SB), XTMP3, XTMP3
+
+	// XOR with bits 32-63 of previous digest
+	VPXOR XTMP3, XDIGEST, XDIGEST
+
+	// Update tag
+	VMOVQ XDIGEST, R10
+	XORQ R10, (AX)
+
+	// Copy last 16 bytes of KS to the front
+	VMOVDQU (4*4)(BX), XTMP1
+	VMOVDQU XTMP1, (0*4)(BX)
+
+	VZEROUPPER
+	RET
+// eia256RoundTag16 folds the 16 bytes at p into the 128-bit tag at t, then moves key stream words 4..7 to 0..3.
+// func eia256RoundTag16(t *uint32, keyStream *uint32, p *byte)
+TEXT ·eia256RoundTag16(SB),NOSPLIT,$0
+	MOVQ t+0(FP), AX
+	MOVQ keyStream+8(FP), BX // name matches the Go prototype so go vet (asmdecl) accepts it
+	MOVQ p+16(FP), CX
+
+	CMPB ·useAVX(SB), $1
+	JE avx
+
+	// Reverse data bytes
+	MOVUPS (0)(CX), XDATA
+	MOVOU bit_reverse_and_table<>(SB), XTMP4
+	MOVOU XDATA, XTMP2
+	PAND XTMP4, XTMP2 // XTMP2 - low nibbles of the data
+
+	PANDN XDATA, XTMP4
+	PSRLQ $4, XTMP4 // XTMP4 - high nibbles of the data, moved to the low position
+
+	MOVOU bit_reverse_table_h<>(SB), XTMP3
+	PSHUFB XTMP2, XTMP3
+
+	MOVOU bit_reverse_table_l<>(SB), XTMP1
+	PSHUFB XTMP4, XTMP1
+
+	PXOR XTMP1, XTMP3 // XTMP3 - bit reverse data bytes
+
+	// ZUC authentication part, 4x32 data bits
+	// setup KS
+	MOVUPS (0*4)(BX), XTMP1
+	MOVUPS (2*4)(BX), XTMP2
+	MOVUPS (4*4)(BX), XTMP4
+	PSHUFD $0x61, XTMP1, KS_L // KS bits [63:32 31:0 95:64 63:32]
+	PSHUFD $0x61, XTMP2, KS_M1 // KS bits [127:96 95:64 159:128 127:96]
+	PSHUFD $0x61, XTMP4, KS_M2 // KS bits [191:160 159:128 223:192 191:160]
+	PSHUFD $0xBB, XTMP4, KS_H // KS bits [255:224 223:192 255:224 223:192]
+
+	// setup DATA
+	MOVOU XTMP3, XTMP1
+	PSHUFB shuf_mask_dw0_0_dw1_0<>(SB), XTMP1
+	MOVOU XTMP1, XTMP2 // XTMP1/2 - Data bits [31:0 0s 63:32 0s]
+
+	PSHUFB shuf_mask_dw2_0_dw3_0<>(SB), XTMP3
+	MOVOU XTMP3, XDIGEST // XDIGEST/XTMP3 - Data bits [95:64 0s 127:96 0s]
+
+	// clmul
+	// xor the results from 4 32-bit words together
+	// Save data for following products
+	MOVOU XTMP2, XTMP5 // Data bits [31:0 0s 63:32 0s]
+	MOVOU XTMP3, XTMP6 // Data bits [95:64 0s 127:96 0s]
+
+	// Calculate lower 32 bits of tag
+	PCLMULQDQ $0x00, KS_L, XTMP1
+	PCLMULQDQ $0x11, KS_L, XTMP2
+	PCLMULQDQ $0x00, KS_M1, XDIGEST
+	PCLMULQDQ $0x11, KS_M1, XTMP3
+
+	// XOR all products and move bits 63-32 to lower 32 bits
+	PXOR XTMP1, XTMP2
+	PXOR XTMP3, XDIGEST
+	PXOR XTMP2, XDIGEST
+	MOVQ XDIGEST, XDIGEST // Clear top 64 bits
+	PSRLDQ $4, XDIGEST
+
+	// Prepare data and calculate bits 63-32 of tag
+	MOVOU XTMP5, XTMP1
+	MOVOU XTMP5, XTMP2
+	MOVOU XTMP6, XTMP3
+	MOVOU XTMP6, XTMP4
+
+	PCLMULQDQ $0x10, KS_L, XTMP1
+	PCLMULQDQ $0x01, KS_M1, XTMP2
+	PCLMULQDQ $0x10, KS_M1, XTMP3
+	PCLMULQDQ $0x01, KS_M2, XTMP4
+
+	// XOR all the products and keep only bits 63-32
+	PXOR XTMP2, XTMP1
+	PXOR XTMP4, XTMP3
+	PXOR XTMP3, XTMP1
+	PAND bits_32_63<>(SB), XTMP1
+
+	// OR with lower 32 bits, to construct 64 bits of tag
+	POR XTMP1, XDIGEST
+
+	// Prepare data and calculate bits 95-64 of tag
+	MOVOU XTMP5, XTMP1
+	MOVOU XTMP5, XTMP2
+	MOVOU XTMP6, XTMP3
+	MOVOU XTMP6, XTMP4
+
+	PCLMULQDQ $0x00, KS_M1, XTMP1
+	PCLMULQDQ $0x11, KS_M1, XTMP2
+	PCLMULQDQ $0x00, KS_M2, XTMP3
+	PCLMULQDQ $0x11, KS_M2, XTMP4
+
+	// XOR all the products and move bits 63-32 to bits 95-64
+	PXOR XTMP2, XTMP1
+	PXOR XTMP4, XTMP3
+	PXOR XTMP3, XTMP1
+	PSHUFB shuf_mask_0_0_dw1_0<>(SB), XTMP1
+
+	// OR with lower 64 bits, to construct 96 bits of tag
+	POR XTMP1, XDIGEST
+
+	// Prepare data and calculate bits 127-96 of tag
+	MOVOU XTMP5, XTMP1
+	MOVOU XTMP5, XTMP2
+	MOVOU XTMP6, XTMP3
+	MOVOU XTMP6, XTMP4
+
+	PCLMULQDQ $0x10, KS_M1, XTMP1
+	PCLMULQDQ $0x01, KS_M2, XTMP2
+	PCLMULQDQ $0x10, KS_M2, XTMP3
+	PCLMULQDQ $0x01, KS_H, XTMP4
+
+	// XOR all the products and move bits 63-32 to bits 127-96
+	PXOR XTMP2, XTMP1
+	PXOR XTMP4, XTMP3
+	PXOR XTMP3, XTMP1
+	PSHUFB shuf_mask_0_0_0_dw1<>(SB), XTMP1
+
+	// OR with lower 96 bits, to construct 128 bits of tag
+	POR XTMP1, XDIGEST
+
+	// Update tag
+	MOVUPS (AX), XTMP1
+	PXOR XTMP1, XDIGEST
+	MOVUPS XDIGEST, (AX)
+
+	// Copy last 16 bytes of KS to the front
+	MOVUPS (4*4)(BX), XTMP1
+	MOVUPS XTMP1, (0*4)(BX)
+
+	RET
+
+avx:
+	VMOVDQU (0)(CX), XDATA
+
+	// Reverse data bytes
+	VMOVDQU bit_reverse_and_table<>(SB), XTMP1
+	VPAND XTMP1, XDATA, XTMP2
+	VPANDN XDATA, XTMP1, XTMP3
+	VPSRLD $4, XTMP3, XTMP3
+
+	VMOVDQU bit_reverse_table_h<>(SB), XTMP1
+	VPSHUFB XTMP2, XTMP1, XTMP4
+	VMOVDQU bit_reverse_table_l<>(SB), XTMP1
+	VPSHUFB XTMP3, XTMP1, XTMP1
+	VPOR XTMP1, XTMP4, XTMP4
+
+	// ZUC authentication part, 4x32 data bits
+	// setup KS
+	VPSHUFD $0x61, (0*4)(BX), KS_L // KS bits [63:32 31:0 95:64 63:32]
+	VPSHUFD $0x61, (2*4)(BX), KS_M1 // KS bits [127:96 95:64 159:128 127:96]
+	VPSHUFD $0x61, (4*4)(BX), KS_M2 // KS bits [191:160 159:128 223:192 191:160]
+	VPSHUFD $0xBB, (4*4)(BX), KS_H // KS bits [255:224 223:192 255:224 223:192]
+
+	// setup DATA
+	// Data bytes [31:0 0s 63:32 0s]
+	VPSHUFB shuf_mask_dw0_0_dw1_0<>(SB), XTMP4, XTMP1
+	// Data bytes [95:64 0s 127:96 0s]
+	VPSHUFB shuf_mask_dw2_0_dw3_0<>(SB), XTMP4, XTMP2
+
+
+	// clmul
+	// xor the results from 4 32-bit words together
+	// Calculate lower 32 bits of tag
+	VPCLMULQDQ $0x00, KS_L, XTMP1, XTMP3
+	VPCLMULQDQ $0x11, KS_L, XTMP1, XTMP4
+	VPCLMULQDQ $0x00, KS_M1, XTMP2, XTMP5
+	VPCLMULQDQ $0x11, KS_M1, XTMP2, XTMP6
+
+	VPXOR XTMP3, XTMP4, XTMP3
+	VPXOR XTMP5, XTMP6, XTMP5
+	VPXOR XTMP3, XTMP5, XTMP3
+
+	// Move bits 63-32 of the combined product to the low 32 bits of the digest
+	VMOVQ XTMP3, XTMP3 // Clear top 64 bits
+	VPSRLDQ $4, XTMP3, XDIGEST
+
+	VPCLMULQDQ $0x10, KS_L, XTMP1, XTMP3
+	VPCLMULQDQ $0x01, KS_M1, XTMP1, XTMP4
+	VPCLMULQDQ $0x10, KS_M1, XTMP2, XTMP5
+	VPCLMULQDQ $0x01, KS_M2, XTMP2, XTMP6
+
+	// XOR all the products and keep only 32-63 bits
+	VPXOR XTMP4, XTMP3, XTMP3
+	VPXOR XTMP6, XTMP5, XTMP5
+	VPXOR XTMP5, XTMP3, XTMP3
+	VPAND bits_32_63<>(SB), XTMP3, XTMP3
+
+	// XOR with bits 32-63 of previous digest
+	VPXOR XTMP3, XDIGEST, XDIGEST
+
+	// Prepare data and calculate bits 95-64 of tag
+	VPCLMULQDQ $0x00, KS_M1, XTMP1, XTMP3
+	VPCLMULQDQ $0x11, KS_M1, XTMP1, XTMP4
+	VPCLMULQDQ $0x00, KS_M2, XTMP2, XTMP5
+	VPCLMULQDQ $0x11, KS_M2, XTMP2, XTMP6
+
+	// XOR all the products and move bits 63-32 to bits 95-64
+	VPXOR XTMP4, XTMP3, XTMP3
+	VPXOR XTMP6, XTMP5, XTMP5
+	VPXOR XTMP5, XTMP3, XTMP3
+
+	VPSHUFB shuf_mask_0_0_dw1_0<>(SB), XTMP3, XTMP3
+
+	// XOR into bits 64-95 of the digest built so far
+	VPXOR XTMP3, XDIGEST, XDIGEST
+
+	// Prepare data and calculate bits 127-96 of tag
+	VPCLMULQDQ $0x10, KS_M1, XTMP1, XTMP3
+	VPCLMULQDQ $0x01, KS_M2, XTMP1, XTMP4
+	VPCLMULQDQ $0x10, KS_M2, XTMP2, XTMP5
+	VPCLMULQDQ $0x01, KS_H, XTMP2, XTMP6
+
+	// XOR all the products and move bits 63-32 to bits 127-96
+	VPXOR XTMP4, XTMP3, XTMP3
+	VPXOR XTMP6, XTMP5, XTMP5
+	VPXOR XTMP5, XTMP3, XTMP3
+
+	VPSHUFB shuf_mask_0_0_0_dw1<>(SB), XTMP3, XTMP3
+
+	// XOR into bits 96-127 of the digest built so far
+	VPXOR XTMP3, XDIGEST, XDIGEST
+
+	// Update tag
+	VPXOR (AX), XDIGEST, XDIGEST
+	VMOVDQU XDIGEST, (AX) // unaligned store: t is a *uint32, so 16-byte alignment is not guaranteed
+
+	// Copy last 16 bytes of KS to the front
+	VMOVDQU (4*4)(BX), XTMP1
+	VMOVDQU XTMP1, (0*4)(BX)
+
+	VZEROUPPER
+	RET
diff --git a/zuc/eia256_generic.go b/zuc/eia256_generic.go
new file mode 100644
index 0000000..66598f1
--- /dev/null
+++ b/zuc/eia256_generic.go
@@ -0,0 +1,8 @@
+//go:build !amd64 || generic
+// +build !amd64 generic
+
+package zuc
+
+func block256(m *ZUC256Mac, p []byte) { // build without the amd64 assembly: always use the portable routine
+	block256Generic(m, p)
+}
diff --git a/zuc/eia_asm.go b/zuc/eia_asm.go
index e084f5f..f6ad22a 100644
--- a/zuc/eia_asm.go
+++ b/zuc/eia_asm.go
@@ -6,7 +6,6 @@ package zuc
 import "golang.org/x/sys/cpu"
 
 var supportsGFMUL = cpu.X86.HasPCLMULQDQ || cpu.ARM64.HasPMULL
-var useAVX2 = cpu.X86.HasAVX2 && cpu.X86.HasBMI2
 
 //go:noescape
 func eia3Round16B(t *uint32, keyStream *uint32, p *byte, tagSize int)
diff --git a/zuc/eia_asm_amd64.s b/zuc/eia_asm_amd64.s
index 10433fc..9f500b4 100644
--- a/zuc/eia_asm_amd64.s
+++ b/zuc/eia_asm_amd64.s
@@ -44,8 +44,8 @@ TEXT ·eia3Round16B(SB),NOSPLIT,$0
 	MOVQ p+16(FP), CX
 	MOVQ tagSize+24(FP), DX
 
-	CMPB ·useAVX2(SB), $1
-	JE avx2
+	CMPB ·useAVX(SB), $1
+	JE avx
 
 	// Reverse data bytes
 	MOVUPS (0)(CX), XDATA
@@ -103,7 +103,7 @@ TEXT ·eia3Round16B(SB),NOSPLIT,$0
 
 	RET
 
-avx2:
+avx:
 	VMOVDQU (0)(CX), XDATA
 
 	// Reverse data bytes
diff --git a/zuc/eia_test.go b/zuc/eia_test.go
index b7d9d5b..f512656 100644
--- a/zuc/eia_test.go
+++ b/zuc/eia_test.go
@@ -150,3 +150,57 @@ func Test_Finish(t *testing.T) {
 		}
 	}
 }
+
+func TestNewHash(t
*testing.T) { // TestNewHash: NewHash argument validation plus the reported Size and BlockSize
+	key := make([]byte, 16)
+	iv := make([]byte, 16)
+	_, err := NewHash(key[:1], iv) // a 1-byte key must be rejected
+	if err == nil {
+		t.Fatal("error is expected")
+	}
+
+	_, err = NewHash(key, iv[:1]) // a 1-byte IV must be rejected
+	if err == nil {
+		t.Fatal("error is expected")
+	}
+
+	h, err := NewHash(key, iv) // 16-byte key and IV must be accepted
+	if err != nil {
+		t.Fatal(err)
+	}
+	if h.Size() != 4 {
+		t.Fatal("eia3 mac size should be 4 bytes")
+	}
+	if h.BlockSize() != 16 {
+		t.Fatal("current eia3 implementation's block size should be 16 bytes")
+	}
+
+}
+// TestSum writes inputs of 8, 8, 16 and 21 bytes in sequence and compares the hex-encoded Sum with a fixed value.
+func TestSum(t *testing.T) {
+	expected := "6c2db416"
+	h, err := NewEIAHash(zucEIATests[1].key, zucEIATests[1].count, zucEIATests[1].bearer, zucEIATests[1].direction) // reuses the parameters of the second table-driven vector
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = h.Write([]byte("emmansun")) // 8 bytes
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = h.Write([]byte("shangmi1")) // 8 more bytes, 16 in total so far
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = h.Write([]byte("emmansun shangmi")) // exactly 16 bytes in one call
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = h.Write([]byte("emmansun shangmi 1234")) // 21 bytes: longer than 16, not a multiple of it
+	if err != nil {
+		t.Fatal(err)
+	}
+	mac := h.Sum(nil)
+	if hex.EncodeToString(mac) != expected {
+		t.Errorf("expected=%s, result=%s\n", expected, hex.EncodeToString(mac))
+	}
+}