diff --git a/SM4性能优化.md b/SM4性能优化.md index 86f4cd7..910f314 100644 --- a/SM4性能优化.md +++ b/SM4性能优化.md @@ -165,3 +165,37 @@ Golang没提供这两种模式的优化接口,可能这两种模式不怎么 BenchmarkSM4GCMSeal8K-6 36162 33197 ns/op 246.77 MB/s 0 B/op 0 allocs/op BenchmarkAESGCMOpen8K-6 944479 1325 ns/op 6183.50 MB/s 0 B/op 0 allocs/op BenchmarkSM4GCMOpen8K-6 36162 33197 ns/op 246.77 MB/s 0 B/op 0 allocs/op + +AES-128的加密代码(amd64)直接使用AES-NI硬件指令(AESENC/AESENCLAST),10轮全部展开,不像SM4那样需要32轮循环,所以这种性能差别也不奇怪 + + // func encryptBlockAsm(nr int, xk *uint32, dst, src *byte) + TEXT ·encryptBlockAsm(SB),NOSPLIT,$0 + MOVQ xk+8(FP), AX + MOVQ dst+16(FP), DX + MOVQ src+24(FP), BX + MOVUPS 0(AX), X1 + MOVUPS 0(BX), X0 + ADDQ $16, AX + PXOR X1, X0 + MOVUPS 0(AX), X1 + AESENC X1, X0 + MOVUPS 16(AX), X1 + AESENC X1, X0 + MOVUPS 32(AX), X1 + AESENC X1, X0 + MOVUPS 48(AX), X1 + AESENC X1, X0 + MOVUPS 64(AX), X1 + AESENC X1, X0 + MOVUPS 80(AX), X1 + AESENC X1, X0 + MOVUPS 96(AX), X1 + AESENC X1, X0 + MOVUPS 112(AX), X1 + AESENC X1, X0 + MOVUPS 128(AX), X1 + AESENC X1, X0 + MOVUPS 144(AX), X1 + AESENCLAST X1, X0 + MOVUPS X0, 0(DX) + RET \ No newline at end of file