diff --git a/sm3/sm3blocks_simd_amd64.s b/sm3/sm3blocks_simd_amd64.s index 7bf1e23..f7dcccd 100644 --- a/sm3/sm3blocks_simd_amd64.s +++ b/sm3/sm3blocks_simd_amd64.s @@ -12,7 +12,7 @@ DATA r08_mask<>+0x00(SB)/8, $0x0605040702010003 DATA r08_mask<>+0x08(SB)/8, $0x0E0D0C0F0A09080B GLOBL r08_mask<>(SB), 8, $16 -// Transpose matrix without PUNPCKHDQ/PUNPCKLDQ/PUNPCKHQDQ/PUNPCKLQDQ instructions, bad performance! +// Transpose matrix with PUNPCKHDQ/PUNPCKLDQ/PUNPCKHQDQ/PUNPCKLQDQ instructions. // input: from high to low // r0 = [w3, w2, w1, w0] // r1 = [w7, w6, w5, w4] diff --git a/sm4/aesni_macros_amd64.s b/sm4/aesni_macros_amd64.s index 4e0e6cc..e5044e5 100644 --- a/sm4/aesni_macros_amd64.s +++ b/sm4/aesni_macros_amd64.s @@ -57,7 +57,7 @@ DATA fk_mask<>+0x00(SB)/8, $0x56aa3350a3b1bac6 DATA fk_mask<>+0x08(SB)/8, $0xb27022dc677d9197 GLOBL fk_mask<>(SB), 8, $16 -// Transpose matrix without PUNPCKHDQ/PUNPCKLDQ/PUNPCKHQDQ/PUNPCKLQDQ instructions, bad performance! +// Transpose matrix with PUNPCKHDQ/PUNPCKLDQ/PUNPCKHQDQ/PUNPCKLQDQ instructions. // input: from high to low // r0 = [w3, w2, w1, w0] // r1 = [w7, w6, w5, w4]