diff --git a/sm4/gcm_amd64.s b/sm4/gcm_amd64.s index 9835759..d964838 100644 --- a/sm4/gcm_amd64.s +++ b/sm4/gcm_amd64.s @@ -297,21 +297,19 @@ TEXT ·gcmSm4Init(SB),NOSPLIT,$0 XORL CX, CX sm4InitEncLoop: - SM4_SINGLE_ROUND(0, RK, CX, T0, T1, T2, B0, B1, B2, B3) - SM4_SINGLE_ROUND(1, RK, CX, T0, T1, T2, B1, B2, B3, B0) - SM4_SINGLE_ROUND(2, RK, CX, T0, T1, T2, B2, B3, B0, B1) - SM4_SINGLE_ROUND(3, RK, CX, T0, T1, T2, B3, B0, B1, B2) + SM4_SINGLE_ROUND(0, RK, CX, T0, T1, T2, B3, B2, B1, B0) + SM4_SINGLE_ROUND(1, RK, CX, T0, T1, T2, B2, B1, B0, B3) + SM4_SINGLE_ROUND(2, RK, CX, T0, T1, T2, B1, B0, B3, B2) + SM4_SINGLE_ROUND(3, RK, CX, T0, T1, T2, B0, B3, B2, B1) ADDL $16, CX CMPL CX, $4*32 JB sm4InitEncLoop - PEXTRD $0, B1, R8 - PINSRD $1, R8, B0 - PEXTRD $0, B2, R8 - PINSRD $2, R8, B0 - PEXTRD $0, B3, R8 - PINSRD $3, R8, B0 + PALIGNR $4, B3, B3 + PALIGNR $4, B3, B2 + PALIGNR $4, B2, B1 + PALIGNR $4, B1, B0 // H * 2 PSHUFD $0xff, B0, T0