diff --git a/sm3/sm3block_amd64.s b/sm3/sm3block_amd64.s index 438ba7d..f056a67 100644 --- a/sm3/sm3block_amd64.s +++ b/sm3/sm3block_amd64.s @@ -214,15 +214,15 @@ ADDL (disp + 0*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W VPSRLD $(32-7), XTMP0, XTMP0; \ ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' VPOR XTMP0, XTMP1, XTMP1; \ // XTMP1 = W[-13] rol 7 ; \ - MOVL a, y1; \ - XORL b, y1; \ - XORL c, y1; \ + MOVL a, h; \ + XORL b, h; \ + XORL c, h; \ VPALIGNR $8, XDWORD2, XDWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10} - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ VPXOR XTMP1, XTMP0, XTMP0; \ // XTMP0 = W[-6] XOR (W[-13] rol 7) @@ -233,15 +233,13 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $(-9), y2, y0; \ VPXOR XDWORD0, XTMP1, XTMP1; \ // XTMP1 = W[-9] XOR W[-16] - RORXL $(-17), y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ + RORXL $(-17), y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) VPSHUFD $0xA5, XDWORD3, XTMP2; \ // XTMP2 = W[-3] {BBAA} {w14,w14,w13,w13} - MOVL y2, d // d = P(tt2) #define ROUND_AND_SCHED_N_0_1(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 1 ############################// @@ -256,14 +254,14 @@ ADDL (disp + 1*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W VPXOR XTMP1, XTMP2, XTMP2; \ // XTMP2 = W[-9] XOR W[-16] XOR (W[-3] rol 15) {xxBA} - ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - XORL b, y1; \ + MOVL a, h; \ + XORL b, h; \ VPSLLD $15, XTMP2, XTMP3; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ VPSRLD $(32-15), XTMP2, XTMP4; \ @@ -275,15 +273,13 @@ ROLL $9, b; \ ROLL $19, f; \ VPXOR XTMP2, XTMP4, XTMP4; \ // XTMP4 = XTMP2 XOR (XTMP2 rol 15 {xxBA}) - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ + RORXL $-17, y2, d; \ VPSLLD $23, XTMP2, XTMP3; \ - XORL y0, y2; \ - XORL y1, y2; \ - VPSRLD $(32-23), XTMP2, XTMP5; \ - MOVL y2, d // d = P(tt2) + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) + VPSRLD $(32-23), XTMP2, XTMP5; #define ROUND_AND_SCHED_N_0_2(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 2 ############################// @@ -298,14 +294,14 @@ ADDL (disp + 2*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W VPXOR XTMP4, XTMP0, XTMP2; \ // XTMP2 = {..., ..., W[1], W[0]} - ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ + MOVL a, h; \ VPALIGNR $12, XDWORD3, XTMP2, XTMP3; \ // XTMP3 = {..., W[1], W[0], w15} - XORL b, y1; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + XORL b, h; \ + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 VPSHUFD $80, XTMP3, XTMP4; \ // XTMP4 = = W[-3] {DDCC} ; \ MOVL e, y1; \ @@ -317,15 +313,13 @@ ROLL $9, b; \ ROLL $19, f; \ VPSHUFB shuff_DC00<>(SB), XTMP4, XTMP4; \ // XTMP4 = W[-3] rol 15 {DC00} - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ + RORXL $-17, y2, d; \ VPXOR XTMP1, XTMP4, XTMP4; \ // XTMP4 = W[-9] XOR W[-16] XOR (W[-3] rol 15) {DCxx} - XORL y0, y2; \ - XORL y1, y2; \ - VPSLLD $15, XTMP4, XTMP5; \ - MOVL y2, d // d = P(tt2) + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) + VPSLLD $15, XTMP4, XTMP5; #define ROUND_AND_SCHED_N_0_3(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 3 ############################// @@ -344,11 +338,11 @@ VPSLLD $23, XTMP4, XTMP5; \ ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - XORL b, y1; \ + MOVL a, h; \ + XORL b, h; \ VPSRLD $(32-23), XTMP4, XTMP1; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ VPOR XTMP1, XTMP5, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCxx} MOVL e, y1; \ @@ -360,15 +354,13 @@ ROLL $9, b; \ VPXOR XTMP1, XTMP0, XTMP1; \ // XTMP1 = {W[3], W[2], ..., ...} ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ VPALIGNR $8, XTMP1, XTMP2, XTMP3; \ // XTMP3 = {W[1], W[0], W[3], W[2]} - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) VPSHUFD $0x4E, XTMP3, XDWORD0; \ // XDWORD0 = {W[3], W[2], W[1], W[0]} - MOVL y2, d // d = P(tt2) #define ROUND_AND_SCHED_N_1_0(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 0 ############################// @@ -382,7 +374,7 @@ VPSLLD $7, XTMP0, XTMP1; \ ADDL (disp + 0*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' VPSRLD $(32-7), XTMP0, XTMP0; \ ; \ @@ -393,10 +385,10 @@ ANDL c, y1; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) VPALIGNR $8, XDWORD2, XDWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10} - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ VPXOR XTMP1, XTMP0, XTMP0; \ // XTMP0 = W[-6] XOR (W[-13] rol 7) MOVL e, y1; \ @@ -411,14 +403,12 @@ ROLL $9, b; \ ROLL $19, f; \ VPXOR XDWORD0, XTMP1, XTMP1; \ // XTMP1 = W[-9] XOR W[-16] - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) VPSHUFD $0xA5, XDWORD3, XTMP2; \ // XTMP2 = W[-3] {BBAA} {w14,w14,w13,w13} - MOVL y2, d // d = P(tt2) #define ROUND_AND_SCHED_N_1_1(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 1 ############################// @@ -433,7 +423,7 @@ ADDL (disp + 1*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W VPXOR XTMP1, XTMP2, XTMP2; \ // XTMP2 = W[-9] XOR W[-16] XOR (W[-3] rol 15) {xxBA} - ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ MOVL a, y1; \ @@ -443,11 +433,11 @@ ANDL c, y1; \ VPSRLD $(32-15), XTMP2, XTMP4; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) VPOR XTMP3, XTMP4, XTMP4; \ // XTMP4 = XTMP2 rol 15 {xxBA} - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ MOVL f, y3; \ @@ -461,14 +451,12 @@ ROLL $9, b; \ ROLL $19, f; \ VPSLLD $23, XTMP2, XTMP3; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ + RORXL $-17, y2, d; \ + XORL y0, d; \ VPSRLD $(32-23), XTMP2, XTMP5; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + XORL y2, d; \ // d = P(tt2) #define ROUND_AND_SCHED_N_1_2(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 2 ############################// @@ -482,7 +470,7 @@ VPXOR XTMP4, XTMP5, XTMP4; \ // XTMP4 = XTMP2 XOR (XTMP2 rol 15 {xxBA}) XOR (XTMP2 rol 23 {xxBA}) ADDL (disp + 2*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' VPXOR XTMP4, XTMP0, XTMP2; \ // XTMP2 = {..., ..., W[1], W[0]} ADDL d, y0; \ // y0 = d + SS2 + W' ; \ @@ -493,10 +481,10 @@ ANDL c, y1; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) VPSHUFD $80, XTMP3, XTMP4; \ // XTMP4 = = W[-3] {DDCC} - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 VPSLLQ $15, XTMP4, XTMP4; \ // XTMP4 = W[-3] rol 15 {DxCx} ; \ MOVL e, y1; \ @@ -511,15 +499,13 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 VPSLLD $15, XTMP4, XTMP5; \ ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) VPSRLD $(32-15), XTMP4, XTMP3; \ - MOVL y2, d // d = P(tt2) #define ROUND_AND_SCHED_N_1_3(disp, const, a, b, c, d, e, f, g, h, XDWORD0, XDWORD1, XDWORD2, XDWORD3) \ ; \ // ############################# RND N + 3 ############################// @@ -533,7 +519,7 @@ ADDL (disp + 3*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 XOR (XTMP4 rol 15 {DCxx}) ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 3*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 3*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ MOVL a, y1; \ @@ -543,10 +529,10 @@ ANDL c, y1; \ VPSRLD $(32-23), XTMP4, XTMP1; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 VPOR XTMP1, XTMP5, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCxx} ; \ MOVL e, y1; \ @@ -561,15 +547,13 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 VPALIGNR $8, XTMP1, XTMP2, XTMP3; \ // XTMP3 = {W[1], W[0], W[3], W[2]} ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) VPSHUFD $0x4E, XTMP3, XDWORD0; \ // XDWORD0 = {W[3], W[2], W[1], W[0]} - MOVL y2, d // d = P(tt2) #define ROUND_N_0_0(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 0 ############################// @@ -581,13 +565,13 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 0*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - XORL b, y1; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL a, h; \ + XORL b, h; \ + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ XORL f, y1; \ @@ -596,13 +580,11 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_0_1(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 1 ############################// @@ -614,13 +596,13 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 1*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - XORL b, y1; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL a, h; \ + XORL b, h; \ + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ XORL f, y1; \ @@ -629,13 +611,11 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_0_2(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 2 ############################// @@ -647,13 +627,13 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 2*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - XORL b, y1; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL a, h; \ + XORL b, h; \ + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ XORL f, y1; \ @@ -662,13 +642,11 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_0_3(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 3 ############################// @@ -680,13 +658,13 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 3*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 3*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 3*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - XORL b, y1; \ - XORL c, y1; \ - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL a, h; \ + XORL b, h; \ + XORL c, h; \ + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ XORL f, y1; \ @@ -695,13 +673,11 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_1_0(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 0 ############################// @@ -713,7 +689,7 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 0*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 0*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ MOVL a, y1; \ @@ -721,10 +697,10 @@ ANDL y1, y3; \ ANDL c, y1; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ MOVL f, y3; \ @@ -736,54 +712,50 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_1_1(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 1 ############################// RORXL $-12, a, y0; \ // y0 = a <<< 12 - MOVL e, y1; \ - ADDL $const, y1; \ - ADDL y0, y1; \ // y1 = a <<< 12 + e + T - RORXL $-7, y1, y2; \ // y2 = SS1 - XORL y2, y0 \ // y0 = SS2 - ADDL (disp + 1*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W - ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' - ADDL d, y0; \ // y0 = d + SS2 + W' + MOVL e, y1; \ + ADDL $const, y1; \ + ADDL y0, y1; \ // y1 = a <<< 12 + e + T + RORXL $-7, y1, y2; \ // y2 = SS1 + XORL y2, y0 \ // y0 = SS2 + ADDL (disp + 1*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W + ADDL h, y2; \ // y2 = h + SS1 + W + ADDL (disp + 1*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' + ADDL d, y0; \ // y0 = d + SS2 + W' ; \ - MOVL a, y1; \ - MOVL b, y3; \ - ANDL y1, y3; \ - ANDL c, y1; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 - ; \ - MOVL e, y1; \ - MOVL f, y3; \ - ANDL y1, y3; \ // y3 = e AND f - NOTL y1; \ - ANDL g, y1; \ // y1 = NOT(e) AND g - ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g) - ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2 - ; \ - ROLL $9, b; \ - ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 - ; \ - RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + MOVL a, y1; \ + MOVL b, y3; \ + ANDL y1, y3; \ + ANDL c, y1; \ + ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 + ; \ + MOVL e, y1; \ + MOVL f, y3; \ + ANDL y1, y3; \ // y3 = e AND f + NOTL y1; \ + ANDL g, y1; \ // y1 = NOT(e) AND g + ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g) + ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2 + ; \ + ROLL $9, b; \ + ROLL $19, f; \ + ; \ + RORXL $-9, y2, y0; \ + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_1_2(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 2 ############################// @@ -795,7 +767,7 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 2*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 2*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ MOVL a, y1; \ @@ -803,10 +775,10 @@ ANDL y1, y3; \ ANDL c, y1; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ MOVL f, y3; \ @@ -818,13 +790,11 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) #define ROUND_N_1_3(disp, const, a, b, c, d, e, f, g, h) \ ; \ // ############################# RND N + 3 ############################// @@ -836,7 +806,7 @@ XORL y2, y0 \ // y0 = SS2 ADDL (disp + 3*4)(SP)(SRND*1), y2; \ // y2 = SS1 + W ADDL h, y2; \ // y2 = h + SS1 + W - ADDL (disp + 3*4 + 32)(SP)(SRND*1), y0;\ // y2 = SS2 + W' + ADDL (disp + 3*4 + 32)(SP)(SRND*1), y0;\ // y0 = SS2 + W' ADDL d, y0; \ // y0 = d + SS2 + W' ; \ MOVL a, y1; \ @@ -844,10 +814,10 @@ ANDL y1, y3; \ ANDL c, y1; \ ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) - MOVL b, y3; \ - ANDL c, y3; \ - ORL y3, y1; \ // y1 = (a AND b) OR (a AND c) OR (b AND c) - ADDL y1, y0; \ // y0 = FF(a, b, c) + d + SS2 + W' = tt1 + MOVL b, h; \ + ANDL c, h; \ + ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c) + ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1 ; \ MOVL e, y1; \ MOVL f, y3; \ @@ -859,13 +829,11 @@ ; \ ROLL $9, b; \ ROLL $19, f; \ - MOVL y0, h; \ // h = tt1 ; \ RORXL $-9, y2, y0; \ - RORXL $-17, y2, y1; \ - XORL y0, y2; \ - XORL y1, y2; \ - MOVL y2, d // d = P(tt2) + RORXL $-17, y2, d; \ + XORL y0, d; \ + XORL y2, d; \ // d = P(tt2) TEXT ·block(SB), 0, $1048-32 CMPB ·useAVX2(SB), $1