mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 12:16:20 +08:00
sm3: optimize FF2, #163
This commit is contained in:
parent
c5d44ca9b2
commit
77893f07f4
@ -432,7 +432,7 @@ func round3(a, b, c, d, e, f, g, h string, i int) {
|
||||
fmt.Printf("w[%d] = p1(w[%d]^w[%d]^bits.RotateLeft32(w[%d], 15)) ^ bits.RotateLeft32(w[%d], 7) ^ w[%d]\n", i+4, i-12, i-5, i+1, i-9, i-2)
|
||||
fmt.Printf("tt2 = bits.RotateLeft32(%s, 12)\n", a)
|
||||
fmt.Printf("ss1 = bits.RotateLeft32(tt2+%s+_K[%d], 7)\n", e, i)
|
||||
fmt.Printf("%s = (%s & %s) | (%s & %s) | (%s & %s) + %s + (ss1 ^ tt2) + (w[%d] ^ w[%d])\n", d, a, b, a, c, b, c, d, i, i+4)
|
||||
fmt.Printf("%s = %s&(%s|%s) | (%s & %s) + %s + (ss1 ^ tt2) + (w[%d] ^ w[%d])\n", d, c, a, b, a, b, d, i, i+4)
|
||||
fmt.Printf("tt2 = (%s & %s) | (^%s & %s) + %s + ss1 + w[%d]\n", e, f, e, g, h, i)
|
||||
fmt.Printf("%s = bits.RotateLeft32(%s, 9)\n", b, b)
|
||||
fmt.Printf("%s = bits.RotateLeft32(%s, 19)\n", f, f)
|
||||
|
@ -200,7 +200,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[20] = p1(w[4]^w[11]^bits.RotateLeft32(w[17], 15)) ^ bits.RotateLeft32(w[7], 7) ^ w[14]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[16], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[16] ^ w[20])
|
||||
d = c&(a|b) | (a & b) + d + (ss1 ^ tt2) + (w[16] ^ w[20])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[16]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -210,7 +210,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[21] = p1(w[5]^w[12]^bits.RotateLeft32(w[18], 15)) ^ bits.RotateLeft32(w[8], 7) ^ w[15]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[17], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[17] ^ w[21])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[17] ^ w[21])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[17]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -220,7 +220,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[22] = p1(w[6]^w[13]^bits.RotateLeft32(w[19], 15)) ^ bits.RotateLeft32(w[9], 7) ^ w[16]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[18], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[18] ^ w[22])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[18] ^ w[22])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[18]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -230,7 +230,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[23] = p1(w[7]^w[14]^bits.RotateLeft32(w[20], 15)) ^ bits.RotateLeft32(w[10], 7) ^ w[17]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[19], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[19] ^ w[23])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[19] ^ w[23])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[19]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -240,7 +240,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[24] = p1(w[8]^w[15]^bits.RotateLeft32(w[21], 15)) ^ bits.RotateLeft32(w[11], 7) ^ w[18]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[20], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[20] ^ w[24])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[20] ^ w[24])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[20]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -250,7 +250,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[25] = p1(w[9]^w[16]^bits.RotateLeft32(w[22], 15)) ^ bits.RotateLeft32(w[12], 7) ^ w[19]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[21], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[21] ^ w[25])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[21] ^ w[25])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[21]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -260,7 +260,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[26] = p1(w[10]^w[17]^bits.RotateLeft32(w[23], 15)) ^ bits.RotateLeft32(w[13], 7) ^ w[20]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[22], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[22] ^ w[26])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[22] ^ w[26])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[22]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -270,7 +270,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[27] = p1(w[11]^w[18]^bits.RotateLeft32(w[24], 15)) ^ bits.RotateLeft32(w[14], 7) ^ w[21]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[23], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[23] ^ w[27])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[23] ^ w[27])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[23]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -280,7 +280,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[28] = p1(w[12]^w[19]^bits.RotateLeft32(w[25], 15)) ^ bits.RotateLeft32(w[15], 7) ^ w[22]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[24], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[24] ^ w[28])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[24] ^ w[28])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[24]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -290,7 +290,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[29] = p1(w[13]^w[20]^bits.RotateLeft32(w[26], 15)) ^ bits.RotateLeft32(w[16], 7) ^ w[23]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[25], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[25] ^ w[29])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[25] ^ w[29])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[25]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -300,7 +300,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[30] = p1(w[14]^w[21]^bits.RotateLeft32(w[27], 15)) ^ bits.RotateLeft32(w[17], 7) ^ w[24]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[26], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[26] ^ w[30])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[26] ^ w[30])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[26]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -310,7 +310,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[31] = p1(w[15]^w[22]^bits.RotateLeft32(w[28], 15)) ^ bits.RotateLeft32(w[18], 7) ^ w[25]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[27], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[27] ^ w[31])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[27] ^ w[31])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[27]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -320,7 +320,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[32] = p1(w[16]^w[23]^bits.RotateLeft32(w[29], 15)) ^ bits.RotateLeft32(w[19], 7) ^ w[26]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[28], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[28] ^ w[32])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[28] ^ w[32])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[28]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -330,7 +330,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[33] = p1(w[17]^w[24]^bits.RotateLeft32(w[30], 15)) ^ bits.RotateLeft32(w[20], 7) ^ w[27]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[29], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[29] ^ w[33])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[29] ^ w[33])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[29]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -340,7 +340,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[34] = p1(w[18]^w[25]^bits.RotateLeft32(w[31], 15)) ^ bits.RotateLeft32(w[21], 7) ^ w[28]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[30], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[30] ^ w[34])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[30] ^ w[34])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[30]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -350,7 +350,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[35] = p1(w[19]^w[26]^bits.RotateLeft32(w[32], 15)) ^ bits.RotateLeft32(w[22], 7) ^ w[29]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[31], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[31] ^ w[35])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[31] ^ w[35])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[31]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -360,7 +360,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[36] = p1(w[20]^w[27]^bits.RotateLeft32(w[33], 15)) ^ bits.RotateLeft32(w[23], 7) ^ w[30]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[32], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[32] ^ w[36])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[32] ^ w[36])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[32]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -370,7 +370,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[37] = p1(w[21]^w[28]^bits.RotateLeft32(w[34], 15)) ^ bits.RotateLeft32(w[24], 7) ^ w[31]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[33], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[33] ^ w[37])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[33] ^ w[37])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[33]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -380,7 +380,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[38] = p1(w[22]^w[29]^bits.RotateLeft32(w[35], 15)) ^ bits.RotateLeft32(w[25], 7) ^ w[32]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[34], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[34] ^ w[38])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[34] ^ w[38])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[34]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -390,7 +390,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[39] = p1(w[23]^w[30]^bits.RotateLeft32(w[36], 15)) ^ bits.RotateLeft32(w[26], 7) ^ w[33]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[35], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[35] ^ w[39])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[35] ^ w[39])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[35]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -400,7 +400,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[40] = p1(w[24]^w[31]^bits.RotateLeft32(w[37], 15)) ^ bits.RotateLeft32(w[27], 7) ^ w[34]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[36], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[36] ^ w[40])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[36] ^ w[40])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[36]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -410,7 +410,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[41] = p1(w[25]^w[32]^bits.RotateLeft32(w[38], 15)) ^ bits.RotateLeft32(w[28], 7) ^ w[35]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[37], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[37] ^ w[41])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[37] ^ w[41])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[37]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -420,7 +420,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[42] = p1(w[26]^w[33]^bits.RotateLeft32(w[39], 15)) ^ bits.RotateLeft32(w[29], 7) ^ w[36]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[38], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[38] ^ w[42])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[38] ^ w[42])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[38]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -430,7 +430,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[43] = p1(w[27]^w[34]^bits.RotateLeft32(w[40], 15)) ^ bits.RotateLeft32(w[30], 7) ^ w[37]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[39], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[39] ^ w[43])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[39] ^ w[43])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[39]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -440,7 +440,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[44] = p1(w[28]^w[35]^bits.RotateLeft32(w[41], 15)) ^ bits.RotateLeft32(w[31], 7) ^ w[38]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[40], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[40] ^ w[44])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[40] ^ w[44])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[40]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -449,7 +449,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[45] = p1(w[29]^w[36]^bits.RotateLeft32(w[42], 15)) ^ bits.RotateLeft32(w[32], 7) ^ w[39]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[41], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[41] ^ w[45])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[41] ^ w[45])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[41]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -459,7 +459,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[46] = p1(w[30]^w[37]^bits.RotateLeft32(w[43], 15)) ^ bits.RotateLeft32(w[33], 7) ^ w[40]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[42], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[42] ^ w[46])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[42] ^ w[46])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[42]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -469,7 +469,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[47] = p1(w[31]^w[38]^bits.RotateLeft32(w[44], 15)) ^ bits.RotateLeft32(w[34], 7) ^ w[41]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[43], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[43] ^ w[47])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[43] ^ w[47])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[43]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -479,7 +479,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[48] = p1(w[32]^w[39]^bits.RotateLeft32(w[45], 15)) ^ bits.RotateLeft32(w[35], 7) ^ w[42]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[44], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[44] ^ w[48])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[44] ^ w[48])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[44]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -489,7 +489,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[49] = p1(w[33]^w[40]^bits.RotateLeft32(w[46], 15)) ^ bits.RotateLeft32(w[36], 7) ^ w[43]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[45], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[45] ^ w[49])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[45] ^ w[49])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[45]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -499,7 +499,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[50] = p1(w[34]^w[41]^bits.RotateLeft32(w[47], 15)) ^ bits.RotateLeft32(w[37], 7) ^ w[44]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[46], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[46] ^ w[50])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[46] ^ w[50])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[46]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -509,7 +509,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[51] = p1(w[35]^w[42]^bits.RotateLeft32(w[48], 15)) ^ bits.RotateLeft32(w[38], 7) ^ w[45]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[47], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[47] ^ w[51])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[47] ^ w[51])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[47]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -519,7 +519,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[52] = p1(w[36]^w[43]^bits.RotateLeft32(w[49], 15)) ^ bits.RotateLeft32(w[39], 7) ^ w[46]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[48], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[48] ^ w[52])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[48] ^ w[52])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[48]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -529,7 +529,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[53] = p1(w[37]^w[44]^bits.RotateLeft32(w[50], 15)) ^ bits.RotateLeft32(w[40], 7) ^ w[47]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[49], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[49] ^ w[53])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[49] ^ w[53])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[49]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -539,7 +539,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[54] = p1(w[38]^w[45]^bits.RotateLeft32(w[51], 15)) ^ bits.RotateLeft32(w[41], 7) ^ w[48]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[50], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[50] ^ w[54])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[50] ^ w[54])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[50]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -549,7 +549,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[55] = p1(w[39]^w[46]^bits.RotateLeft32(w[52], 15)) ^ bits.RotateLeft32(w[42], 7) ^ w[49]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[51], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[51] ^ w[55])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[51] ^ w[55])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[51]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -559,7 +559,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[56] = p1(w[40]^w[47]^bits.RotateLeft32(w[53], 15)) ^ bits.RotateLeft32(w[43], 7) ^ w[50]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[52], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[52] ^ w[56])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[52] ^ w[56])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[52]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -569,7 +569,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[57] = p1(w[41]^w[48]^bits.RotateLeft32(w[54], 15)) ^ bits.RotateLeft32(w[44], 7) ^ w[51]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[53], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[53] ^ w[57])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[53] ^ w[57])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[53]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -579,7 +579,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[58] = p1(w[42]^w[49]^bits.RotateLeft32(w[55], 15)) ^ bits.RotateLeft32(w[45], 7) ^ w[52]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[54], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[54] ^ w[58])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[54] ^ w[58])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[54]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -589,7 +589,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[59] = p1(w[43]^w[50]^bits.RotateLeft32(w[56], 15)) ^ bits.RotateLeft32(w[46], 7) ^ w[53]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[55], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[55] ^ w[59])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[55] ^ w[59])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[55]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -599,7 +599,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[60] = p1(w[44]^w[51]^bits.RotateLeft32(w[57], 15)) ^ bits.RotateLeft32(w[47], 7) ^ w[54]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[56], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[56] ^ w[60])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[56] ^ w[60])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[56]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -609,7 +609,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[61] = p1(w[45]^w[52]^bits.RotateLeft32(w[58], 15)) ^ bits.RotateLeft32(w[48], 7) ^ w[55]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[57], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[57] ^ w[61])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[57] ^ w[61])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[57]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -619,7 +619,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[62] = p1(w[46]^w[53]^bits.RotateLeft32(w[59], 15)) ^ bits.RotateLeft32(w[49], 7) ^ w[56]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[58], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[58] ^ w[62])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[58] ^ w[62])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[58]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -629,7 +629,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[63] = p1(w[47]^w[54]^bits.RotateLeft32(w[60], 15)) ^ bits.RotateLeft32(w[50], 7) ^ w[57]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[59], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[59] ^ w[63])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[59] ^ w[63])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[59]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
@ -639,7 +639,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[64] = p1(w[48]^w[55]^bits.RotateLeft32(w[61], 15)) ^ bits.RotateLeft32(w[51], 7) ^ w[58]
|
||||
tt2 = bits.RotateLeft32(a, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+e+_K[60], 7)
|
||||
d = (a & b) | (a & c) | (b & c) + d + (ss1 ^ tt2) + (w[60] ^ w[64])
|
||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[60] ^ w[64])
|
||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[60]
|
||||
b = bits.RotateLeft32(b, 9)
|
||||
f = bits.RotateLeft32(f, 19)
|
||||
@ -649,7 +649,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[65] = p1(w[49]^w[56]^bits.RotateLeft32(w[62], 15)) ^ bits.RotateLeft32(w[52], 7) ^ w[59]
|
||||
tt2 = bits.RotateLeft32(d, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+h+_K[61], 7)
|
||||
c = (d & a) | (d & b) | (a & b) + c + (ss1 ^ tt2) + (w[61] ^ w[65])
|
||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[61] ^ w[65])
|
||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[61]
|
||||
a = bits.RotateLeft32(a, 9)
|
||||
e = bits.RotateLeft32(e, 19)
|
||||
@ -659,7 +659,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[66] = p1(w[50]^w[57]^bits.RotateLeft32(w[63], 15)) ^ bits.RotateLeft32(w[53], 7) ^ w[60]
|
||||
tt2 = bits.RotateLeft32(c, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+g+_K[62], 7)
|
||||
b = (c & d) | (c & a) | (d & a) + b + (ss1 ^ tt2) + (w[62] ^ w[66])
|
||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[62] ^ w[66])
|
||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[62]
|
||||
d = bits.RotateLeft32(d, 9)
|
||||
h = bits.RotateLeft32(h, 19)
|
||||
@ -669,7 +669,7 @@ func blockGeneric(dig *digest, p []byte) {
|
||||
w[67] = p1(w[51]^w[58]^bits.RotateLeft32(w[64], 15)) ^ bits.RotateLeft32(w[54], 7) ^ w[61]
|
||||
tt2 = bits.RotateLeft32(b, 12)
|
||||
ss1 = bits.RotateLeft32(tt2+f+_K[63], 7)
|
||||
a = (b & c) | (b & d) | (c & d) + a + (ss1 ^ tt2) + (w[63] ^ w[67])
|
||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[63] ^ w[67])
|
||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[63]
|
||||
c = bits.RotateLeft32(c, 9)
|
||||
g = bits.RotateLeft32(g, 19)
|
||||
|
@ -77,13 +77,11 @@
|
||||
// Calculate tt1 in CX, used DX
|
||||
// ret = ((a AND b) OR (a AND c) OR (b AND c)) + d + (ROTL(12, a) XOR ss1) + (Wt XOR Wt+4)
|
||||
#define SM3TT11(index, a, b, c, d) \
|
||||
MOVL b, DX; \
|
||||
ANDL a, DX; \ // a AND b
|
||||
MOVL a, DX; \
|
||||
ORL b, DX; \ // a OR b
|
||||
MOVL a, CX; \
|
||||
ANDL c, CX; \ // a AND c
|
||||
ORL DX, CX; \ // (a AND b) OR (a AND c)
|
||||
MOVL b, DX; \
|
||||
ANDL c, DX; \ // b AND c
|
||||
ANDL b, CX; \ // a AND b
|
||||
ANDL c, DX; \
|
||||
ORL CX, DX; \ // (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL d, DX; \
|
||||
MOVL a, CX; \
|
||||
|
@ -206,13 +206,12 @@
|
||||
ADDW y1, y0; \ // y0 = SS2 + W'
|
||||
ADDW d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
ANDW a, b, y1; \
|
||||
ORRW a, b, y1; \
|
||||
VEXT $8, XWORD3.B16, XWORD2.B16, XTMP0.B16; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
||||
ANDW a, c, y3; \
|
||||
ORRW y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
ANDW b, c, h; \
|
||||
VEOR XTMP1.B16, XTMP0.B16, XTMP0.B16; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
||||
ANDW a, b, h; \
|
||||
ANDW c, y1; \
|
||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
VEOR XTMP1.B16, XTMP0.B16, XTMP0.B16; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
ANDW e, f, y1; \
|
||||
@ -247,11 +246,10 @@
|
||||
ADDW y1, y0; \ // y0 = SS2 + W'
|
||||
ADDW d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
ANDW a, b, y1; \
|
||||
ORRW a, b, y1; \
|
||||
VSHL $15, XTMP2.S4, XTMP4.S4; \
|
||||
ANDW a, c, y3; \
|
||||
ORRW y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
ANDW b, c, h; \
|
||||
ANDW a, b, h; \
|
||||
ANDW c, y1; \
|
||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
VSRI $17, XTMP2.S4, XTMP4.S4; \ // XTMP4 = XTMP2 rol 15 {xxBA}
|
||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
@ -288,11 +286,10 @@
|
||||
ADDW y1, y0; \ // y0 = SS2 + W'
|
||||
ADDW d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
ANDW a, b, y1; \
|
||||
ORRW a, b, y1; \
|
||||
VSHL $15, XTMP3.S4, XTMP4.S4; \
|
||||
ANDW a, c, y3; \
|
||||
ORRW y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
ANDW b, c, h; \
|
||||
ANDW a, b, h; \
|
||||
ANDW c, y1; \
|
||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
VSRI $17, XTMP3.S4, XTMP4.S4; \ // XTMP4 = W[-3] rol 15 {DCxx}
|
||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
@ -328,11 +325,10 @@
|
||||
ADDW y1, y0; \ // y0 = SS2 + W'
|
||||
ADDW d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
ANDW a, b, y1; \
|
||||
VEOR XTMP3.B16, XTMP4.B16, XTMP3.B16; \ // XTMP3 = XTMP4 XOR (XTMP4 rol 15 {DCxx})
|
||||
ANDW a, c, y3; \
|
||||
ORRW y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
ANDW b, c, h; \
|
||||
ORRW a, b, y1; \
|
||||
ANDW a, b, h; \
|
||||
ANDW c, y1; \
|
||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
VEOR XTMP3.B16, XTMP1.B16, XTMP1.B16; \ // XTMP1 = XTMP4 XOR (XTMP4 rol 15 {DCxx}) XOR (XTMP4 rol 23 {DCxx})
|
||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
@ -367,10 +363,9 @@
|
||||
ADDW y1, y0; \ // y0 = SS2 + W'
|
||||
ADDW d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
ANDW a, b, y1; \
|
||||
ANDW a, c, y3; \
|
||||
ORRW y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
ANDW b, c, h; \
|
||||
ORRW a, b, y1; \
|
||||
ANDW a, b, h; \
|
||||
ANDW c, y1; \
|
||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
|
@ -240,14 +240,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPOR XTMP0, XTMP1, XTMP1; \ // XTMP1 = W[-13] rol 7 = {ROTL(7,w6),ROTL(7,w5),ROTL(7,w4),ROTL(7,w3)}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPALIGNR $8, XDWORD2, XDWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -285,14 +283,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPXOR XTMP1, XTMP2, XTMP2; \ // XTMP2 = W[-9] ^ W[-16] ^ (W[-3] rol 15) {xxBA}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPSLLD $15, XTMP2, XTMP3; \
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -330,14 +326,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPALIGNR $12, XDWORD3, XTMP2, XTMP3; \ // XTMP3 = {..., W[1], W[0], w15}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPSHUFD $80, XTMP3, XTMP4; \ // XTMP4 = W[-3] {DDCC} = {W[0],W[0],w15,w15}
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -375,14 +369,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPSHUFB r08_mask<>(SB), XTMP3, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCxx}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 ^ (XTMP4 rol 15 {DCxx})
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -451,12 +443,10 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
ANDL y1, y3; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ANDL c, h; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -482,6 +472,8 @@ TEXT ·blockAVX2(SB), 0, $1048-32
|
||||
LEAQ -64(INP)(NUM_BYTES*1), NUM_BYTES // Pointer to the last block
|
||||
MOVQ NUM_BYTES, _INP_END(SP)
|
||||
|
||||
VMOVDQU flip_mask<>(SB), BYTE_FLIP_MASK
|
||||
|
||||
CMPQ NUM_BYTES, INP
|
||||
JE avx2_only_one_block
|
||||
|
||||
@ -502,8 +494,6 @@ avx2_loop: // at each iteration works with one block (512 bit)
|
||||
VMOVDQU (2*32)(INP), XTMP2
|
||||
VMOVDQU (3*32)(INP), XTMP3
|
||||
|
||||
VMOVDQU flip_mask<>(SB), BYTE_FLIP_MASK
|
||||
|
||||
// Apply Byte Flip Mask: LE -> BE
|
||||
VPSHUFB BYTE_FLIP_MASK, XTMP0, XTMP0
|
||||
VPSHUFB BYTE_FLIP_MASK, XTMP1, XTMP1
|
||||
@ -803,8 +793,6 @@ avx2_do_last_block:
|
||||
VMOVDQU 32(INP), XWORD2
|
||||
VMOVDQU 48(INP), XWORD3
|
||||
|
||||
VMOVDQU flip_mask<>(SB), BYTE_FLIP_MASK
|
||||
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD0, XWORD0
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD1, XWORD1
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD2, XWORD2
|
||||
|
@ -232,14 +232,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPOR XTMP0, XTMP1, XTMP1; \ // XTMP1 = W[-13] rol 7 = {ROTL(7,w6),ROTL(7,w5),ROTL(7,w4),ROTL(7,w3)}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPALIGNR $8, XWORD2, XWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -277,14 +275,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPXOR XTMP1, XTMP2, XTMP2; \ // XTMP2 = W[-9] ^ W[-16] ^ (W[-3] rol 15) {xxBA}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPSLLD $15, XTMP2, XTMP3; \
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -322,14 +318,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPALIGNR $12, XWORD3, XTMP2, XTMP3; \ // XTMP3 = {..., W[1], W[0], w15}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPSHUFD $80, XTMP3, XTMP4; \ // XTMP4 = W[-3] {DDCC}
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -367,14 +361,12 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
VPSHUFB r08_mask<>(SB), XTMP3, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCxx}
|
||||
ANDL y1, y3; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 ^ (XTMP4 rol 15 {DCxx})
|
||||
ANDL c, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -411,12 +403,10 @@
|
||||
ADDL d, y0; \ // y0 = d + SS2 + W'
|
||||
; \
|
||||
MOVL a, y1; \
|
||||
MOVL b, y3; \
|
||||
ANDL y1, y3; \
|
||||
ORL b, y1; \
|
||||
MOVL a, h; \
|
||||
ANDL b, h; \
|
||||
ANDL c, y1; \
|
||||
ORL y3, y1; \ // y1 = (a AND b) OR (a AND c)
|
||||
MOVL b, h; \
|
||||
ANDL c, h; \
|
||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||
; \
|
||||
@ -452,19 +442,19 @@ TEXT ·blockAVX(SB), 0, $56-32
|
||||
MOVL 24(CTX), g // g = H6
|
||||
MOVL 28(CTX), h // h = H7
|
||||
|
||||
avx_loop: // at each iteration works with one block (256 bit)
|
||||
VMOVDQU flip_mask<>(SB), X_BYTE_FLIP_MASK
|
||||
|
||||
avx_loop: // at each iteration works with one block (512 bit)
|
||||
|
||||
VMOVDQU 0(INP), XWORD0
|
||||
VMOVDQU 16(INP), XWORD1
|
||||
VMOVDQU 32(INP), XWORD2
|
||||
VMOVDQU 48(INP), XWORD3
|
||||
|
||||
VMOVDQU flip_mask<>(SB), X_BYTE_FLIP_MASK
|
||||
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD0, XWORD0
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD1, XWORD1
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD2, XWORD2
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD3, XWORD3
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD0, XWORD0 // w3, w2, w1, w0
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD1, XWORD1 // w7, w6, w5, w4
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD2, XWORD2 // w11, w10, w9, w8
|
||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD3, XWORD3 // w15, w14, w13, w12
|
||||
|
||||
ADDQ $64, INP
|
||||
MOVQ INP, _INP(SP)
|
||||
|
Loading…
x
Reference in New Issue
Block a user