mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
sm3: 使用GG2等价公式 #166
This commit is contained in:
parent
f2199843b1
commit
232af559d7
@ -13,7 +13,7 @@
|
|||||||
## Packages
|
## Packages
|
||||||
* **SM2** - This is an SM2 sm2p256v1 implementation whose performance is similar to golang native NIST P256 under **amd64** and **arm64**; for implementation details, please refer to [SM2实现细节](https://github.com/emmansun/gmsm/wiki/SM2%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It supports the ShangMi SM2 digital signature, public key encryption, and key exchange algorithms.
|
* **SM2** - This is an SM2 sm2p256v1 implementation whose performance is similar to golang native NIST P256 under **amd64** and **arm64**; for implementation details, please refer to [SM2实现细节](https://github.com/emmansun/gmsm/wiki/SM2%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It supports the ShangMi SM2 digital signature, public key encryption, and key exchange algorithms.
|
||||||
|
|
||||||
* **SM3** - This is also an SM3 implementation whose performance is similar to golang native SHA 256 with SIMD under **amd64**; for implementation details, please refer to [SM3性能优化](https://github.com/emmansun/gmsm/wiki/SM3%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It also provides an SM3 implementation based on A64 cryptographic instructions, tested with QEMU.
|
* **SM3** - This is also an SM3 implementation whose performance is similar to golang native SHA 256 with SIMD under **amd64** and **arm64**; for implementation details, please refer to [SM3性能优化](https://github.com/emmansun/gmsm/wiki/SM3%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It also provides an SM3 implementation based on A64 cryptographic instructions, tested with QEMU.
|
||||||
|
|
||||||
* **SM4** - For the SM4 implementation, SIMD & AES-NI are used under **amd64** and **arm64**; for details, please refer to [SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It supports ECB/CBC/CFB/OFB/CTR/GCM/CCM/XTS modes. It also provides an SM4 implementation based on A64 cryptographic instructions, tested with QEMU.
|
* **SM4** - For the SM4 implementation, SIMD & AES-NI are used under **amd64** and **arm64**; for details, please refer to [SM4性能优化](https://github.com/emmansun/gmsm/wiki/SM4%E6%80%A7%E8%83%BD%E4%BC%98%E5%8C%96). It supports ECB/CBC/CFB/OFB/CTR/GCM/CCM/XTS modes. It also provides an SM4 implementation based on A64 cryptographic instructions, tested with QEMU.
|
||||||
|
|
||||||
|
@ -432,8 +432,8 @@ func round3(a, b, c, d, e, f, g, h string, i int) {
|
|||||||
fmt.Printf("w[%d] = p1(w[%d]^w[%d]^bits.RotateLeft32(w[%d], 15)) ^ bits.RotateLeft32(w[%d], 7) ^ w[%d]\n", i+4, i-12, i-5, i+1, i-9, i-2)
|
fmt.Printf("w[%d] = p1(w[%d]^w[%d]^bits.RotateLeft32(w[%d], 15)) ^ bits.RotateLeft32(w[%d], 7) ^ w[%d]\n", i+4, i-12, i-5, i+1, i-9, i-2)
|
||||||
fmt.Printf("tt2 = bits.RotateLeft32(%s, 12)\n", a)
|
fmt.Printf("tt2 = bits.RotateLeft32(%s, 12)\n", a)
|
||||||
fmt.Printf("ss1 = bits.RotateLeft32(tt2+%s+_K[%d], 7)\n", e, i)
|
fmt.Printf("ss1 = bits.RotateLeft32(tt2+%s+_K[%d], 7)\n", e, i)
|
||||||
fmt.Printf("%s = %s&(%s|%s) | (%s & %s) + %s + (ss1 ^ tt2) + (w[%d] ^ w[%d])\n", d, c, a, b, a, b, d, i, i+4)
|
fmt.Printf("%s = %s&(%s|%s) | (%s & %s) + %s + (ss1 ^ tt2) + (w[%d] ^ w[%d])\n", d, a, b, c, b, c, d, i, i+4)
|
||||||
fmt.Printf("tt2 = (%s & %s) | (^%s & %s) + %s + ss1 + w[%d]\n", e, f, e, g, h, i)
|
fmt.Printf("tt2 = (%s^%s)&%s ^ %s + %s + ss1 + w[%d]\n", f, g, e, g, h, i)
|
||||||
fmt.Printf("%s = bits.RotateLeft32(%s, 9)\n", b, b)
|
fmt.Printf("%s = bits.RotateLeft32(%s, 9)\n", b, b)
|
||||||
fmt.Printf("%s = bits.RotateLeft32(%s, 19)\n", f, f)
|
fmt.Printf("%s = bits.RotateLeft32(%s, 19)\n", f, f)
|
||||||
fmt.Printf("%s = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)\n\n", h)
|
fmt.Printf("%s = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)\n\n", h)
|
||||||
|
@ -201,7 +201,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[16], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[16], 7)
|
||||||
d = c&(a|b) | (a & b) + d + (ss1 ^ tt2) + (w[16] ^ w[20])
|
d = c&(a|b) | (a & b) + d + (ss1 ^ tt2) + (w[16] ^ w[20])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[16]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[16]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -211,7 +211,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[17], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[17], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[17] ^ w[21])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[17] ^ w[21])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[17]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[17]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -221,7 +221,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[18], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[18], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[18] ^ w[22])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[18] ^ w[22])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[18]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[18]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -231,7 +231,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[19], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[19], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[19] ^ w[23])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[19] ^ w[23])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[19]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[19]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -241,7 +241,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[20], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[20], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[20] ^ w[24])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[20] ^ w[24])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[20]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[20]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -251,7 +251,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[21], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[21], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[21] ^ w[25])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[21] ^ w[25])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[21]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[21]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -261,7 +261,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[22], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[22], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[22] ^ w[26])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[22] ^ w[26])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[22]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[22]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -271,7 +271,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[23], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[23], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[23] ^ w[27])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[23] ^ w[27])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[23]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[23]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -281,7 +281,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[24], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[24], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[24] ^ w[28])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[24] ^ w[28])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[24]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[24]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -291,7 +291,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[25], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[25], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[25] ^ w[29])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[25] ^ w[29])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[25]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[25]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -301,7 +301,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[26], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[26], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[26] ^ w[30])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[26] ^ w[30])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[26]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[26]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -311,7 +311,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[27], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[27], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[27] ^ w[31])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[27] ^ w[31])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[27]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[27]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -321,7 +321,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[28], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[28], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[28] ^ w[32])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[28] ^ w[32])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[28]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[28]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -331,7 +331,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[29], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[29], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[29] ^ w[33])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[29] ^ w[33])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[29]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[29]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -341,7 +341,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[30], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[30], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[30] ^ w[34])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[30] ^ w[34])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[30]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[30]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -351,7 +351,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[31], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[31], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[31] ^ w[35])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[31] ^ w[35])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[31]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[31]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -361,7 +361,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[32], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[32], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[32] ^ w[36])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[32] ^ w[36])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[32]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[32]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -371,7 +371,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[33], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[33], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[33] ^ w[37])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[33] ^ w[37])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[33]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[33]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -381,7 +381,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[34], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[34], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[34] ^ w[38])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[34] ^ w[38])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[34]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[34]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -391,7 +391,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[35], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[35], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[35] ^ w[39])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[35] ^ w[39])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[35]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[35]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -401,7 +401,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[36], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[36], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[36] ^ w[40])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[36] ^ w[40])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[36]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[36]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -411,7 +411,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[37], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[37], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[37] ^ w[41])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[37] ^ w[41])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[37]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[37]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -421,7 +421,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[38], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[38], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[38] ^ w[42])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[38] ^ w[42])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[38]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[38]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -431,7 +431,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[39], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[39], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[39] ^ w[43])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[39] ^ w[43])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[39]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[39]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -441,16 +441,17 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[40], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[40], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[40] ^ w[44])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[40] ^ w[44])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[40]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[40]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
|
|
||||||
// Round 42
|
// Round 42
|
||||||
w[45] = p1(w[29]^w[36]^bits.RotateLeft32(w[42], 15)) ^ bits.RotateLeft32(w[32], 7) ^ w[39]
|
w[45] = p1(w[29]^w[36]^bits.RotateLeft32(w[42], 15)) ^ bits.RotateLeft32(w[32], 7) ^ w[39]
|
||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[41], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[41], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[41] ^ w[45])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[41] ^ w[45])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[41]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[41]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -460,7 +461,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[42], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[42], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[42] ^ w[46])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[42] ^ w[46])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[42]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[42]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -470,7 +471,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[43], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[43], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[43] ^ w[47])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[43] ^ w[47])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[43]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[43]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -480,7 +481,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[44], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[44], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[44] ^ w[48])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[44] ^ w[48])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[44]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[44]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -490,7 +491,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[45], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[45], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[45] ^ w[49])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[45] ^ w[49])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[45]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[45]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -500,7 +501,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[46], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[46], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[46] ^ w[50])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[46] ^ w[50])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[46]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[46]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -510,7 +511,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[47], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[47], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[47] ^ w[51])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[47] ^ w[51])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[47]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[47]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -520,7 +521,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[48], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[48], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[48] ^ w[52])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[48] ^ w[52])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[48]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[48]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -530,7 +531,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[49], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[49], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[49] ^ w[53])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[49] ^ w[53])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[49]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[49]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -540,7 +541,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[50], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[50], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[50] ^ w[54])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[50] ^ w[54])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[50]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[50]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -550,7 +551,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[51], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[51], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[51] ^ w[55])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[51] ^ w[55])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[51]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[51]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -560,7 +561,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[52], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[52], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[52] ^ w[56])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[52] ^ w[56])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[52]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[52]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -570,7 +571,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[53], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[53], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[53] ^ w[57])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[53] ^ w[57])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[53]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[53]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -580,7 +581,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[54], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[54], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[54] ^ w[58])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[54] ^ w[58])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[54]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[54]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -590,7 +591,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[55], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[55], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[55] ^ w[59])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[55] ^ w[59])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[55]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[55]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -600,7 +601,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[56], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[56], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[56] ^ w[60])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[56] ^ w[60])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[56]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[56]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -610,7 +611,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[57], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[57], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[57] ^ w[61])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[57] ^ w[61])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[57]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[57]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -620,7 +621,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[58], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[58], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[58] ^ w[62])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[58] ^ w[62])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[58]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[58]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -630,7 +631,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[59], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[59], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[59] ^ w[63])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[59] ^ w[63])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[59]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[59]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -640,7 +641,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(a, 12)
|
tt2 = bits.RotateLeft32(a, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+e+_K[60], 7)
|
ss1 = bits.RotateLeft32(tt2+e+_K[60], 7)
|
||||||
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[60] ^ w[64])
|
d = a&(b|c) | (b & c) + d + (ss1 ^ tt2) + (w[60] ^ w[64])
|
||||||
tt2 = (e & f) | (^e & g) + h + ss1 + w[60]
|
tt2 = (f^g)&e ^ g + h + ss1 + w[60]
|
||||||
b = bits.RotateLeft32(b, 9)
|
b = bits.RotateLeft32(b, 9)
|
||||||
f = bits.RotateLeft32(f, 19)
|
f = bits.RotateLeft32(f, 19)
|
||||||
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
h = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -650,7 +651,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(d, 12)
|
tt2 = bits.RotateLeft32(d, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+h+_K[61], 7)
|
ss1 = bits.RotateLeft32(tt2+h+_K[61], 7)
|
||||||
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[61] ^ w[65])
|
c = d&(a|b) | (a & b) + c + (ss1 ^ tt2) + (w[61] ^ w[65])
|
||||||
tt2 = (h & e) | (^h & f) + g + ss1 + w[61]
|
tt2 = (e^f)&h ^ f + g + ss1 + w[61]
|
||||||
a = bits.RotateLeft32(a, 9)
|
a = bits.RotateLeft32(a, 9)
|
||||||
e = bits.RotateLeft32(e, 19)
|
e = bits.RotateLeft32(e, 19)
|
||||||
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
g = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -660,7 +661,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(c, 12)
|
tt2 = bits.RotateLeft32(c, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+g+_K[62], 7)
|
ss1 = bits.RotateLeft32(tt2+g+_K[62], 7)
|
||||||
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[62] ^ w[66])
|
b = c&(d|a) | (d & a) + b + (ss1 ^ tt2) + (w[62] ^ w[66])
|
||||||
tt2 = (g & h) | (^g & e) + f + ss1 + w[62]
|
tt2 = (h^e)&g ^ e + f + ss1 + w[62]
|
||||||
d = bits.RotateLeft32(d, 9)
|
d = bits.RotateLeft32(d, 9)
|
||||||
h = bits.RotateLeft32(h, 19)
|
h = bits.RotateLeft32(h, 19)
|
||||||
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
f = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
@ -670,7 +671,7 @@ func blockGeneric(dig *digest, p []byte) {
|
|||||||
tt2 = bits.RotateLeft32(b, 12)
|
tt2 = bits.RotateLeft32(b, 12)
|
||||||
ss1 = bits.RotateLeft32(tt2+f+_K[63], 7)
|
ss1 = bits.RotateLeft32(tt2+f+_K[63], 7)
|
||||||
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[63] ^ w[67])
|
a = b&(c|d) | (c & d) + a + (ss1 ^ tt2) + (w[63] ^ w[67])
|
||||||
tt2 = (f & g) | (^f & h) + e + ss1 + w[63]
|
tt2 = (g^h)&f ^ h + e + ss1 + w[63]
|
||||||
c = bits.RotateLeft32(c, 9)
|
c = bits.RotateLeft32(c, 9)
|
||||||
g = bits.RotateLeft32(g, 19)
|
g = bits.RotateLeft32(g, 19)
|
||||||
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
e = tt2 ^ bits.RotateLeft32(tt2, 9) ^ bits.RotateLeft32(tt2, 17)
|
||||||
|
@ -115,10 +115,10 @@
|
|||||||
MOVL ((index)*4)(BP), DX; \
|
MOVL ((index)*4)(BP), DX; \
|
||||||
ADDL h, DX; \ // Wt + h
|
ADDL h, DX; \ // Wt + h
|
||||||
ADDL BX, DX; \ // h + ss1 + Wt
|
ADDL BX, DX; \ // h + ss1 + Wt
|
||||||
MOVL f, AX; \
|
MOVL f, BX; \
|
||||||
ANDL e, AX; \ // e AND f
|
XORL g, BX; \
|
||||||
ANDNL g, e, BX; \ // NOT(e) AND g
|
ANDL e, BX; \
|
||||||
ORL AX, BX; \
|
XORL g, BX; \ // GG2(e, f, g)
|
||||||
ADDL DX, BX
|
ADDL DX, BX
|
||||||
|
|
||||||
#define COPYRESULT(b, d, f, h) \
|
#define COPYRESULT(b, d, f, h) \
|
||||||
|
@ -30,11 +30,10 @@
|
|||||||
#define y0 R8
|
#define y0 R8
|
||||||
#define y1 R9
|
#define y1 R9
|
||||||
#define y2 R10
|
#define y2 R10
|
||||||
#define y3 R11
|
|
||||||
|
|
||||||
#define NUM_BYTES R12
|
#define NUM_BYTES R11
|
||||||
#define INP R13
|
#define INP R12
|
||||||
#define CTX R14 // Beginning of digest in memory (a, b, c, ... , h)
|
#define CTX R13 // Beginning of digest in memory (a, b, c, ... , h)
|
||||||
|
|
||||||
#define a1 R15
|
#define a1 R15
|
||||||
#define b1 R16
|
#define b1 R16
|
||||||
@ -210,10 +209,10 @@
|
|||||||
VEOR XTMP1.B16, XTMP0.B16, XTMP0.B16; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
VEOR XTMP1.B16, XTMP0.B16, XTMP0.B16; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
||||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
; \
|
; \
|
||||||
ANDW e, f, y1; \
|
EORW f, g, y1; \
|
||||||
BICW e, g, y3; \
|
ANDW e, y1; \
|
||||||
VEXT $12, XWORD2.B16, XWORD1.B16, XTMP1.B16; \ // XTMP1 = W[-9] = {w10,w9,w8,w7}, Vm = XWORD2, Vn = XWORD1
|
VEXT $12, XWORD2.B16, XWORD1.B16, XTMP1.B16; \ // XTMP1 = W[-9] = {w10,w9,w8,w7}, Vm = XWORD2, Vn = XWORD1
|
||||||
ORRW y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
EORW g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
||||||
; \
|
; \
|
||||||
RORW $23, b; \
|
RORW $23, b; \
|
||||||
@ -250,9 +249,9 @@
|
|||||||
VSRI $17, XTMP2.S4, XTMP4.S4; \ // XTMP4 = = XTMP2 rol 15 {xxBA}
|
VSRI $17, XTMP2.S4, XTMP4.S4; \ // XTMP4 = = XTMP2 rol 15 {xxBA}
|
||||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
; \
|
; \
|
||||||
ANDW e, f, y1; \
|
EORW f, g, y1; \
|
||||||
BICW e, g, y3; \
|
ANDW e, y1; \
|
||||||
ORRW y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
EORW g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
VSHL $8, XTMP4.S4, XTMP3.S4; \
|
VSHL $8, XTMP4.S4, XTMP3.S4; \
|
||||||
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
||||||
; \
|
; \
|
||||||
@ -287,10 +286,10 @@
|
|||||||
VSHL $15, XTMP3.S4, XTMP4.S4; \
|
VSHL $15, XTMP3.S4, XTMP4.S4; \
|
||||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
ANDW e, f, y1; \
|
EORW f, g, y1; \
|
||||||
BICW e, g, y3; \
|
ANDW e, y1; \
|
||||||
VSRI $17, XTMP3.S4, XTMP4.S4; \ // XTMP4 = W[-3] rol 15 {DCBA}
|
VSRI $17, XTMP3.S4, XTMP4.S4; \ // XTMP4 = W[-3] rol 15 {DCBA}
|
||||||
ORRW y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
EORW g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
||||||
RORW $23, b; \
|
RORW $23, b; \
|
||||||
RORW $13, f; \
|
RORW $13, f; \
|
||||||
@ -321,10 +320,10 @@
|
|||||||
VSRI $24, XTMP3.S4, XTMP1.S4; \ // XTMP1 = XTMP4 rol 23 {DCBA}
|
VSRI $24, XTMP3.S4, XTMP1.S4; \ // XTMP1 = XTMP4 rol 23 {DCBA}
|
||||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
ANDW e, f, y1; \
|
EORW f, g, y1; \
|
||||||
BICW e, g, y3; \
|
ANDW e, y1; \
|
||||||
VEOR XTMP3.B16, XTMP4.B16, XTMP3.B16; \ // XTMP3 = XTMP4 XOR (XTMP4 rol 15 {DCBA})
|
VEOR XTMP3.B16, XTMP4.B16, XTMP3.B16; \ // XTMP3 = XTMP4 XOR (XTMP4 rol 15 {DCBA})
|
||||||
ORRW y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
EORW g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
||||||
RORW $23, b; \
|
RORW $23, b; \
|
||||||
RORW $13, f; \
|
RORW $13, f; \
|
||||||
@ -355,10 +354,10 @@
|
|||||||
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORRW y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDW y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
; \
|
; \
|
||||||
ANDW e, f, y1; \
|
EORW f, g, y1; \
|
||||||
BICW e, g, y3; \
|
ANDW e, y1; \
|
||||||
ORRW y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
EORW g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDW y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDW y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
; \
|
; \
|
||||||
RORW $23, b; \
|
RORW $23, b; \
|
||||||
RORW $13, f; \
|
RORW $13, f; \
|
||||||
|
@ -52,17 +52,14 @@
|
|||||||
#define y0 R12
|
#define y0 R12
|
||||||
#define y1 R13
|
#define y1 R13
|
||||||
#define y2 R14
|
#define y2 R14
|
||||||
#define y3 DI
|
|
||||||
|
|
||||||
// Offsets
|
// Offsets
|
||||||
#define XFER_SIZE 4*64*4
|
#define XFER_SIZE 4*64*4
|
||||||
#define INP_END_SIZE 8
|
#define INP_END_SIZE 8
|
||||||
#define INP_SIZE 8
|
|
||||||
|
|
||||||
#define _XFER 0
|
#define _XFER 0
|
||||||
#define _INP_END _XFER + XFER_SIZE
|
#define _INP_END _XFER + XFER_SIZE
|
||||||
#define _INP _INP_END + INP_END_SIZE
|
#define STACK_SIZE _INP_END + INP_END_SIZE
|
||||||
#define STACK_SIZE _INP + INP_SIZE
|
|
||||||
|
|
||||||
#define P0(tt2, tmp, out) \
|
#define P0(tt2, tmp, out) \
|
||||||
RORXL $23, tt2, tmp; \
|
RORXL $23, tt2, tmp; \
|
||||||
@ -221,12 +218,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPALIGNR $8, XDWORD2, XDWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
VPALIGNR $8, XDWORD2, XDWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPXOR XTMP1, XTMP0, XTMP0; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
VPXOR XTMP1, XTMP0, XTMP0; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPALIGNR $12, XDWORD1, XDWORD2, XTMP1; \ // XTMP1 = W[-9] = {w10,w9,w8,w7}
|
VPALIGNR $12, XDWORD1, XDWORD2, XTMP1; \ // XTMP1 = W[-9] = {w10,w9,w8,w7}
|
||||||
@ -256,12 +253,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPSHUFD $0x00, XTMP2, XTMP2; \ // XTMP2 = {AAAA}
|
VPSHUFD $0x00, XTMP2, XTMP2; \ // XTMP2 = {AAAA}
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPSRLQ $17, XTMP2, XTMP3; \ // XTMP3 = XTMP2 rol 15 {xxxA}
|
VPSRLQ $17, XTMP2, XTMP3; \ // XTMP3 = XTMP2 rol 15 {xxxA}
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPSRLQ $9, XTMP2, XTMP4; \ // XTMP4 = XTMP2 rol 23 {xxxA}
|
VPSRLQ $9, XTMP2, XTMP4; \ // XTMP4 = XTMP2 rol 23 {xxxA}
|
||||||
@ -291,12 +288,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPSLLD $15, XTMP3, XTMP4; \
|
VPSLLD $15, XTMP3, XTMP4; \
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPSRLD $(32-15), XTMP3, XTMP3; \
|
VPSRLD $(32-15), XTMP3, XTMP3; \
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPOR XTMP3, XTMP4, XTMP4; \ // XTMP4 = (W[-3] rol 15) {DCBA}
|
VPOR XTMP3, XTMP4, XTMP4; \ // XTMP4 = (W[-3] rol 15) {DCBA}
|
||||||
@ -326,12 +323,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPSHUFB R08_SHUFFLE_MASK, XTMP3, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCBA}
|
VPSHUFB R08_SHUFFLE_MASK, XTMP3, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCBA}
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 ^ (XTMP4 rol 15 {DCBA})
|
VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 ^ (XTMP4 rol 15 {DCBA})
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPXOR XTMP3, XTMP1, XTMP1; \ // XTMP1 = XTMP4 ^ (XTMP4 rol 15 {DCBA}) ^ (XTMP4 rol 23 {DCBA})
|
VPXOR XTMP3, XTMP1, XTMP1; \ // XTMP1 = XTMP4 ^ (XTMP4 rol 15 {DCBA}) ^ (XTMP4 rol 23 {DCBA})
|
||||||
@ -387,18 +384,18 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
; \
|
; \
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
; \
|
; \
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
; \
|
; \
|
||||||
P0(y2, y0, d)
|
P0(y2, y0, d)
|
||||||
|
|
||||||
TEXT ·blockAVX2(SB), 0, $1048-32
|
TEXT ·blockAVX2(SB), 0, $1040-32
|
||||||
MOVQ dig+0(FP), CTX // d.h[8]
|
MOVQ dig+0(FP), CTX // d.h[8]
|
||||||
MOVQ p_base+8(FP), INP
|
MOVQ p_base+8(FP), INP
|
||||||
MOVQ p_len+16(FP), NUM_BYTES
|
MOVQ p_len+16(FP), NUM_BYTES
|
||||||
@ -443,7 +440,6 @@ avx2_loop: // at each iteration works with one block (512 bit)
|
|||||||
|
|
||||||
avx2_last_block_enter:
|
avx2_last_block_enter:
|
||||||
ADDQ $64, INP
|
ADDQ $64, INP
|
||||||
MOVQ INP, _INP(SP)
|
|
||||||
|
|
||||||
avx2_schedule_compress: // for w0 - w47
|
avx2_schedule_compress: // for w0 - w47
|
||||||
// Do 4 rounds and scheduling
|
// Do 4 rounds and scheduling
|
||||||
@ -592,8 +588,6 @@ avx2_schedule_compress: // for w0 - w47
|
|||||||
DO_ROUND_N_1(_XFER + 30*32, 2, T62, c, d, e, f, g, h, a, b)
|
DO_ROUND_N_1(_XFER + 30*32, 2, T62, c, d, e, f, g, h, a, b)
|
||||||
DO_ROUND_N_1(_XFER + 30*32, 3, T63, b, c, d, e, f, g, h, a)
|
DO_ROUND_N_1(_XFER + 30*32, 3, T63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
MOVQ _INP(SP), INP
|
|
||||||
|
|
||||||
xorm( 0(CTX), a)
|
xorm( 0(CTX), a)
|
||||||
xorm( 4(CTX), b)
|
xorm( 4(CTX), b)
|
||||||
xorm( 8(CTX), c)
|
xorm( 8(CTX), c)
|
||||||
@ -687,7 +681,6 @@ avx2_compress: // Do second block using previously scheduled results
|
|||||||
DO_ROUND_N_1(_XFER + 30*32 + 16, 2, T62, c, d, e, f, g, h, a, b)
|
DO_ROUND_N_1(_XFER + 30*32 + 16, 2, T62, c, d, e, f, g, h, a, b)
|
||||||
DO_ROUND_N_1(_XFER + 30*32 + 16, 3, T63, b, c, d, e, f, g, h, a)
|
DO_ROUND_N_1(_XFER + 30*32 + 16, 3, T63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
MOVQ _INP(SP), INP
|
|
||||||
ADDQ $64, INP
|
ADDQ $64, INP
|
||||||
|
|
||||||
xorm( 0(CTX), a)
|
xorm( 0(CTX), a)
|
||||||
|
@ -44,17 +44,14 @@
|
|||||||
#define y0 R12
|
#define y0 R12
|
||||||
#define y1 R13
|
#define y1 R13
|
||||||
#define y2 R14
|
#define y2 R14
|
||||||
#define y3 DI
|
|
||||||
|
|
||||||
// Offsets
|
// Offsets
|
||||||
#define XFER_SIZE 2*16
|
#define XFER_SIZE 2*16
|
||||||
#define INP_END_SIZE 8
|
#define INP_END_SIZE 8
|
||||||
#define INP_SIZE 8
|
|
||||||
|
|
||||||
#define _XFER 0
|
#define _XFER 0
|
||||||
#define _INP_END _XFER + XFER_SIZE
|
#define _INP_END _XFER + XFER_SIZE
|
||||||
#define _INP _INP_END + INP_END_SIZE
|
#define STACK_SIZE _INP_END + INP_END_SIZE
|
||||||
#define STACK_SIZE _INP + INP_SIZE
|
|
||||||
|
|
||||||
#define SS12(a, e, const, ss1, ss2) \
|
#define SS12(a, e, const, ss1, ss2) \
|
||||||
MOVL a, ss2; \
|
MOVL a, ss2; \
|
||||||
@ -249,12 +246,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPALIGNR $8, XWORD2, XWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
VPALIGNR $8, XWORD2, XWORD3, XTMP0; \ // XTMP0 = W[-6] = {w13,w12,w11,w10}
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPXOR XTMP1, XTMP0, XTMP0; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
VPXOR XTMP1, XTMP0, XTMP0; \ // XTMP0 = W[-6] ^ (W[-13] rol 7)
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPALIGNR $12, XWORD1, XWORD2, XTMP1; \ // XTMP1 = W[-9] = {w10,w9,w8,w7}
|
VPALIGNR $12, XWORD1, XWORD2, XTMP1; \ // XTMP1 = W[-9] = {w10,w9,w8,w7}
|
||||||
@ -285,12 +282,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPSHUFD $0x00, XTMP2, XTMP2; \ // XTMP2 = {AAAA}
|
VPSHUFD $0x00, XTMP2, XTMP2; \ // XTMP2 = {AAAA}
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPSRLQ $17, XTMP2, XTMP3; \ // XTMP3 = XTMP2 rol 15 {xxxA}
|
VPSRLQ $17, XTMP2, XTMP3; \ // XTMP3 = XTMP2 rol 15 {xxxA}
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPSRLQ $9, XTMP2, XTMP4; \ // XTMP4 = XTMP2 rol 23 {xxxA}
|
VPSRLQ $9, XTMP2, XTMP4; \ // XTMP4 = XTMP2 rol 23 {xxxA}
|
||||||
@ -321,12 +318,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPSLLD $15, XTMP3, XTMP4; \
|
VPSLLD $15, XTMP3, XTMP4; \
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPSRLD $(32-15), XTMP3, XTMP3; \
|
VPSRLD $(32-15), XTMP3, XTMP3; \
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPOR XTMP3, XTMP4, XTMP4; \ // XTMP4 = (W[-3] rol 15) {DCBA}
|
VPOR XTMP3, XTMP4, XTMP4; \ // XTMP4 = (W[-3] rol 15) {DCBA}
|
||||||
@ -357,12 +354,12 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
VPSHUFB R08_SHUFFLE_MASK, XTMP3, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCBA}
|
VPSHUFB R08_SHUFFLE_MASK, XTMP3, XTMP1; \ // XTMP1 = XTMP4 rol 23 {DCBA}
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 ^ (XTMP4 rol 15 {DCBA})
|
VPXOR XTMP3, XTMP4, XTMP3; \ // XTMP3 = XTMP4 ^ (XTMP4 rol 15 {DCBA})
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g) = (e AND f) OR (NOT(e) AND g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
VPXOR XTMP3, XTMP1, XTMP1; \ // XTMP1 = XTMP4 ^ (XTMP4 rol 15 {DCBA}) ^ (XTMP4 rol 23 {DCBA})
|
VPXOR XTMP3, XTMP1, XTMP1; \ // XTMP1 = XTMP4 ^ (XTMP4 rol 15 {DCBA}) ^ (XTMP4 rol 23 {DCBA})
|
||||||
@ -410,11 +407,11 @@
|
|||||||
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
ORL y1, h; \ // h = (a AND b) OR (a AND c) OR (b AND c)
|
||||||
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
ADDL y0, h; \ // h = FF(a, b, c) + d + SS2 + W' = tt1
|
||||||
; \
|
; \
|
||||||
MOVL f, y3; \
|
MOVL f, y1; \
|
||||||
ANDL e, y3; \ // y3 = e AND f
|
XORL g, y1; \
|
||||||
ANDNL g, e, y1; \ // y1 = NOT(e) AND g
|
ANDL e, y1; \
|
||||||
ORL y3, y1; \ // y1 = (e AND f) OR (NOT(e) AND g)
|
XORL g, y1; \ // y1 = GG2(e, f, g)
|
||||||
ADDL y1, y2; \ // y2 = GG(e, f, g) + h + SS1 + W = tt2
|
ADDL y1, y2; \ // y2 = GG2(e, f, g) + h + SS1 + W = tt2
|
||||||
; \
|
; \
|
||||||
ROLL $9, b; \
|
ROLL $9, b; \
|
||||||
ROLL $19, f; \
|
ROLL $19, f; \
|
||||||
@ -471,7 +468,7 @@
|
|||||||
MOVOU XTMP0, XWORD0; \
|
MOVOU XTMP0, XWORD0; \
|
||||||
PXOR XTMP1, XWORD0
|
PXOR XTMP1, XWORD0
|
||||||
|
|
||||||
TEXT ·blockSIMD(SB), 0, $56-32
|
TEXT ·blockSIMD(SB), 0, $48-32
|
||||||
MOVQ dig+0(FP), CTX // d.h[8]
|
MOVQ dig+0(FP), CTX // d.h[8]
|
||||||
MOVQ p_base+8(FP), INP
|
MOVQ p_base+8(FP), INP
|
||||||
MOVQ p_len+16(FP), NUM_BYTES
|
MOVQ p_len+16(FP), NUM_BYTES
|
||||||
@ -507,7 +504,6 @@ sse_loop: // at each iteration works with one block (512 bit)
|
|||||||
PSHUFB X_BYTE_FLIP_MASK, XWORD3 // w15, w14, w13, w12
|
PSHUFB X_BYTE_FLIP_MASK, XWORD3 // w15, w14, w13, w12
|
||||||
|
|
||||||
ADDQ $64, INP
|
ADDQ $64, INP
|
||||||
MOVQ INP, _INP(SP)
|
|
||||||
|
|
||||||
sse_schedule_compress: // for w0 - w47
|
sse_schedule_compress: // for w0 - w47
|
||||||
// Do 4 rounds and scheduling
|
// Do 4 rounds and scheduling
|
||||||
@ -684,8 +680,6 @@ sse_schedule_compress: // for w0 - w47
|
|||||||
DO_ROUND_N_1(_XFER, 2, T62, c, d, e, f, g, h, a, b)
|
DO_ROUND_N_1(_XFER, 2, T62, c, d, e, f, g, h, a, b)
|
||||||
DO_ROUND_N_1(_XFER, 3, T63, b, c, d, e, f, g, h, a)
|
DO_ROUND_N_1(_XFER, 3, T63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
MOVQ _INP(SP), INP
|
|
||||||
|
|
||||||
xorm( 0(CTX), a)
|
xorm( 0(CTX), a)
|
||||||
xorm( 4(CTX), b)
|
xorm( 4(CTX), b)
|
||||||
xorm( 8(CTX), c)
|
xorm( 8(CTX), c)
|
||||||
@ -718,7 +712,6 @@ avx_loop: // at each iteration works with one block (512 bit)
|
|||||||
VPSHUFB X_BYTE_FLIP_MASK, XWORD3, XWORD3 // w15, w14, w13, w12
|
VPSHUFB X_BYTE_FLIP_MASK, XWORD3, XWORD3 // w15, w14, w13, w12
|
||||||
|
|
||||||
ADDQ $64, INP
|
ADDQ $64, INP
|
||||||
MOVQ INP, _INP(SP)
|
|
||||||
|
|
||||||
avx_schedule_compress: // for w0 - w47
|
avx_schedule_compress: // for w0 - w47
|
||||||
// Do 4 rounds and scheduling
|
// Do 4 rounds and scheduling
|
||||||
@ -868,8 +861,6 @@ avx_schedule_compress: // for w0 - w47
|
|||||||
DO_ROUND_N_1(_XFER, 2, T62, c, d, e, f, g, h, a, b)
|
DO_ROUND_N_1(_XFER, 2, T62, c, d, e, f, g, h, a, b)
|
||||||
DO_ROUND_N_1(_XFER, 3, T63, b, c, d, e, f, g, h, a)
|
DO_ROUND_N_1(_XFER, 3, T63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
MOVQ _INP(SP), INP
|
|
||||||
|
|
||||||
xorm( 0(CTX), a)
|
xorm( 0(CTX), a)
|
||||||
xorm( 4(CTX), b)
|
xorm( 4(CTX), b)
|
||||||
xorm( 8(CTX), c)
|
xorm( 8(CTX), c)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user