gmsm/sm3/kdf_mult4_asm.go

105 lines
2.5 KiB
Go
Raw Normal View History

2024-09-04 17:47:15 +08:00
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2024-09-06 11:05:26 +08:00
//go:build (amd64 || arm64 || s390x || ppc64 || ppc64le) && !purego
package sm3
2024-11-21 14:32:32 +08:00
import "github.com/emmansun/gmsm/internal/byteorder"
// prepare data template: remaining data + [ct] + padding + length
// p will be 1 or 2 blocks according to the length of remaining data
func prepareInitData(baseMD *digest, p []byte, len, lenStart uint64) {
if baseMD.nx > 0 {
copy(p, baseMD.x[:baseMD.nx])
}
// Padding. Add a 1 bit and 0 bits until 56 bytes mod 64.
var tmp [64 + 8]byte // padding + length buffer
tmp[0] = 0x80
padlen := tmp[:lenStart+8]
2024-11-21 14:32:32 +08:00
byteorder.BEPutUint64(padlen[lenStart:], len)
copy(p[baseMD.nx+4:], padlen)
}
// p || state || words
// p = 64 * 4 * 2 = 512
// state = 8 * 16 = 128
// words = 68 * 16 = 1088
const preallocSizeBy4 = 1728
const parallelSize4 = 4
func kdfBy4(baseMD *digest, keyLen int, limit int) []byte {
if limit < 4 {
return kdfGeneric(baseMD, keyLen, limit)
}
2024-11-21 14:32:32 +08:00
var t uint64
blocks := 1
len := baseMD.len + 4
remainlen := len % 64
if remainlen < 56 {
t = 56 - remainlen
} else {
t = 64 + 56 - remainlen
blocks = 2
}
len <<= 3
// prepare temporary buffer
tmpStart := parallelSize4 * blocks * BlockSize
buffer := make([]byte, preallocSizeBy4)
tmp := buffer[tmpStart:]
// prepare processing data
var dataPtrs [parallelSize4]*byte
var data [parallelSize4][]byte
var digs [parallelSize4]*[8]uint32
var states [parallelSize4][8]uint32
2024-11-21 14:32:32 +08:00
for j := 0; j < parallelSize4; j++ {
digs[j] = &states[j]
p := buffer[blocks*BlockSize*j:]
data[j] = p
dataPtrs[j] = &p[0]
if j == 0 {
prepareInitData(baseMD, p, len, t)
} else {
copy(p, data[0])
}
}
var ct uint32 = 1
k := make([]byte, limit*Size)
ret := k
times := limit / parallelSize4
for i := 0; i < times; i++ {
for j := 0; j < parallelSize4; j++ {
// prepare states
states[j] = baseMD.h
// prepare data
2024-11-21 14:32:32 +08:00
byteorder.BEPutUint32(data[j][baseMD.nx:], ct)
ct++
}
blockMultBy4(&digs[0], &dataPtrs[0], &tmp[0], blocks)
copyResultsBy4(&states[0][0], &ret[0])
ret = ret[Size*parallelSize4:]
}
remain := limit % parallelSize4
for i := 0; i < remain; i++ {
2024-11-21 14:32:32 +08:00
byteorder.BEPutUint32(tmp[:], ct)
md := *baseMD
md.Write(tmp[:4])
h := md.checkSum()
copy(ret[i*Size:], h[:])
ct++
}
return k[:keyLen]
}
//go:noescape
func blockMultBy4(dig **[8]uint32, p **byte, buffer *byte, blocks int)
//go:noescape
func copyResultsBy4(dig *uint32, p *byte)