diff --git a/GCM-for-SM4.md b/GCM-for-SM4.md index ef15140..afb85cc 100644 --- a/GCM-for-SM4.md +++ b/GCM-for-SM4.md @@ -12,4 +12,40 @@ func gcmSm4Finish(productTable *[256]byte, tagMask, T *[16]byte, pLen, dLen uint 1. gcmSm4Finish没有改变,和gcmAesFinish一模一样; 1. precomputeTableAsm和gcmAesInit的区别在于前者没有加密部分,输入参数就是加密结果; -1. gcmSm4Data和gcmAesData的差别在于前者那个T参数同时作为输入输出,而后者只作为输出。 \ No newline at end of file +1. gcmSm4Data和gcmAesData的差别在于前者那个T参数同时作为输入输出,而后者只作为输出。 + +### 加密和GHASH结合ASM优化 +**主要困难**: +1. 底层的CTR加密数据不是block对齐的,更不是4、8blocks对齐的,所以尾部数据的加密及异或运算处理比较麻烦; +1. AMD64同时支持AVX2和NON-AVX2,代码量比较大,比较复杂; +1. 需要和GHASH混合处理以提高性能。 +计划先把下面方法转成ASM: +```go +// counterCrypt crypts in to out using g.cipher in counter mode. +func (g *gcm) counterCrypt(out, in []byte, counter *[gcmBlockSize]byte) { + mask := make([]byte, g.cipher.blocksSize) + counters := make([]byte, g.cipher.blocksSize) + + for len(in) >= g.cipher.blocksSize { + for i := 0; i < g.cipher.batchBlocks; i++ { + copy(counters[i*gcmBlockSize:(i+1)*gcmBlockSize], counter[:]) + gcmInc32(counter) + } + g.cipher.EncryptBlocks(mask, counters) + xor.XorWords(out, in, mask[:]) + out = out[g.cipher.blocksSize:] + in = in[g.cipher.blocksSize:] + } + + if len(in) > 0 { + blocks := (len(in) + gcmBlockSize - 1) / gcmBlockSize + for i := 0; i < blocks; i++ { + copy(counters[i*gcmBlockSize:], counter[:]) + gcmInc32(counter) + } + g.cipher.EncryptBlocks(mask, counters) + xor.XorBytes(out, in, mask[:blocks*gcmBlockSize]) + } +} +``` +最后再实现和GHASH的混合处理。