sm4: xts asm amd64

This commit is contained in:
Sun Yimin 2023-08-24 11:47:06 +08:00 committed by GitHub
parent 61e5507c69
commit 9ee8ee4529
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 2118 additions and 22 deletions

View File

@ -296,13 +296,19 @@ func (c *xtsDecrypter) CryptBlocks(plaintext, ciphertext []byte) {
copy(x[remain:], plaintext[remain:blockSize])
//Copy the final plaintext bytes
copy(plaintext[blockSize:], plaintext)
} else {
//The last block contains exactly 128 bits
copy(x[:], ciphertext)
}
subtle.XORBytes(x[:], x[:], c.tweak[:])
c.b.Decrypt(x[:], x[:])
subtle.XORBytes(plaintext, x[:], c.tweak[:])
} else {
//The last block contains exactly 128 bits
subtle.XORBytes(plaintext, ciphertext, c.tweak[:])
c.b.Decrypt(plaintext, plaintext)
subtle.XORBytes(plaintext, plaintext, c.tweak[:])
// There may still be more ciphertext to process
mul2(&c.tweak, c.isGB)
}
}
}

View File

@ -114,7 +114,7 @@ var xtsGBTestVectors = []struct {
},
}
func TestXTS_GB(t *testing.T) {
func TestGBXTSSample(t *testing.T) {
for i, test := range xtsGBTestVectors {
key := fromHex(test.key)
tweak := fromHex(test.tweak)
@ -145,3 +145,85 @@ func TestXTS_GB(t *testing.T) {
}
}
}
var gbXtsTestVectors = []struct {
key string
sector uint64
plaintext string
ciphertext string
}{
{ // XTS-SM4-128 applied for a data unit of 32 bytes
"0000000000000000000000000000000000000000000000000000000000000000",
0,
"0000000000000000000000000000000000000000000000000000000000000000",
"d9b421f731c894fdc35b77291fe4e3b0e58e55e613a862b4d2b0f1073b4b4fd0",
}, {
"1111111111111111111111111111111122222222222222222222222222222222",
0x3333333333,
"4444444444444444444444444444444444444444444444444444444444444444",
"a74d726c11196a32be04e001ff29d0c7724feef81d666ae5afdfe4649544fcf5",
}, {
"fffefdfcfbfaf9f8f7f6f5f4f3f2f1f022222222222222222222222222222222",
0x3333333333,
"4444444444444444444444444444444444444444444444444444444444444444",
"7f76088effadf70c02ea9f95da0628d3ef2d6a77004beaa9016001d6789dd5a0",
}, { // XTS-SM4-128 applied for a data unit of 512 bytes
"2718281828459045235360287471352631415926535897932384626433832795",
0,
"000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f505152535455565758595a5b5c5d5e5f606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeafb0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff",
"54dd65b6326faea8fad1a83c63614af398bddb6824735dab93ec4e75734215d463f67daf53742fb2a2847d5fde3984f8882cfd5fa9d6642e1e871c155202440044251465211628ba86f8d2998387a685edde23c07610b7388aab17f205aa5dada33c0a8a4225bc114254c796f800c638e016d199cd21dc28e92dc2b8587545509a8e1d659c596d3f6c8c225a27bdb2b02fe5a0c0183a592b396d32765fe733afb438a6ffb305ae1377c56d872badcebbd37812ff79f0571b3f977537570a1f76b9a50c49ab8d867fa024ea4483a25f7947b07885bb839e777abe76af11adf3108d1195933f96b7949b0664bdb89beb3bc48fb5f5d2109d32332f17c9a6ddea55441d1bbf43280ec7e75791e234d651a0716209eb21ae06061e33a72b0c530cb15fe0b55016b188dad75c4c50232dce1f5df61911c79bee60397b64bb914c0f26efcfb6ffab2bb33bdfd8db98c44debbd4ca865d41cbe1d0801b01aba2603cbea599b32c836789deeb9a3c18f3cae977b42ec81f1dfef6e098dd9e9dd6c1822bb938b08641bb72461f8d38c1724a43ae1254b9223e2270cf9f7d71a6bf093df2079fd2cc2fe87e846d799de30483f80164c31e65d8aae5f72d6dc71118932a008df547c712bee45ddebcdce098d673ef5ede91edfd45d17cb90963d3e2e2e2508a376a7b1af4d69e756ea5df52ac440791d57d56b5e057ad00e077d2df5009416",
}, { // Vector 5
"2718281828459045235360287471352631415926535897932384626433832795",
1,
"27a7479befa1d476489f308cd4cfa6e2a96e4bbe3208ff25287dd3819616e89cc78cf7f5e543445f8333d8fa7f56000005279fa5d8b5e4ad40e736ddb4d35412328063fd2aab53e5ea1e0a9f332500a5df9487d07a5c92cc512c8866c7e860ce93fdf166a24912b422976146ae20ce846bb7dc9ba94a767aaef20c0d61ad02655ea92dc4c4e41a8952c651d33174be51a10c421110e6d81588ede82103a252d8a750e8768defffed9122810aaeb99f9172af82b604dc4b8e51bcb08235a6f4341332e4ca60482a4ba1a03b3e65008fc5da76b70bf1690db4eae29c5f1badd03c5ccf2a55d705ddcd86d449511ceb7ec30bf12b1fa35b913f9f747a8afd1b130e94bff94effd01a91735ca1726acd0b197c4e5b03393697e126826fb6bbde8ecc1e08298516e2c9ed03ff3c1b7860f6de76d4cecd94c8119855ef5297ca67e9f3e7ff72b1e99785ca0a7e7720c5b36dc6d72cac9574c8cbbc2f801e23e56fd344b07f22154beba0f08ce8891e643ed995c94d9a69c9f1b5f499027a78572aeebd74d20cc39881c213ee770b1010e4bea718846977ae119f7a023ab58cca0ad752afe656bb3c17256a9f6e9bf19fdd5a38fc82bbe872c5539edb609ef4f79c203ebb140f2e583cb2ad15b4aa5b655016a8449277dbd477ef2c8d6c017db738b18deb4a427d1923ce3ff262735779a418f20a282df920147beabe421ee5319d0568",
"0e36ba273dd121afc77e0d8c00aa4a665f801f3607af61b61058b2f5d007310822200eaaeef759d515ebd032dad4235f5cd2dc735b57b56e003bce3f56890618877db69aa4519edcf681c6fc19c9c4a5655372d1549148c759efba00140275b46b6a5f6522de1702c48ff209a1dd7d1f56e775252796a09c20f903bfb3935bc79c0cdcaa9d2f30e616160e0662fb35311676e86e18d7d90d4203bc6862a9187b8657162143ce914750a86f984cf660311917e00fcf450ee188f088b4222522276bf3391e94de4fdad4134dfc7d08113c65e1b103bd3ad75fb13bba7f842451f9023ed21f1d23bc1c57d593932e021548bbff61ea9a24f359b4f7a8f2a998587495b726411f84734b189f65c4e79f09c7875f9c924b32e5bf2785a9935854e08ce86f5a4a399af6731099a13e10db0b32b888865b4416d69014a8cdb28b3912ec0b832835df7b59637d0687747815ba7cf9efae862dd6e80763acb50898fe1b3ba13a39d81b20d6d50613fbb5fbdcae2a7a87b9377eec455a8bae5102d5e6a7bea9b6b77d3f9895b277a55a524721cd0e59ce35e915de622480c5e0d31d153282dd832278fd2b795933f5dc591c17bd6d7f38fcd6afce551e8485109673881519d2845395ce9ceaea6306e38a73f9bb990931323a3136d18ee76c3e727cfb07cf386519313e1c44adcc50ae79bfac6952e3b98948206fb3dc3ebaed556bf27f16",
}, { // XTS-SM4-128 applied for a data unit that is not a multiple of 16 bytes, but is still a whole number of bytes
"c46acc2e7e013cb71cdbf750cf76b000249fbf4fb6cd17607773c23ffa2c4330",
94,
"7e9c2289cba460e470222953439cdaa892a5433d4dab2a3f67",
"4d5501ea41cf6b6532b4b7129c6f6ee74605d9fb66f1f12c0c",
}, {
"56ffcc9bbbdf413f0fc0f888f44b7493bb1925a39b8adf02d9009bb16db0a887",
144,
"9a839cc14363bafcfc0cc93b14f8e769d35b94cc98267438e3",
"f04f3f16b354cccdc39fc664ec7f8db010a83bcacbc5c96353",
},
{
"7454a43b87b1cf0dec95032c22873be3cace3bb795568854c1a008c07c5813f3",
108,
"41088fa15195b2733fe824d2c1fdc8306080863945fb2a73cf",
"791a9469ed5a22d8195ac37c43c1b0377dc15126349bed1465",
},
}
// TestGBXTS checks every entry of gbXtsTestVectors in both directions: the
// plaintext must encrypt to the expected ciphertext, and that ciphertext must
// decrypt back to the original plaintext.
//
// Each vector's key is split into two equal halves, which are handed to the
// constructors as two separate key arguments together with the sector number.
func TestGBXTS(t *testing.T) {
	for i, tc := range gbXtsTestVectors {
		fullKey := fromHex(tc.key)
		mid := len(fullKey) / 2
		key1, key2 := fullKey[:mid], fullKey[mid:]

		enc, err := cipher.NewGBXTSEncrypterWithSector(sm4.NewCipher, key1, key2, tc.sector)
		if err != nil {
			t.Errorf("#%d: failed to create encrypter: %s", i, err)
			continue
		}
		dec, err := cipher.NewGBXTSDecrypterWithSector(sm4.NewCipher, key1, key2, tc.sector)
		if err != nil {
			t.Errorf("#%d: failed to create decrypter: %s", i, err)
			continue
		}

		// Forward direction: plaintext -> ciphertext.
		pt := fromHex(tc.plaintext)
		ct := make([]byte, len(pt))
		enc.CryptBlocks(ct, pt)
		if wantCT := fromHex(tc.ciphertext); !bytes.Equal(ct, wantCT) {
			t.Errorf("#%d: encrypted failed, got: %x, want: %x", i, ct, wantCT)
			continue
		}

		// Reverse direction: the ciphertext just produced must round-trip.
		roundTrip := make([]byte, len(ct))
		dec.CryptBlocks(roundTrip, ct)
		if !bytes.Equal(roundTrip, pt) {
			t.Errorf("#%d: decryption failed, got: %x, want: %x", i, roundTrip, pt)
		}
	}
}

View File

@ -239,6 +239,33 @@ GLOBL r24_mask256<>(SB), 8, $32
PSHUFD $0xFF, rk128, x; \
SM4_ONE_ROUND_SSE(x, y, z, t3, t0, t1, t2); \
// SM4_SINGLE_BLOCK runs the single 16-byte block held in t0 through SM4 with
// SSE instructions and leaves the result in t0.
//
// Parameters:
//   RK      - register pointing at the round keys; 8 * 16 bytes are read from it.
//   rk128   - XMM scratch register that receives 16 bytes of round keys at a time.
//   x, y, z - XMM scratch registers passed through to SM4_4BLOCKS_4ROUNDS.
//   t0      - XMM register holding the input block on entry and the output on exit.
//   t1..t3  - XMM registers overwritten here with copies of t0 shuffled so that
//             32-bit lanes 1, 2 and 3 of the state sit in their low lanes.
//
// Steps: t0 is byte-shuffled with flip_mask (presumably a per-word byte swap -
// confirm against its DATA definition), the four state words are spread over
// t0..t3, and SM4_4BLOCKS_4ROUNDS is invoked eight times, each time with the
// next 16 bytes of round keys (by its name four rounds per call, i.e. 32 rounds
// in total - confirm against the macro body). The PALIGNR chain then packs the
// low 32-bit lane of t3, t2, t1 and t0 (in that order, lowest lane first) back
// into t0, which is byte-shuffled with flip_mask once more.
#define SM4_SINGLE_BLOCK(RK, rk128, x, y, z, t0, t1, t2, t3) \
PSHUFB flip_mask<>(SB), t0; \
PSHUFD $1, t0, t1; \ // low lane of t1 = lane 1 of t0
PSHUFD $2, t0, t2; \ // low lane of t2 = lane 2 of t0
PSHUFD $3, t0, t3; \ // low lane of t3 = lane 3 of t0
MOVOU (0*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (1*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (2*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (3*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (4*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (5*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (6*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
MOVOU (7*16)(RK), rk128; \
SM4_4BLOCKS_4ROUNDS(rk128, x, y, z, t0, t1, t2, t3); \
PALIGNR $4, t3, t3; \ // rotate t3 right by one 32-bit lane
PALIGNR $4, t3, t2; \
PALIGNR $4, t2, t1; \
PALIGNR $4, t1, t0; \ // t0 now holds the low lanes of t3, t2, t1, t0
PSHUFB flip_mask<>(SB), t0
#define SM4_4BLOCKS(RK, rk128, x, y, z, t0, t1, t2, t3) \
PSHUFB flip_mask<>(SB), t0; \
PSHUFB flip_mask<>(SB), t1; \

View File

@ -230,7 +230,7 @@ cbCSm4Single:
cbcSm4Single16:
MOVOU -16(DX), XWORD0
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
SM4_SINGLE_BLOCK(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
PXOR 0(SI), XWORD0

View File

@ -117,7 +117,7 @@ ecbSm4Single:
JEQ ecbSm4Single32
CMPQ DI, $48
JEQ ecbSm4Single48
SM4_4BLOCKS(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
SM4_SINGLE_BLOCK(AX, XWORD, YWORD, XWTMP0, XWTMP1, XWORD0, XWORD1, XWORD2, XWORD3)
MOVUPS XWORD0, 0(BX)
JMP ecbSm4Done

View File

@ -41,10 +41,6 @@
#define NIBBLE_MASK Y11
#define X_NIBBLE_MASK X11
DATA bswapMask<>+0x00(SB)/8, $0x08090a0b0c0d0e0f
DATA bswapMask<>+0x08(SB)/8, $0x0001020304050607
DATA gcmPoly<>+0x00(SB)/8, $0x0000000000000001
DATA gcmPoly<>+0x08(SB)/8, $0xc200000000000000
@ -79,7 +75,6 @@ DATA andMask<>+0xd8(SB)/8, $0x0000ffffffffffff
DATA andMask<>+0xe0(SB)/8, $0xffffffffffffffff
DATA andMask<>+0xe8(SB)/8, $0x00ffffffffffffff
GLOBL bswapMask<>(SB), (NOPTR+RODATA), $16
GLOBL gcmPoly<>(SB), (NOPTR+RODATA), $16
GLOBL andMask<>(SB), (NOPTR+RODATA), $240
@ -102,7 +97,7 @@ TEXT ·gcmSm4Finish(SB),NOSPLIT,$0
MOVOU (tPtr), ACC0
MOVOU (tMsk), T2
MOVOU bswapMask<>(SB), BSWAP
MOVOU bswap_mask<>(SB), BSWAP
MOVOU gcmPoly<>(SB), POLY
SHLQ $3, plen
@ -277,7 +272,7 @@ TEXT ·gcmSm4Data(SB),NOSPLIT,$0
PXOR ACC0, ACC0
// MOVOU (tPtr), ACC0 // originally we passed in tag initial value
MOVOU bswapMask<>(SB), BSWAP
MOVOU bswap_mask<>(SB), BSWAP
MOVOU gcmPoly<>(SB), POLY
TESTQ autLen, autLen
@ -525,7 +520,7 @@ TEXT ·gcmSm4Enc(SB),0,$256-96
CMPB ·useAVX(SB), $1
JE avxGcmSm4Enc
MOVOU bswapMask<>(SB), BSWAP
MOVOU bswap_mask<>(SB), BSWAP
MOVOU gcmPoly<>(SB), POLY
MOVOU (tPtr), ACC0
@ -868,7 +863,7 @@ gcmSm4EncDone:
RET
avxGcmSm4Enc:
VMOVDQU bswapMask<>(SB), BSWAP
VMOVDQU bswap_mask<>(SB), BSWAP
VMOVDQU gcmPoly<>(SB), POLY
VMOVDQU (tPtr), ACC0
@ -1196,7 +1191,7 @@ avxGcmSm4EncDone:
RET
avx2GcmSm4Enc:
VMOVDQU bswapMask<>(SB), BSWAP
VMOVDQU bswap_mask<>(SB), BSWAP
VMOVDQU gcmPoly<>(SB), POLY
VMOVDQU (tPtr), ACC0
@ -1229,7 +1224,7 @@ avx2GcmSm4Enc:
VMOVDQU T0, (8*16 + 7*16)(SP)
increment(7)
VBROADCASTI128 bswapMask<>(SB), DWBSWAP
VBROADCASTI128 bswap_mask<>(SB), DWBSWAP
// load 8 ctrs for encryption
VMOVDQU (4*32 + 0*32)(SP), DWB0
VMOVDQU (4*32 + 1*32)(SP), DWB1
@ -1631,7 +1626,7 @@ TEXT ·gcmSm4Dec(SB),0,$128-96
CMPB ·useAVX(SB), $1
JE avxGcmSm4Dec
MOVOU bswapMask<>(SB), BSWAP
MOVOU bswap_mask<>(SB), BSWAP
MOVOU gcmPoly<>(SB), POLY
MOVOU (tPtr), ACC0
@ -1859,7 +1854,7 @@ gcmSm4DecDone:
RET
avxGcmSm4Dec:
VMOVDQU bswapMask<>(SB), BSWAP
VMOVDQU bswap_mask<>(SB), BSWAP
VMOVDQU gcmPoly<>(SB), POLY
VMOVDQU (tPtr), ACC0
@ -2082,7 +2077,7 @@ avxGcmSm4DecDone:
RET
avx2GcmSm4Dec:
VMOVDQU bswapMask<>(SB), BSWAP
VMOVDQU bswap_mask<>(SB), BSWAP
VMOVDQU gcmPoly<>(SB), POLY
VMOVDQU (tPtr), ACC0
@ -2114,7 +2109,7 @@ avx2GcmSm4Dec:
VMOVDQU T0, (7*16)(SP)
increment(7)
VBROADCASTI128 bswapMask<>(SB), DWBSWAP
VBROADCASTI128 bswap_mask<>(SB), DWBSWAP
avx2GcmSm4DecOctetsLoop:
CMPQ ptxLen, $128

View File

@ -43,3 +43,19 @@ type ctrAble interface {
type gcmAble interface {
NewGCM(nonceSize, tagSize int) (cipher.AEAD, error)
}
// xtsEncAble is an interface implemented by ciphers that have a specific
// optimized implementation of XTS encryption, like sm4.
// NewXTSEncrypter will check for this interface and return the specific
// BlockMode if found.
type xtsEncAble interface {
// NewXTSEncrypter returns the cipher's own XTS encryption BlockMode.
// encryptedTweak points at a BlockSize-byte tweak (already encrypted, going
// by its name - confirm with callers); isGB selects the GB/T 17964-2021
// variant.
NewXTSEncrypter(encryptedTweak *[BlockSize]byte, isGB bool) cipher.BlockMode
}
// xtsDecAble is an interface implemented by ciphers that have a specific
// optimized implementation of XTS decryption, like sm4.
// NewXTSDecrypter will check for this interface and return the specific
// BlockMode if found.
type xtsDecAble interface {
// NewXTSDecrypter returns the cipher's own XTS decryption BlockMode.
// encryptedTweak points at a BlockSize-byte tweak (already encrypted, going
// by its name - confirm with callers); isGB selects the GB/T 17964-2021
// variant.
NewXTSDecrypter(encryptedTweak *[BlockSize]byte, isGB bool) cipher.BlockMode
}

81
sm4/sm4_xts.go Normal file
View File

@ -0,0 +1,81 @@
//go:build amd64 && !purego
// +build amd64,!purego
package sm4
import (
"crypto/cipher"
"github.com/emmansun/gmsm/internal/alias"
)
// Compile-time checks that *sm4CipherAsm satisfies both XTS interfaces,
// xtsEncAble and xtsDecAble.
var (
	_ xtsEncAble = (*sm4CipherAsm)(nil)
	_ xtsDecAble = (*sm4CipherAsm)(nil)
)
// Direction flags stored in xts.enc and tested by CryptBlocks.
const (
	xtsDecrypt = iota // 0: decrypt
	xtsEncrypt        // 1: encrypt
)
// xts is the cipher.BlockMode returned by NewXTSEncrypter and NewXTSDecrypter.
// It holds everything CryptBlocks passes on to the XTS assembly routines.
type xts struct {
// b is the SM4 cipher; CryptBlocks takes the first element of its enc or dec
// slice (presumably the expanded round keys - confirm) depending on direction.
b *sm4CipherAsm
// tweak is initialised from the constructor's encryptedTweak argument and is
// handed to the assembly routines by pointer.
tweak [BlockSize]byte
isGB bool // if true, follows GB/T 17964-2021
// enc is xtsEncrypt or xtsDecrypt and selects the direction in CryptBlocks.
enc int
}
// NewXTSEncrypter returns a cipher.BlockMode that performs XTS encryption
// with b. The contents of encryptedTweak are copied into the returned mode, so
// the caller's array is not retained. isGB selects the GB/T 17964-2021 variant.
func (b *sm4CipherAsm) NewXTSEncrypter(encryptedTweak *[BlockSize]byte, isGB bool) cipher.BlockMode {
	return &xts{
		b:     b,
		tweak: *encryptedTweak,
		isGB:  isGB,
		enc:   xtsEncrypt,
	}
}
// NewXTSDecrypter returns a cipher.BlockMode that performs XTS decryption
// with b. The contents of encryptedTweak are copied into the returned mode, so
// the caller's array is not retained. isGB selects the GB/T 17964-2021 variant.
func (b *sm4CipherAsm) NewXTSDecrypter(encryptedTweak *[BlockSize]byte, isGB bool) cipher.BlockMode {
	return &xts{
		b:     b,
		tweak: *encryptedTweak,
		isGB:  isGB,
		enc:   xtsDecrypt,
	}
}
// BlockSize reports the block size of the mode, which is the package-level
// BlockSize constant.
func (x *xts) BlockSize() int {
	return BlockSize
}
// encryptSm4Xts has no Go body; it is implemented outside this file
// (presumably in xts_amd64.s - confirm). CryptBlocks calls it to encrypt when
// isGB is false, passing a pointer to the first element of the cipher's enc
// slice as xk and a pointer to the mode's tweak.
//
//go:noescape
func encryptSm4Xts(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
// encryptSm4XtsGB is the counterpart of encryptSm4Xts that CryptBlocks calls
// when isGB is true (the GB/T 17964-2021 variant). It has no Go body.
//
//go:noescape
func encryptSm4XtsGB(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
// decryptSm4Xts has no Go body; CryptBlocks calls it to decrypt when isGB is
// false, passing a pointer to the first element of the cipher's dec slice as
// xk and a pointer to the mode's tweak.
//
//go:noescape
func decryptSm4Xts(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
// decryptSm4XtsGB is the counterpart of decryptSm4Xts that CryptBlocks calls
// when isGB is true (the GB/T 17964-2021 variant). It has no Go body.
//
//go:noescape
func decryptSm4XtsGB(xk *uint32, tweak *[BlockSize]byte, dst, src []byte)
// CryptBlocks encrypts or decrypts src into dst, depending on how x was
// constructed, by calling the matching assembly routine with the mode's tweak.
//
// It panics if dst is shorter than src, if src is shorter than one block, or
// if dst and src overlap inexactly. No check is made here that len(src) is a
// multiple of the block size.
func (x *xts) CryptBlocks(dst, src []byte) {
	// Argument validation, in the same order as the panics are documented.
	switch {
	case len(dst) < len(src):
		panic("xts: dst is smaller than src")
	case len(src) < BlockSize:
		panic("xts: src length is smaller than the block size")
	case alias.InexactOverlap(dst[:len(src)], src):
		panic("xts: invalid buffer overlap")
	}

	// Pick the routine by direction and variant. The calls stay direct so the
	// //go:noescape annotations on the declarations keep applying.
	encrypting := x.enc == xtsEncrypt
	switch {
	case encrypting && x.isGB:
		encryptSm4XtsGB(&x.b.enc[0], &x.tweak, dst, src)
	case encrypting:
		encryptSm4Xts(&x.b.enc[0], &x.tweak, dst, src)
	case x.isGB:
		decryptSm4XtsGB(&x.b.dec[0], &x.tweak, dst, src)
	default:
		decryptSm4Xts(&x.b.dec[0], &x.tweak, dst, src)
	}
}

1889
sm4/xts_amd64.s Normal file

File diff suppressed because it is too large Load Diff