kdf-sm3: add (8+4+1) test cases and reduce last round instructions

This commit is contained in:
Sun Yimin 2024-05-24 17:44:17 +08:00 committed by GitHub
parent 3ede319900
commit 238c0a3634
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 62 additions and 52 deletions

View File

@ -422,7 +422,8 @@ func TestKdf(t *testing.T) {
{"sm3 case 5", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 128}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb"},
{"sm3 case 6", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 159}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e8"},
{"sm3 case 7", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 300}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a9"},
{"sm3 case 8", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 516}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a93f26fd6c99111b3017db1d1e6025d28d88ed3a419eb9c72e4fa3267f19c806092fd80cb91079cc00cefc55db53ad840ed1e6384f4cf02d9f2ecbaed54391e7a6da71fca4ea53ccfdd4d85adf37e4be8af1324f43ee402f109ac6a77915fd7e248d3f14f3698dd0e8ea7ea27e4288b288d75b4343ec8ab3d0cd9491a146e1b6033c512399bcd1cb9568d4f10d582f145c3ad7aae4ace7a14ec0abf831edc5aabcf58a1fb05180fa6e79651aa8753ddbf3ca0877b9a9d745ae1729b253f61cfc726cba4c9113008187830e41d428ca223014c994f317998689"},
{"sm3 case 8", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 416}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a93f26fd6c99111b3017db1d1e6025d28d88ed3a419eb9c72e4fa3267f19c806092fd80cb91079cc00cefc55db53ad840ed1e6384f4cf02d9f2ecbaed54391e7a6da71fca4ea53ccfdd4d85adf37e4be8af1324f43ee402f109ac6a77915fd7e248d3f14f3698dd0e8ea7ea27e4288b288d75b4343"},
{"sm3 case 9", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 516}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a93f26fd6c99111b3017db1d1e6025d28d88ed3a419eb9c72e4fa3267f19c806092fd80cb91079cc00cefc55db53ad840ed1e6384f4cf02d9f2ecbaed54391e7a6da71fca4ea53ccfdd4d85adf37e4be8af1324f43ee402f109ac6a77915fd7e248d3f14f3698dd0e8ea7ea27e4288b288d75b4343ec8ab3d0cd9491a146e1b6033c512399bcd1cb9568d4f10d582f145c3ad7aae4ace7a14ec0abf831edc5aabcf58a1fb05180fa6e79651aa8753ddbf3ca0877b9a9d745ae1729b253f61cfc726cba4c9113008187830e41d428ca223014c994f317998689"},
}
for _, tt := range tests {
wantBytes, _ := hex.DecodeString(tt.want)

View File

@ -81,12 +81,6 @@ GLOBL r08_mask<>(SB), 8, $32
VPERM2I128 $0x31, r6, tmp4, r6; \ // r6 = [w62, w54, w46, w38, w30, w22, w14, w6]
VPERM2I128 $0x31, r7, tmp3, r7; \ // r7 = [w63, w55, w47, w39, w31, w23, w15, w7]
// xorm(P1, P2): P1 is a memory operand (e.g. 32(BX)), P2 is a vector register.
// Computes P2 = P1 XOR P2 with a single memory-source VPXOR, then writes the
// result back to P1, so both the memory location and the register end up
// holding the XOR of their previous values.
#define xorm(P1, P2) \
VPXOR P1, P2, P2; \
VMOVDQU P2, P1
// storeWord(W, j): store the 256-bit register W (unaligned store) into slot j
// of the area that starts 256 bytes past BX; each slot is 32 bytes wide.
#define storeWord(W, j) VMOVDQU W, (256+(j)*32)(BX)
// load 256 bits
@ -329,6 +323,7 @@ loop:
prepare8Words(0)
prepare8Words(1)
// Need to load the state again because the YMM registers are used by prepare8Words
loadState
ROUND_00_11(0, a, b, c, d, e, f, g, h)
@ -398,15 +393,19 @@ loop:
ROUND_16_63(62, c, d, e, f, g, h, a, b)
ROUND_16_63(63, b, c, d, e, f, g, h, a)
xorm( 0(BX), a)
xorm( 32(BX), b)
xorm( 64(BX), c)
xorm( 96(BX), d)
xorm( 128(BX), e)
xorm( 160(BX), f)
xorm( 192(BX), g)
xorm(224(BX), h)
VPXOR (0*32)(BX), a, a
VPXOR (1*32)(BX), b, b
VPXOR (2*32)(BX), c, c
VPXOR (3*32)(BX), d, d
VPXOR (4*32)(BX), e, e
VPXOR (5*32)(BX), f, f
VPXOR (6*32)(BX), g, g
VPXOR (7*32)(BX), h, h
DECQ DX
JZ end
saveState
LEAQ 64(srcPtr1), srcPtr1
LEAQ 64(srcPtr2), srcPtr2
LEAQ 64(srcPtr3), srcPtr3
@ -416,9 +415,9 @@ loop:
LEAQ 64(srcPtr7), srcPtr7
LEAQ 64(srcPtr8), srcPtr8
DECQ DX
JNZ loop
JMP loop
end:
TRANSPOSE_MATRIX(a, b, c, d, e, f, g, h, TMP1, TMP2, TMP3, TMP4)
// save state

View File

@ -79,13 +79,6 @@ GLOBL r08_mask<>(SB), 8, $16
MOVOU g, 96(BX) \
MOVOU h, 112(BX)
// xorm(P1, P2): P1 is a memory operand (e.g. 16(BX)), P2 is a vector register.
// Loads P1 into tmp1 (tmp1 is clobbered), XORs tmp1 into P2, then stores P2
// back to P1, so both the memory location and the register end up holding the
// XOR of their previous values.
#define xorm(P1, P2) \
MOVOU P1, tmp1; \
PXOR tmp1, P2; \
MOVOU P2, P1
// storeWord(W, j): store register W (unaligned store) into 16-byte slot j of
// the area that starts 128 bytes past BX.
#define storeWord(W, j) MOVOU W, (128+(j)*16)(BX)
// loadWord(W, i): load 16-byte slot i of the area that starts 128 bytes past
// BX into register W.
#define loadWord(W, i) MOVOU (128+(i)*16)(BX), W
@ -235,12 +228,6 @@ GLOBL r08_mask<>(SB), 8, $16
VPUNPCKHQDQ r2, tmp2, r3; \ // r3 = [w31, w23, w15, w7, w27, w19, w11, w3] r3 = [w15, w11, w7, w3]
VPUNPCKLQDQ r2, tmp2, r2 // r2 = [w30, w22, w14, w6, w26, w18, w10, w2] r2 = [w14, w10, w6, w2]
// avxXorm(P1, P2): P1 is a memory operand (e.g. 16(BX)), P2 is a vector register.
// Computes P2 = P1 XOR P2 with a single memory-source VPXOR, then writes the
// result back to P1, so both the memory location and the register end up
// holding the XOR of their previous values. Unlike xorm, no scratch register
// is needed.
#define avxXorm(P1, P2) \
VPXOR P1, P2, P2; \
VMOVDQU P2, P1
// avxStoreWord(W, j): store register W (unaligned VEX-encoded store) into
// 16-byte slot j of the area that starts 128 bytes past BX.
#define avxStoreWord(W, j) VMOVDQU W, (128+(j)*16)(BX)
// avxLoadWord(W, i): load 16-byte slot i of the area that starts 128 bytes
// past BX into register W.
#define avxLoadWord(W, i) VMOVDQU (128+(i)*16)(BX), W
@ -472,23 +459,34 @@ loop:
ROUND_16_63(62, c, d, e, f, g, h, a, b)
ROUND_16_63(63, b, c, d, e, f, g, h, a)
xorm( 0(BX), a)
xorm( 16(BX), b)
xorm( 32(BX), c)
xorm( 48(BX), d)
xorm( 64(BX), e)
xorm( 80(BX), f)
xorm( 96(BX), g)
xorm(112(BX), h)
MOVOU (0*16)(BX), tmp1
PXOR tmp1, a
MOVOU (1*16)(BX), tmp1
PXOR tmp1, b
MOVOU (2*16)(BX), tmp1
PXOR tmp1, c
MOVOU (3*16)(BX), tmp1
PXOR tmp1, d
MOVOU (4*16)(BX), tmp1
PXOR tmp1, e
MOVOU (5*16)(BX), tmp1
PXOR tmp1, f
MOVOU (6*16)(BX), tmp1
PXOR tmp1, g
MOVOU (7*16)(BX), tmp1
PXOR tmp1, h
DECQ DX
JZ end
storeState
LEAQ 64(R8), R8
LEAQ 64(R9), R9
LEAQ 64(R10), R10
LEAQ 64(R11), R11
JMP loop
DECQ DX
JNZ loop
end:
// transpose state
SSE_TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2)
SSE_TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2)
@ -616,23 +614,35 @@ avxLoop:
AVX_ROUND_16_63(62, c, d, e, f, g, h, a, b)
AVX_ROUND_16_63(63, b, c, d, e, f, g, h, a)
avxXorm( 0(BX), a)
avxXorm( 16(BX), b)
avxXorm( 32(BX), c)
avxXorm( 48(BX), d)
avxXorm( 64(BX), e)
avxXorm( 80(BX), f)
avxXorm( 96(BX), g)
avxXorm(112(BX), h)
VPXOR (0*16)(BX), a, a
VPXOR (1*16)(BX), b, b
VPXOR (2*16)(BX), c, c
VPXOR (3*16)(BX), d, d
VPXOR (4*16)(BX), e, e
VPXOR (5*16)(BX), f, f
VPXOR (6*16)(BX), g, g
VPXOR (7*16)(BX), h, h
DECQ DX
JZ avxEnd
// store current state
VMOVDQU a, (0*16)(BX)
VMOVDQU b, (1*16)(BX)
VMOVDQU c, (2*16)(BX)
VMOVDQU d, (3*16)(BX)
VMOVDQU e, (4*16)(BX)
VMOVDQU f, (5*16)(BX)
VMOVDQU g, (6*16)(BX)
VMOVDQU h, (7*16)(BX)
LEAQ 64(R8), R8
LEAQ 64(R9), R9
LEAQ 64(R10), R10
LEAQ 64(R11), R11
JMP avxLoop
DECQ DX
JNZ avxLoop
avxEnd:
// transpose state
TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2)
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2)