mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
kdf-sm3: add (8+4+1) test cases and reduce last round instructions
This commit is contained in:
parent
3ede319900
commit
238c0a3634
@ -422,7 +422,8 @@ func TestKdf(t *testing.T) {
|
|||||||
{"sm3 case 5", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 128}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb"},
|
{"sm3 case 5", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 128}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb"},
|
||||||
{"sm3 case 6", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 159}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e8"},
|
{"sm3 case 6", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 159}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e8"},
|
||||||
{"sm3 case 7", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 300}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a9"},
|
{"sm3 case 7", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 300}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a9"},
|
||||||
{"sm3 case 8", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 516}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a93f26fd6c99111b3017db1d1e6025d28d88ed3a419eb9c72e4fa3267f19c806092fd80cb91079cc00cefc55db53ad840ed1e6384f4cf02d9f2ecbaed54391e7a6da71fca4ea53ccfdd4d85adf37e4be8af1324f43ee402f109ac6a77915fd7e248d3f14f3698dd0e8ea7ea27e4288b288d75b4343ec8ab3d0cd9491a146e1b6033c512399bcd1cb9568d4f10d582f145c3ad7aae4ace7a14ec0abf831edc5aabcf58a1fb05180fa6e79651aa8753ddbf3ca0877b9a9d745ae1729b253f61cfc726cba4c9113008187830e41d428ca223014c994f317998689"},
|
{"sm3 case 8", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 416}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a93f26fd6c99111b3017db1d1e6025d28d88ed3a419eb9c72e4fa3267f19c806092fd80cb91079cc00cefc55db53ad840ed1e6384f4cf02d9f2ecbaed54391e7a6da71fca4ea53ccfdd4d85adf37e4be8af1324f43ee402f109ac6a77915fd7e248d3f14f3698dd0e8ea7ea27e4288b288d75b4343"},
|
||||||
|
{"sm3 case 9", args{[]byte("708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493708993ef1388a0ae4245a19bb6c02554c632633e356ddb989beb804fda96cfd47eba4fa460e7b277bc6b4ce4d07ed493"), 516}, "49cf14649f324a07e0d5bb2a00f7f05d5f5bdd6d14dff028e071327ec031104590eddb18f98b763e18bf382ff7c3875f30277f3179baebd795e7853fa643fdf280d8d7b81a2ab7829f615e132ab376d32194cd315908d27090e1180ce442d9be99322523db5bfac40ac5acb03550f5c93e5b01b1d71f2630868909a6a1250edb9abb2c6b0673e349f64c6577d4ba1b0a9c360016bae9478f8a80d5426327e84ea915c10ef39a016618b00aaae8735a8a1405180746ddd7ccd05dc890c5e5d07f49c40afdbc09267859ac5967b8c1163dc6defab955604e45e349a51df11d81b298424b84472607249a05b481ae88d98a9273ecdee009add0619641bd7d9f0b13a502e36e67b5836d0480a518a01046fa2738698fbe5e5008de11704b45531532667896158158ea08847a55a93f26fd6c99111b3017db1d1e6025d28d88ed3a419eb9c72e4fa3267f19c806092fd80cb91079cc00cefc55db53ad840ed1e6384f4cf02d9f2ecbaed54391e7a6da71fca4ea53ccfdd4d85adf37e4be8af1324f43ee402f109ac6a77915fd7e248d3f14f3698dd0e8ea7ea27e4288b288d75b4343ec8ab3d0cd9491a146e1b6033c512399bcd1cb9568d4f10d582f145c3ad7aae4ace7a14ec0abf831edc5aabcf58a1fb05180fa6e79651aa8753ddbf3ca0877b9a9d745ae1729b253f61cfc726cba4c9113008187830e41d428ca223014c994f317998689"},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
wantBytes, _ := hex.DecodeString(tt.want)
|
wantBytes, _ := hex.DecodeString(tt.want)
|
||||||
|
@ -81,12 +81,6 @@ GLOBL r08_mask<>(SB), 8, $32
|
|||||||
VPERM2I128 $0x31, r6, tmp4, r6; \ // r6 = [w62, w54, w46, w38, w30, w22, w14, w6]
|
VPERM2I128 $0x31, r6, tmp4, r6; \ // r6 = [w62, w54, w46, w38, w30, w22, w14, w6]
|
||||||
VPERM2I128 $0x31, r7, tmp3, r7; \ // r7 = [w63, w55, w47, w39, w31, w23, w15, w7]
|
VPERM2I128 $0x31, r7, tmp3, r7; \ // r7 = [w63, w55, w47, w39, w31, w23, w15, w7]
|
||||||
|
|
||||||
// xorm (mem), reg
|
|
||||||
// xor reg to mem using reg-mem xor and store
|
|
||||||
#define xorm(P1, P2) \
|
|
||||||
VPXOR P1, P2, P2; \
|
|
||||||
VMOVDQU P2, P1
|
|
||||||
|
|
||||||
// store 256 bits
|
// store 256 bits
|
||||||
#define storeWord(W, j) VMOVDQU W, (256+(j)*32)(BX)
|
#define storeWord(W, j) VMOVDQU W, (256+(j)*32)(BX)
|
||||||
// load 256 bits
|
// load 256 bits
|
||||||
@ -329,6 +323,7 @@ loop:
|
|||||||
prepare8Words(0)
|
prepare8Words(0)
|
||||||
prepare8Words(1)
|
prepare8Words(1)
|
||||||
|
|
||||||
|
// Need to load state again because YMM registers are used in prepare8Words
|
||||||
loadState
|
loadState
|
||||||
|
|
||||||
ROUND_00_11(0, a, b, c, d, e, f, g, h)
|
ROUND_00_11(0, a, b, c, d, e, f, g, h)
|
||||||
@ -398,15 +393,19 @@ loop:
|
|||||||
ROUND_16_63(62, c, d, e, f, g, h, a, b)
|
ROUND_16_63(62, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(63, b, c, d, e, f, g, h, a)
|
ROUND_16_63(63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
xorm( 0(BX), a)
|
VPXOR (0*32)(BX), a, a
|
||||||
xorm( 32(BX), b)
|
VPXOR (1*32)(BX), b, b
|
||||||
xorm( 64(BX), c)
|
VPXOR (2*32)(BX), c, c
|
||||||
xorm( 96(BX), d)
|
VPXOR (3*32)(BX), d, d
|
||||||
xorm( 128(BX), e)
|
VPXOR (4*32)(BX), e, e
|
||||||
xorm( 160(BX), f)
|
VPXOR (5*32)(BX), f, f
|
||||||
xorm( 192(BX), g)
|
VPXOR (6*32)(BX), g, g
|
||||||
xorm(224(BX), h)
|
VPXOR (7*32)(BX), h, h
|
||||||
|
|
||||||
|
DECQ DX
|
||||||
|
JZ end
|
||||||
|
|
||||||
|
saveState
|
||||||
LEAQ 64(srcPtr1), srcPtr1
|
LEAQ 64(srcPtr1), srcPtr1
|
||||||
LEAQ 64(srcPtr2), srcPtr2
|
LEAQ 64(srcPtr2), srcPtr2
|
||||||
LEAQ 64(srcPtr3), srcPtr3
|
LEAQ 64(srcPtr3), srcPtr3
|
||||||
@ -416,9 +415,9 @@ loop:
|
|||||||
LEAQ 64(srcPtr7), srcPtr7
|
LEAQ 64(srcPtr7), srcPtr7
|
||||||
LEAQ 64(srcPtr8), srcPtr8
|
LEAQ 64(srcPtr8), srcPtr8
|
||||||
|
|
||||||
DECQ DX
|
JMP loop
|
||||||
JNZ loop
|
|
||||||
|
|
||||||
|
end:
|
||||||
TRANSPOSE_MATRIX(a, b, c, d, e, f, g, h, TMP1, TMP2, TMP3, TMP4)
|
TRANSPOSE_MATRIX(a, b, c, d, e, f, g, h, TMP1, TMP2, TMP3, TMP4)
|
||||||
|
|
||||||
// save state
|
// save state
|
||||||
|
@ -79,13 +79,6 @@ GLOBL r08_mask<>(SB), 8, $16
|
|||||||
MOVOU g, 96(BX) \
|
MOVOU g, 96(BX) \
|
||||||
MOVOU h, 112(BX)
|
MOVOU h, 112(BX)
|
||||||
|
|
||||||
// xorm (mem), reg
|
|
||||||
// Xor reg to mem using reg-mem xor and store
|
|
||||||
#define xorm(P1, P2) \
|
|
||||||
MOVOU P1, tmp1; \
|
|
||||||
PXOR tmp1, P2; \
|
|
||||||
MOVOU P2, P1
|
|
||||||
|
|
||||||
#define storeWord(W, j) MOVOU W, (128+(j)*16)(BX)
|
#define storeWord(W, j) MOVOU W, (128+(j)*16)(BX)
|
||||||
#define loadWord(W, i) MOVOU (128+(i)*16)(BX), W
|
#define loadWord(W, i) MOVOU (128+(i)*16)(BX), W
|
||||||
|
|
||||||
@ -235,12 +228,6 @@ GLOBL r08_mask<>(SB), 8, $16
|
|||||||
VPUNPCKHQDQ r2, tmp2, r3; \ // r3 = [w31, w23, w15, w7, w27, w19, w11, w3] r3 = [w15, w11, w7, w3]
|
VPUNPCKHQDQ r2, tmp2, r3; \ // r3 = [w31, w23, w15, w7, w27, w19, w11, w3] r3 = [w15, w11, w7, w3]
|
||||||
VPUNPCKLQDQ r2, tmp2, r2 // r2 = [w30, w22, w14, w6, w26, w18, w10, w2] r2 = [w14, w10, w6, w2]
|
VPUNPCKLQDQ r2, tmp2, r2 // r2 = [w30, w22, w14, w6, w26, w18, w10, w2] r2 = [w14, w10, w6, w2]
|
||||||
|
|
||||||
// avxXorm (mem), reg
|
|
||||||
// Xor reg to mem using reg-mem xor and store
|
|
||||||
#define avxXorm(P1, P2) \
|
|
||||||
VPXOR P1, P2, P2; \
|
|
||||||
VMOVDQU P2, P1
|
|
||||||
|
|
||||||
#define avxStoreWord(W, j) VMOVDQU W, (128+(j)*16)(BX)
|
#define avxStoreWord(W, j) VMOVDQU W, (128+(j)*16)(BX)
|
||||||
#define avxLoadWord(W, i) VMOVDQU (128+(i)*16)(BX), W
|
#define avxLoadWord(W, i) VMOVDQU (128+(i)*16)(BX), W
|
||||||
|
|
||||||
@ -472,23 +459,34 @@ loop:
|
|||||||
ROUND_16_63(62, c, d, e, f, g, h, a, b)
|
ROUND_16_63(62, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(63, b, c, d, e, f, g, h, a)
|
ROUND_16_63(63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
xorm( 0(BX), a)
|
MOVOU (0*16)(BX), tmp1
|
||||||
xorm( 16(BX), b)
|
PXOR tmp1, a
|
||||||
xorm( 32(BX), c)
|
MOVOU (1*16)(BX), tmp1
|
||||||
xorm( 48(BX), d)
|
PXOR tmp1, b
|
||||||
xorm( 64(BX), e)
|
MOVOU (2*16)(BX), tmp1
|
||||||
xorm( 80(BX), f)
|
PXOR tmp1, c
|
||||||
xorm( 96(BX), g)
|
MOVOU (3*16)(BX), tmp1
|
||||||
xorm(112(BX), h)
|
PXOR tmp1, d
|
||||||
|
MOVOU (4*16)(BX), tmp1
|
||||||
|
PXOR tmp1, e
|
||||||
|
MOVOU (5*16)(BX), tmp1
|
||||||
|
PXOR tmp1, f
|
||||||
|
MOVOU (6*16)(BX), tmp1
|
||||||
|
PXOR tmp1, g
|
||||||
|
MOVOU (7*16)(BX), tmp1
|
||||||
|
PXOR tmp1, h
|
||||||
|
|
||||||
|
DECQ DX
|
||||||
|
JZ end
|
||||||
|
|
||||||
|
storeState
|
||||||
LEAQ 64(R8), R8
|
LEAQ 64(R8), R8
|
||||||
LEAQ 64(R9), R9
|
LEAQ 64(R9), R9
|
||||||
LEAQ 64(R10), R10
|
LEAQ 64(R10), R10
|
||||||
LEAQ 64(R11), R11
|
LEAQ 64(R11), R11
|
||||||
|
JMP loop
|
||||||
|
|
||||||
DECQ DX
|
end:
|
||||||
JNZ loop
|
|
||||||
|
|
||||||
// transpose state
|
// transpose state
|
||||||
SSE_TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2)
|
SSE_TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2)
|
||||||
SSE_TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2)
|
SSE_TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2)
|
||||||
@ -616,23 +614,35 @@ avxLoop:
|
|||||||
AVX_ROUND_16_63(62, c, d, e, f, g, h, a, b)
|
AVX_ROUND_16_63(62, c, d, e, f, g, h, a, b)
|
||||||
AVX_ROUND_16_63(63, b, c, d, e, f, g, h, a)
|
AVX_ROUND_16_63(63, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
avxXorm( 0(BX), a)
|
VPXOR (0*16)(BX), a, a
|
||||||
avxXorm( 16(BX), b)
|
VPXOR (1*16)(BX), b, b
|
||||||
avxXorm( 32(BX), c)
|
VPXOR (2*16)(BX), c, c
|
||||||
avxXorm( 48(BX), d)
|
VPXOR (3*16)(BX), d, d
|
||||||
avxXorm( 64(BX), e)
|
VPXOR (4*16)(BX), e, e
|
||||||
avxXorm( 80(BX), f)
|
VPXOR (5*16)(BX), f, f
|
||||||
avxXorm( 96(BX), g)
|
VPXOR (6*16)(BX), g, g
|
||||||
avxXorm(112(BX), h)
|
VPXOR (7*16)(BX), h, h
|
||||||
|
|
||||||
|
DECQ DX
|
||||||
|
JZ avxEnd
|
||||||
|
|
||||||
|
// store current state
|
||||||
|
VMOVDQU a, (0*16)(BX)
|
||||||
|
VMOVDQU b, (1*16)(BX)
|
||||||
|
VMOVDQU c, (2*16)(BX)
|
||||||
|
VMOVDQU d, (3*16)(BX)
|
||||||
|
VMOVDQU e, (4*16)(BX)
|
||||||
|
VMOVDQU f, (5*16)(BX)
|
||||||
|
VMOVDQU g, (6*16)(BX)
|
||||||
|
VMOVDQU h, (7*16)(BX)
|
||||||
|
|
||||||
LEAQ 64(R8), R8
|
LEAQ 64(R8), R8
|
||||||
LEAQ 64(R9), R9
|
LEAQ 64(R9), R9
|
||||||
LEAQ 64(R10), R10
|
LEAQ 64(R10), R10
|
||||||
LEAQ 64(R11), R11
|
LEAQ 64(R11), R11
|
||||||
|
JMP avxLoop
|
||||||
|
|
||||||
DECQ DX
|
avxEnd:
|
||||||
JNZ avxLoop
|
|
||||||
|
|
||||||
// transpose state
|
// transpose state
|
||||||
TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2)
|
TRANSPOSE_MATRIX(a, b, c, d, tmp1, tmp2)
|
||||||
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2)
|
TRANSPOSE_MATRIX(e, f, g, h, tmp1, tmp2)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user