mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
sm3: ppc64x change T handling method #267
This commit is contained in:
parent
d263ac5889
commit
8f45f4efcb
@ -5,7 +5,6 @@
|
|||||||
//go:build (ppc64 || ppc64le) && !purego
|
//go:build (ppc64 || ppc64le) && !purego
|
||||||
|
|
||||||
#include "textflag.h"
|
#include "textflag.h"
|
||||||
#include "sm3_const_asm.s"
|
|
||||||
|
|
||||||
#define a V0
|
#define a V0
|
||||||
#define e V1
|
#define e V1
|
||||||
@ -15,6 +14,10 @@
|
|||||||
#define g V5
|
#define g V5
|
||||||
#define d V6
|
#define d V6
|
||||||
#define h V7
|
#define h V7
|
||||||
|
#define T0 V8
|
||||||
|
#define T1 V9
|
||||||
|
#define T2 V10
|
||||||
|
#define ONE V11
|
||||||
#define TMP0 V12
|
#define TMP0 V12
|
||||||
#define TMP1 V13
|
#define TMP1 V13
|
||||||
#define TMP2 V14
|
#define TMP2 V14
|
||||||
@ -25,6 +28,12 @@
|
|||||||
// For instruction emulation
|
// For instruction emulation
|
||||||
#define ESPERMW V31 // Endian swapping permute into BE
|
#define ESPERMW V31 // Endian swapping permute into BE
|
||||||
|
|
||||||
|
DATA t_const<>+0x00(SB)/8, $0x79cc451979cc4519
|
||||||
|
DATA t_const<>+0x08(SB)/8, $0x79cc451979cc4519
|
||||||
|
DATA t_const<>+0x10(SB)/8, $0x9d8a7a879d8a7a87
|
||||||
|
DATA t_const<>+0x18(SB)/8, $0x9d8a7a879d8a7a87
|
||||||
|
GLOBL t_const<>(SB), RODATA, $32
|
||||||
|
|
||||||
#define R_x08 R15
|
#define R_x08 R15
|
||||||
#define R_x10 R16
|
#define R_x10 R16
|
||||||
#define R_x18 R17
|
#define R_x18 R17
|
||||||
@ -83,20 +92,11 @@
|
|||||||
XXPERMDI TMP2, TMP3, $0, T1; \
|
XXPERMDI TMP2, TMP3, $0, T1; \
|
||||||
XXPERMDI TMP2, TMP3, $3, T3
|
XXPERMDI TMP2, TMP3, $3, T3
|
||||||
|
|
||||||
// Load constant T, How to simlify it?
|
#define ROUND_00_11(index, T, a, b, c, d, e, f, g, h) \
|
||||||
// Solution 1: big constant table like sha256block_ppc64x.s
|
|
||||||
// Solution 2: 2 constant T, rotate shift left one bit every time
|
|
||||||
// Solution 1's performance is better but it uses more memory.
|
|
||||||
#define LOAD_T(index, const, target) \
|
|
||||||
MOVD $const, R_TMP \
|
|
||||||
MTVSRWZ R_TMP, target \
|
|
||||||
VSPLTW $1, target, target
|
|
||||||
|
|
||||||
#define ROUND_00_11(index, const, a, b, c, d, e, f, g, h) \
|
|
||||||
PROLD(a, TMP0, 12) \
|
PROLD(a, TMP0, 12) \
|
||||||
VOR TMP0, TMP0, TMP1 \
|
VOR TMP0, TMP0, TMP1 \
|
||||||
LOAD_T(index, const, TMP2) \
|
VADDUWM T, TMP0, TMP0 \
|
||||||
VADDUWM TMP2, TMP0, TMP0 \
|
VRLW T, ONE, T \
|
||||||
VADDUWM e, TMP0, TMP0 \
|
VADDUWM e, TMP0, TMP0 \
|
||||||
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
||||||
VXOR TMP2, TMP1, TMP0 \ // TMP0 = SS2
|
VXOR TMP2, TMP1, TMP0 \ // TMP0 = SS2
|
||||||
@ -141,16 +141,16 @@
|
|||||||
STXVW4X TMP1, (wordPtr) \
|
STXVW4X TMP1, (wordPtr) \
|
||||||
ADD $16, wordPtr \
|
ADD $16, wordPtr \
|
||||||
|
|
||||||
#define ROUND_12_15(index, const, a, b, c, d, e, f, g, h) \
|
#define ROUND_12_15(index, T, a, b, c, d, e, f, g, h) \
|
||||||
MESSAGE_SCHEDULE(index) \
|
MESSAGE_SCHEDULE(index) \
|
||||||
ROUND_00_11(index, const, a, b, c, d, e, f, g, h)
|
ROUND_00_11(index, T, a, b, c, d, e, f, g, h)
|
||||||
|
|
||||||
#define ROUND_16_63(index, const, a, b, c, d, e, f, g, h) \
|
#define ROUND_16_63(index, T, a, b, c, d, e, f, g, h) \
|
||||||
MESSAGE_SCHEDULE(index) \ // TMP1 is Wt+4 now, Pls do not use it
|
MESSAGE_SCHEDULE(index) \ // TMP1 is Wt+4 now, Pls do not use it
|
||||||
PROLD(a, TMP0, 12) \
|
PROLD(a, TMP0, 12) \
|
||||||
VOR TMP0, TMP0, TMP4 \
|
VOR TMP0, TMP0, TMP4 \
|
||||||
LOAD_T(index, const, TMP2) \
|
VADDUWM T, TMP0, TMP0 \
|
||||||
VADDUWM TMP2, TMP0, TMP0 \
|
VRLW T, ONE, T \
|
||||||
VADDUWM e, TMP0, TMP0 \
|
VADDUWM e, TMP0, TMP0 \
|
||||||
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
PROLD(TMP0, TMP2, 7) \ // TMP2 = SS1
|
||||||
VXOR TMP2, TMP4, TMP0 \ // TMP0 = SS2
|
VXOR TMP2, TMP4, TMP0 \ // TMP0 = SS2
|
||||||
@ -188,6 +188,10 @@ TEXT ·blockMultBy4(SB), NOSPLIT, $0
|
|||||||
MOVD $·flip_mask(SB), R4
|
MOVD $·flip_mask(SB), R4
|
||||||
LVX (R4), ESPERMW
|
LVX (R4), ESPERMW
|
||||||
#endif
|
#endif
|
||||||
|
VSPLTISW $1, ONE
|
||||||
|
MOVD $t_const<>(SB), R4
|
||||||
|
LXVD2X (R0)(R4), T0
|
||||||
|
LXVD2X (R_x10)(R4), T1
|
||||||
#define digPtr R11
|
#define digPtr R11
|
||||||
#define srcPtrPtr R5
|
#define srcPtrPtr R5
|
||||||
#define statePtr R4
|
#define statePtr R4
|
||||||
@ -247,72 +251,74 @@ loop:
|
|||||||
prepare4Words
|
prepare4Words
|
||||||
prepare4Words
|
prepare4Words
|
||||||
|
|
||||||
ROUND_00_11(0, T0, a, b, c, d, e, f, g, h)
|
XXLOR T0, T0, T2
|
||||||
ROUND_00_11(1, T1, h, a, b, c, d, e, f, g)
|
ROUND_00_11(0, T2, a, b, c, d, e, f, g, h)
|
||||||
|
ROUND_00_11(1, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_00_11(2, T2, g, h, a, b, c, d, e, f)
|
ROUND_00_11(2, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_00_11(3, T3, f, g, h, a, b, c, d, e)
|
ROUND_00_11(3, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_00_11(4, T4, e, f, g, h, a, b, c, d)
|
ROUND_00_11(4, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_00_11(5, T5, d, e, f, g, h, a, b, c)
|
ROUND_00_11(5, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_00_11(6, T6, c, d, e, f, g, h, a, b)
|
ROUND_00_11(6, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_00_11(7, T7, b, c, d, e, f, g, h, a)
|
ROUND_00_11(7, T2, b, c, d, e, f, g, h, a)
|
||||||
ROUND_00_11(8, T8, a, b, c, d, e, f, g, h)
|
ROUND_00_11(8, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_00_11(9, T9, h, a, b, c, d, e, f, g)
|
ROUND_00_11(9, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_00_11(10, T10, g, h, a, b, c, d, e, f)
|
ROUND_00_11(10, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_00_11(11, T11, f, g, h, a, b, c, d, e)
|
ROUND_00_11(11, T2, f, g, h, a, b, c, d, e)
|
||||||
|
|
||||||
ROUND_12_15(12, T12, e, f, g, h, a, b, c, d)
|
ROUND_12_15(12, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_12_15(13, T13, d, e, f, g, h, a, b, c)
|
ROUND_12_15(13, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_12_15(14, T14, c, d, e, f, g, h, a, b)
|
ROUND_12_15(14, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_12_15(15, T15, b, c, d, e, f, g, h, a)
|
ROUND_12_15(15, T2, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
ROUND_16_63(16, T16, a, b, c, d, e, f, g, h)
|
XXLOR T1, T1, T2
|
||||||
ROUND_16_63(17, T17, h, a, b, c, d, e, f, g)
|
ROUND_16_63(16, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_16_63(18, T18, g, h, a, b, c, d, e, f)
|
ROUND_16_63(17, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_16_63(19, T19, f, g, h, a, b, c, d, e)
|
ROUND_16_63(18, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_16_63(20, T20, e, f, g, h, a, b, c, d)
|
ROUND_16_63(19, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_16_63(21, T21, d, e, f, g, h, a, b, c)
|
ROUND_16_63(20, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_16_63(22, T22, c, d, e, f, g, h, a, b)
|
ROUND_16_63(21, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_16_63(23, T23, b, c, d, e, f, g, h, a)
|
ROUND_16_63(22, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(24, T24, a, b, c, d, e, f, g, h)
|
ROUND_16_63(23, T2, b, c, d, e, f, g, h, a)
|
||||||
ROUND_16_63(25, T25, h, a, b, c, d, e, f, g)
|
ROUND_16_63(24, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_16_63(26, T26, g, h, a, b, c, d, e, f)
|
ROUND_16_63(25, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_16_63(27, T27, f, g, h, a, b, c, d, e)
|
ROUND_16_63(26, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_16_63(28, T28, e, f, g, h, a, b, c, d)
|
ROUND_16_63(27, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_16_63(29, T29, d, e, f, g, h, a, b, c)
|
ROUND_16_63(28, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_16_63(30, T30, c, d, e, f, g, h, a, b)
|
ROUND_16_63(29, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_16_63(31, T31, b, c, d, e, f, g, h, a)
|
ROUND_16_63(30, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(32, T32, a, b, c, d, e, f, g, h)
|
ROUND_16_63(31, T2, b, c, d, e, f, g, h, a)
|
||||||
ROUND_16_63(33, T33, h, a, b, c, d, e, f, g)
|
ROUND_16_63(32, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_16_63(34, T34, g, h, a, b, c, d, e, f)
|
ROUND_16_63(33, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_16_63(35, T35, f, g, h, a, b, c, d, e)
|
ROUND_16_63(34, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_16_63(36, T36, e, f, g, h, a, b, c, d)
|
ROUND_16_63(35, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_16_63(37, T37, d, e, f, g, h, a, b, c)
|
ROUND_16_63(36, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_16_63(38, T38, c, d, e, f, g, h, a, b)
|
ROUND_16_63(37, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_16_63(39, T39, b, c, d, e, f, g, h, a)
|
ROUND_16_63(38, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(40, T40, a, b, c, d, e, f, g, h)
|
ROUND_16_63(39, T2, b, c, d, e, f, g, h, a)
|
||||||
ROUND_16_63(41, T41, h, a, b, c, d, e, f, g)
|
ROUND_16_63(40, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_16_63(42, T42, g, h, a, b, c, d, e, f)
|
ROUND_16_63(41, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_16_63(43, T43, f, g, h, a, b, c, d, e)
|
ROUND_16_63(42, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_16_63(44, T44, e, f, g, h, a, b, c, d)
|
ROUND_16_63(43, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_16_63(45, T45, d, e, f, g, h, a, b, c)
|
ROUND_16_63(44, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_16_63(46, T46, c, d, e, f, g, h, a, b)
|
ROUND_16_63(45, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_16_63(47, T47, b, c, d, e, f, g, h, a)
|
ROUND_16_63(46, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(48, T16, a, b, c, d, e, f, g, h)
|
ROUND_16_63(47, T2, b, c, d, e, f, g, h, a)
|
||||||
ROUND_16_63(49, T17, h, a, b, c, d, e, f, g)
|
ROUND_16_63(48, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_16_63(50, T18, g, h, a, b, c, d, e, f)
|
ROUND_16_63(49, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_16_63(51, T19, f, g, h, a, b, c, d, e)
|
ROUND_16_63(50, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_16_63(52, T20, e, f, g, h, a, b, c, d)
|
ROUND_16_63(51, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_16_63(53, T21, d, e, f, g, h, a, b, c)
|
ROUND_16_63(52, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_16_63(54, T22, c, d, e, f, g, h, a, b)
|
ROUND_16_63(53, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_16_63(55, T23, b, c, d, e, f, g, h, a)
|
ROUND_16_63(54, T2, c, d, e, f, g, h, a, b)
|
||||||
ROUND_16_63(56, T24, a, b, c, d, e, f, g, h)
|
ROUND_16_63(55, T2, b, c, d, e, f, g, h, a)
|
||||||
ROUND_16_63(57, T25, h, a, b, c, d, e, f, g)
|
ROUND_16_63(56, T2, a, b, c, d, e, f, g, h)
|
||||||
ROUND_16_63(58, T26, g, h, a, b, c, d, e, f)
|
ROUND_16_63(57, T2, h, a, b, c, d, e, f, g)
|
||||||
ROUND_16_63(59, T27, f, g, h, a, b, c, d, e)
|
ROUND_16_63(58, T2, g, h, a, b, c, d, e, f)
|
||||||
ROUND_16_63(60, T28, e, f, g, h, a, b, c, d)
|
ROUND_16_63(59, T2, f, g, h, a, b, c, d, e)
|
||||||
ROUND_16_63(61, T29, d, e, f, g, h, a, b, c)
|
ROUND_16_63(60, T2, e, f, g, h, a, b, c, d)
|
||||||
ROUND_16_63(62, T30, c, d, e, f, g, h, a, b)
|
ROUND_16_63(61, T2, d, e, f, g, h, a, b, c)
|
||||||
ROUND_16_63(63, T31, b, c, d, e, f, g, h, a)
|
ROUND_16_63(62, T2, c, d, e, f, g, h, a, b)
|
||||||
|
ROUND_16_63(63, T2, b, c, d, e, f, g, h, a)
|
||||||
|
|
||||||
XXLXOR V0, VS24, V0
|
XXLXOR V0, VS24, V0
|
||||||
XXLXOR V1, VS25, V1
|
XXLXOR V1, VS25, V1
|
||||||
|
Loading…
x
Reference in New Issue
Block a user