mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
zuc: ppc64x genKeywordAsm 1
This commit is contained in:
parent
7cfcabad54
commit
0904554637
20
zuc/core_ppc64x.go
Normal file
20
zuc/core_ppc64x.go
Normal file
@ -0,0 +1,20 @@
|
||||
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build (ppc64 || ppc64le) && !purego

package zuc

// Generate single keyword, 4 bytes.
//
//go:noescape
func genKeywordAsm(s *zucState32) uint32

// Generate multiple keywords, n*4 bytes.
//
//go:noescape
func genKeyStreamAsm(keyStream []uint32, pState *zucState32)

// Generate keystream bytes into keyStream.
// NOTE(review): the Rev32 suffix presumably means each 32-bit keystream
// word is stored byte-reversed — confirm once the asm body is implemented.
//
//go:noescape
func genKeyStreamRev32Asm(keyStream []byte, pState *zucState32)
|
294
zuc/core_ppc64x.s
Normal file
294
zuc/core_ppc64x.s
Normal file
@ -0,0 +1,294 @@
|
||||
// Copyright 2024 Sun Yimin. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (ppc64 || ppc64le) && !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// Constant table for the vectorized ZUC sbox computations.
// Fix: the two P3 entries at offsets 0x70/0x78 were emitted twice; the
// assembler rejects duplicate DATA directives for the same address.
DATA rcon<>+0x00(SB)/8, $0x0F0F0F0F0F0F0F0F // nibble mask
DATA rcon<>+0x08(SB)/8, $0x0F0F0F0F0F0F0F0F
DATA rcon<>+0x10(SB)/8, $0x691CA0D5B6C37F0A // affine transform matrix m1 low
DATA rcon<>+0x18(SB)/8, $0x53269AEF8CF94530
DATA rcon<>+0x20(SB)/8, $0x009837AF6CF45BC3 // affine transform matrix m1 high
DATA rcon<>+0x28(SB)/8, $0xAB339C04C75FF068
DATA rcon<>+0x30(SB)/8, $0x616EF1FE050A959A // affine transform matrix m2 low
DATA rcon<>+0x38(SB)/8, $0xF5FA656A919E010E
DATA rcon<>+0x40(SB)/8, $0x00A4E044CD692D89 // affine transform matrix m2 high
DATA rcon<>+0x48(SB)/8, $0xA50145E168CC882C
DATA rcon<>+0x50(SB)/8, $0x090F000E0F0F020A // P1
DATA rcon<>+0x58(SB)/8, $0x0004000C07050309 // P1
DATA rcon<>+0x60(SB)/8, $0x080D060507000C04 // P2
DATA rcon<>+0x68(SB)/8, $0x0B010E0A0F030902 // P2
DATA rcon<>+0x70(SB)/8, $0x02060A06000D0A0F // P3
DATA rcon<>+0x78(SB)/8, $0x03030D0500090C0D // P3
DATA rcon<>+0x80(SB)/8, $0xff00ff00ff00ff00 // S0
DATA rcon<>+0x88(SB)/8, $0xff00ff00ff00ff00
DATA rcon<>+0x90(SB)/8, $0x00ff00ff00ff00ff // S1
DATA rcon<>+0x98(SB)/8, $0x00ff00ff00ff00ff
GLOBL rcon<>(SB), RODATA, $160
|
||||
|
||||
|
||||
// Vector registers V20-V30 hold constants loaded once by LOAD_CONSTS.
#define M1L V20         // affine transform matrix m1, low-nibble table
#define M1H V21         // affine transform matrix m1, high-nibble table
#define M2L V22         // affine transform matrix m2, low-nibble table
#define M2H V23         // affine transform matrix m2, high-nibble table
#define V_FOUR V24      // each word = 4; nibble shift count
#define NIBBLE_MASK V25 // 0x0F in every byte
#define S1_MASK V26     // 0x00ff pattern; selects S1-sbox byte lanes
#define S0_MASK V27     // 0xff00 pattern; selects S0-sbox byte lanes
#define P1 V28          // S0 nibble lookup table, stage 1
#define P2 V29          // S0 nibble lookup table, stage 2
#define P3 V30          // S0 nibble lookup table, stage 3

// LOAD_CONSTS fills the constant registers above from rcon<>.
// Clobbers R4 and R5.
#define LOAD_CONSTS \
	VSPLTISW $4, V_FOUR          \ // splat immediate 4 into every word
	MOVD $rcon<>+0x00(SB), R4    \ // R4 = base of the constant table
	LXVD2X (R4)(R0), NIBBLE_MASK \
	MOVD $16, R5                 \
	LXVD2X (R4)(R5), M1L         \
	MOVD $32, R5                 \
	LXVD2X (R4)(R5), M1H         \
	MOVD $48, R5                 \
	LXVD2X (R4)(R5), M2L         \
	MOVD $64, R5                 \
	LXVD2X (R4)(R5), M2H         \
	MOVD $80, R5                 \
	LXVD2X (R4)(R5), P1          \
	MOVD $96, R5                 \
	LXVD2X (R4)(R5), P2          \
	MOVD $112, R5                \
	LXVD2X (R4)(R5), P3          \
	MOVD $128, R5                \
	LXVD2X (R4)(R5), S0_MASK     \
	MOVD $144, R5                \
	LXVD2X (R4)(R5), S1_MASK
|
||||
|
||||
// S0_comput applies the ZUC S0 sbox to each byte of IN_OUT using the
// nibble lookup tables P1/P2/P3 instead of a 256-byte table.
// parameters:
// - IN_OUT: 128 bits register as sbox input/output data
// - V_FOUR: 128 bits register holding 4 in each word (shift count)
// - XTMP1: 128 bits temp register
// - XTMP2: 128 bits temp register
#define S0_comput(IN_OUT, V_FOUR, XTMP1, XTMP2) \
	VSRW IN_OUT, V_FOUR, XTMP1;       \ // extract high nibbles
	VAND XTMP1, NIBBLE_MASK, XTMP1;   \
	VAND IN_OUT, NIBBLE_MASK, IN_OUT; \ // keep low nibbles
	VPERM P1, P1, IN_OUT, XTMP2;      \ // stage-1 table lookup via permute
	VXOR XTMP1, XTMP2, XTMP2;         \
	VPERM P2, P2, XTMP2, XTMP1;       \ // stage-2 table lookup
	VXOR IN_OUT, XTMP1, XTMP1;        \
	VPERM P3, P3, XTMP1, IN_OUT;      \ // stage-3 table lookup
	VXOR XTMP2, IN_OUT, IN_OUT;       \
	VSLW IN_OUT, V_FOUR, IN_OUT;      \ // recombine the two nibbles
	VXOR IN_OUT, XTMP1, IN_OUT;       \
	VSPLTISB $5, XTMP1;               \
	VRLB IN_OUT, XTMP1, IN_OUT        // rotate each byte left by 5
|
||||
|
||||
// Affine Transform over GF(2), implemented as two 16-entry nibble table
// lookups combined with XOR.
// parameters:
// - L: table low nibbles
// - H: table high nibbles
// - V_FOUR: 128 bits register holding 4 in each word (shift count)
// - x: 128 bits register as sbox input/output data
// - y: 128 bits temp register
// - z: 128 bits temp register
// Note: x is shifted right by 4 in place; only the combined result left
// in x is meaningful afterwards.
#define AFFINE_TRANSFORM(L, H, V_FOUR, x, y, z) \
	VAND NIBBLE_MASK, x, z;  \ // z = low nibble of each byte
	VPERM L, L, z, y;        \ // y = L[z]
	VSRD x, V_FOUR, x;       \ // move high nibbles down
	VAND NIBBLE_MASK, x, z;  \ // z = high nibble of each byte
	VPERM H, H, z, x;        \ // x = H[z]
	VXOR y, x, x             // x = L[lo] ^ H[hi]
|
||||
|
||||
// SHLDL(a, b, n): a = (a << n) | (b >> n), a 32-bit double shift built
// from SLW/SRW. Callers pre-shift b so that (b >> n) yields exactly the
// bits to merge in. b is clobbered.
#define SHLDL(a, b, n) \ // NO SHLDL in GOLANG now
	SLW n, a, a \
	SRW n, b, b \
	OR b, a, a
|
||||
|
||||
// zuc S1 sbox function: affine(m2) . AES-SubBytes . affine(m1), using
// the hardware VSBOX instruction for the AES sbox core.
// parameters:
// - x: 128 bits register as sbox input/output data
// - y: 128 bits temp register
// - z: 128 bits temp register
#define S1_comput(x, y, z) \
	AFFINE_TRANSFORM(M1L, M1H, V_FOUR, x, y, z); \
	VSBOX x, x;                                  \ // AES SubBytes on each byte
	AFFINE_TRANSFORM(M2L, M2H, V_FOUR, x, y, z)
|
||||
|
||||
// Byte offsets of the non-LFSR fields inside the state: the 16 LFSR
// words occupy slots 0-15, followed by F.R1, F.R2 and the four
// bit-reorganization words X0-X3.
// NOTE(review): offsets assume this exact zucState32 field layout —
// confirm against the Go struct definition.
#define OFFSET_FR1 (16*4)
#define OFFSET_FR2 (17*4)
#define OFFSET_BRC_X0 (18*4)
#define OFFSET_BRC_X1 (19*4)
#define OFFSET_BRC_X2 (20*4)
#define OFFSET_BRC_X3 (21*4)

// Dedicated scalar registers for the F state and reorganized words.
#define F_R1 R7
#define F_R2 R8
#define BRC_X0 R9
#define BRC_X1 R10
#define BRC_X2 R11
#define BRC_X3 R12
|
||||
|
||||
// BITS_REORG(idx): ZUC bit reorganization. Builds the 32-bit words
//   X0 = s15H || s14L, X1 = s11L || s9H, X2 = s7L || s5H, X3 = s2L || s0H
// from the 31-bit LFSR words in the circular buffer at addr; idx is the
// round offset. All tmpRs are clobbered.
// Fix: the last load/merge targeted BRC_X4, which is never #defined
// (only BRC_X0..BRC_X3 exist) — it must be BRC_X3.
#define BITS_REORG(idx, addr, tmpR1, tmpR2, tmpR3, tmpR4) \
	MOVWZ (((15 + idx) % 16)*4)(addr), BRC_X0 \
	MOVWZ (((14 + idx) % 16)*4)(addr), tmpR1  \
	MOVWZ (((11 + idx) % 16)*4)(addr), BRC_X1 \
	MOVWZ (((9 + idx) % 16)*4)(addr), tmpR2   \
	MOVWZ (((7 + idx) % 16)*4)(addr), BRC_X2  \
	MOVWZ (((5 + idx) % 16)*4)(addr), tmpR3   \
	MOVWZ (((2 + idx) % 16)*4)(addr), BRC_X3  \
	MOVWZ (((0 + idx) % 16)*4)(addr), tmpR4   \
	SRW $15, BRC_X0, BRC_X0   \ // high 16 bits of the 31-bit s15
	SLW $16, tmpR1, tmpR1     \
	SLW $1, tmpR2, tmpR2      \ // high half of a 31-bit word starts at bit 15
	SLW $1, tmpR3, tmpR3      \
	SLW $1, tmpR4, tmpR4      \
	SHLDL(BRC_X0, tmpR1, $16) \
	SHLDL(BRC_X1, tmpR2, $16) \
	SHLDL(BRC_X2, tmpR3, $16) \
	SHLDL(BRC_X3, tmpR4, $16)
|
||||
|
||||
// LOAD_STATE loads F.R1/F.R2 and X0..X3 from the state at addr into
// their dedicated registers.
#define LOAD_STATE(addr) \
	MOVWZ OFFSET_FR1(addr), F_R1       \
	MOVWZ OFFSET_FR2(addr), F_R2       \
	MOVWZ OFFSET_BRC_X0(addr), BRC_X0  \
	MOVWZ OFFSET_BRC_X1(addr), BRC_X1  \
	MOVWZ OFFSET_BRC_X2(addr), BRC_X2  \
	MOVWZ OFFSET_BRC_X3(addr), BRC_X3
|
||||
|
||||
// SAVE_STATE writes F.R1/F.R2 and X0..X3 back into the state at addr.
#define SAVE_STATE(addr) \
	MOVW F_R1, OFFSET_FR1(addr)      \
	MOVW F_R2, OFFSET_FR2(addr)      \
	MOVW BRC_X0, OFFSET_BRC_X0(addr) \
	MOVW BRC_X1, OFFSET_BRC_X1(addr) \
	MOVW BRC_X2, OFFSET_BRC_X2(addr) \
	MOVW BRC_X3, OFFSET_BRC_X3(addr)
|
||||
|
||||
// NONLIN_FUN: the ZUC nonlinear function F.
//   W  = (X0 ^ R1) + R2            (left in AX as the return value)
//   W1 = R1 + X1, W2 = R2 ^ X2
//   R1 = S(L1((W1 << 16) | (W2 >> 16)))
//   R2 = S(L2((W2 << 16) | (W1 >> 16)))
// where S applies the S0/S1 sboxes to alternating byte lanes (selected
// by S0_MASK/S1_MASK). AX..DX are caller-provided scalar scratch
// registers; F_R1/F_R2 are updated in place; V0-V3 are clobbered.
// Fix: S0 and S1 are now computed on separate copies of the input and
// mask-merged — previously S1_comput ran on top of the S0 result and
// S0_MASK was ANDed with a scratch register left over from S1_comput.
#define NONLIN_FUN(AX, BX, CX, DX) \
	XOR F_R1, BRC_X0, AX   \ // AX = X0 ^ R1
	ADD F_R2, AX           \ // W = (X0 ^ R1) + R2
	ADD BRC_X1, F_R1       \ // W1 = R1 + X1
	XOR BRC_X2, F_R2       \ // W2 = R2 ^ X2
	\
	SLW $16, F_R1, DX      \
	SRW $16, F_R2, CX      \
	OR CX, DX              \ // P = (W1 << 16) | (W2 >> 16)
	SHLDL(F_R2, F_R1, $16) \ // Q = (W2 << 16) | (W1 >> 16)
	ROTLW $2, DX, BX       \ // L1(P) = P ^ rot2 ^ rot10 ^ rot18 ^ rot24
	ROTLW $24, DX, CX      \
	XOR CX, DX             \
	XOR BX, DX             \
	ROTLW $8, BX           \
	XOR BX, DX             \
	ROTLW $8, BX           \
	XOR BX, DX, BX         \ // BX = U = L1(P)
	RLDICL $0, BX, $32, DX \ // DX = U, high 32 bits cleared
	ROTLW $8, F_R2, BX     \ // L2(Q) = Q ^ rot8 ^ rot14 ^ rot22 ^ rot30
	ROTLW $14, F_R2, CX    \
	XOR BX, F_R2           \
	XOR CX, F_R2           \
	ROTLW $8, CX           \
	XOR CX, F_R2           \
	ROTLW $8, CX           \
	XOR CX, F_R2           \ // F_R2 = V = L2(Q)
	SLD $32, F_R2          \
	XOR F_R2, DX           \ // DX = V || U
	MTVSRD DX, V0          \ // move V || U into vector V0
	VOR V0, V0, V1         \ // V1 = copy of the input for the S0 path
	S0_comput(V1, V_FOUR, V2, V3) \ // V1 = S0 applied to every byte
	S1_comput(V0, V2, V3)  \ // V0 = S1 applied to every byte
	VAND S1_MASK, V0, V0   \ // keep the S1 byte lanes
	VAND S0_MASK, V1, V1   \ // keep the S0 byte lanes
	VXOR V0, V1, V0        \ // interleaved sbox result
	MFVSRD V0, DX          \
	SRD $32, DX, F_R1      \ // new R1 from the V half
	MOVWZ DX, F_R2         // new R2 from the U half
|
||||
|
||||
// LFSR_UPDT(idx): LFSR update. Computes
//   s16 = (2^15*s15 + 2^17*s13 + 2^21*s10 + 2^20*s4 + (1+2^8)*s0 + W)
//         mod (2^31 - 1)
// over the circular buffer at addr (slot indices relative to idx) and
// stores it into slot (0 + idx), the word being retired. Pass W = 0 for
// work mode. W and all tmpRs are clobbered.
#define LFSR_UPDT(idx, addr, W, tmpR1, tmpR2, tmpR3, tmpR4 ) \
	MOVWZ (((0 + idx) % 16)*4)(addr), tmpR1  \
	MOVWZ (((4 + idx) % 16)*4)(addr), tmpR2  \
	MOVWZ (((10 + idx) % 16)*4)(addr), tmpR3 \
	MOVWZ (((13 + idx) % 16)*4)(addr), tmpR4 \
	\ // Calculate 64-bit LFSR feedback
	ADD tmpR1, W    \ // + s0
	SLD $8, tmpR1   \
	SLD $20, tmpR2  \
	SLD $21, tmpR3  \
	SLD $17, tmpR4  \
	ADD tmpR1, W    \ // + 2^8 * s0
	ADD tmpR2, W    \ // + 2^20 * s4
	ADD tmpR3, W    \ // + 2^21 * s10
	ADD tmpR4, W    \ // + 2^17 * s13
	MOVWZ (((15 + idx) % 16)*4)(addr), tmpR4 \
	SLD $15, tmpR4  \
	ADD tmpR4, W    \ // + 2^15 * s15
	\ // Reduce it to 31-bit value
	SRD $31, W, tmpR1  \ // fold bits >= 31 back in (mod 2^31 - 1)
	AND $0x7FFFFFFF, W \
	ADD tmpR1, W       \
	\
	SRD $31, W, tmpR1  \ // second fold: first fold may still carry out
	AND $0x7FFFFFFF, W \
	ADD tmpR1, W       \
	\ // LFSR_S16 = (LFSR_S15++) = W
	MOVW W, (((0 + idx) % 16)*4)(addr)
|
||||
|
||||
// RESTORE_LFSR_0 rotates the 16-word LFSR at addr left by one slot:
// s[i] = s[i+1] for i < 15, s[15] = old s[0]. Used after LFSR_UPDT so
// the state always starts at slot 0. V0-V2 and all tmpRs are clobbered.
// NOTE(review): the 8-byte MOVD at 52(addr) moves s13/s14 as a unit;
// memory order round-trips unchanged, but confirm on both ppc64 and
// ppc64le.
#define RESTORE_LFSR_0(addr, tmpR1, tmpR2, tmpR3, tmpR4) \
	MOVWZ (addr), tmpR1       \ // tmpR1 = old s0
	MOVD $4, tmpR4            \
	LXVD2X (tmpR4)(addr), V0  \ // V0 = s1..s4
	MOVD $20, tmpR4           \
	LXVD2X (tmpR4)(addr), V1  \ // V1 = s5..s8
	MOVD $36, tmpR4           \
	LXVD2X (tmpR4)(addr), V2  \ // V2 = s9..s12
	MOVD 52(addr), tmpR2      \ // tmpR2 = s13 || s14
	MOVWZ 60(addr), tmpR3     \ // tmpR3 = s15
	STXVD2X V0, (addr)        \
	MOVD $16, tmpR4           \
	STXVD2X V1, (tmpR4)(addr) \
	MOVD $32, tmpR4           \
	STXVD2X V2, (tmpR4)(addr) \
	MOVD tmpR2, 48(addr)      \
	MOVW tmpR3, 56(addr)      \
	MOVW tmpR1, 60(addr)      // old s0 becomes s15
|
||||
|
||||
// RESTORE_LFSR_2 rotates the 16-word LFSR at addr left by two slots:
// s[i] = s[i+2] for i < 14, s[14] = old s[0], s[15] = old s[1].
// V0-V2 and all tmpRs are clobbered.
// Fixes: the index constants 8/24/40/16/32 had lost their '$' and were
// being LOADED from memory (using LFSR words as offsets), and the two
// rotated word pairs were stored with MOVW (4 bytes) instead of MOVD
// (8 bytes), dropping half of each pair.
#define RESTORE_LFSR_2(addr, tmpR1, tmpR2, tmpR3) \
	MOVD (addr), tmpR1        \ // tmpR1 = old s0 || s1
	MOVD $8, tmpR2            \
	LXVD2X (tmpR2)(addr), V0  \ // V0 = s2..s5
	MOVD $24, tmpR2           \
	LXVD2X (tmpR2)(addr), V1  \ // V1 = s6..s9
	MOVD $40, tmpR2           \
	LXVD2X (tmpR2)(addr), V2  \ // V2 = s10..s13
	MOVD 56(addr), tmpR3      \ // tmpR3 = s14 || s15
	\
	STXVD2X V0, (addr)        \
	MOVD $16, tmpR2           \
	STXVD2X V1, (tmpR2)(addr) \
	MOVD $32, tmpR2           \
	STXVD2X V2, (tmpR2)(addr) \
	MOVD tmpR3, 48(addr)      \ // s14,s15 move to slots 12,13
	MOVD tmpR1, 56(addr)      // old s0,s1 become s14,s15
|
||||
|
||||
|
||||
// func genKeywordAsm(s *zucState32) uint32
// Produces one 32-bit keystream word: bit reorganization, z = F() ^ X3,
// then LFSRWithWorkMode, finally rotating the LFSR so the state again
// starts at slot 0.
// Fixes: MOVQ is an amd64 mnemonic (ppc64 uses MOVD); the FP symbol
// must match the Go declaration's parameter name (s, not pState); the
// BITS_REORG step was missing, so X0..X3 stayed stale across calls; the
// frame now declares the 16-byte argument area for vet.
TEXT ·genKeywordAsm(SB),NOSPLIT,$0-16
	LOAD_CONSTS

	MOVD s+0(FP), R4

	LOAD_STATE(R4)
	BITS_REORG(0, R4, R14, R15, R16, R17)
	NONLIN_FUN(R14, R15, R16, R17)
	// (BRC_X3 xor W) as result
	XOR BRC_X3, R14
	MOVW R14, ret+8(FP)

	// LFSRWithWorkMode
	XOR R14, R14
	LFSR_UPDT(0, R4, R14, R15, R16, R17, R18)
	SAVE_STATE(R4)
	RESTORE_LFSR_0(R4, R15, R16, R17, R18)

	RET
|
||||
|
||||
// func genKeyStreamAsm(keyStream []uint32, pState *zucState32)
// TODO: not implemented yet — this stub returns immediately without
// writing any keystream words; callers must not use it until the
// vectorized loop lands.
TEXT ·genKeyStreamAsm(SB),NOSPLIT,$0
	RET
|
||||
|
||||
// func genKeyStreamRev32Asm(keyStream []byte, pState *zucState32)
// TODO: not implemented yet — this stub returns immediately without
// writing any keystream bytes.
TEXT ·genKeyStreamRev32Asm(SB),NOSPLIT,$0
	RET
|
21
zuc/core_ppc64x_test.go
Normal file
21
zuc/core_ppc64x_test.go
Normal file
@ -0,0 +1,21 @@
|
||||
// Copyright 2024 Sun Yimin. All rights reserved.
|
||||
// Use of this source code is governed by a MIT-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build (ppc64 || ppc64le) && !purego
|
||||
|
||||
package zuc
|
||||
|
||||
import "testing"
|
||||
|
||||
func Test_genKeywordAsm_case1(t *testing.T) {
|
||||
s, _ := newZUCState([]byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0})
|
||||
z1 := genKeywordAsm(s)
|
||||
if z1 != 0x27bede74 {
|
||||
t.Errorf("expected=%x, result=%x\n", 0x27bede74, z1)
|
||||
}
|
||||
z2 := genKeywordAsm(s)
|
||||
if z2 != 0x018082da {
|
||||
t.Errorf("expected=%x, result=%x\n", 0x018082da, z2)
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user