zuc: ppc64x core/eea #255

This commit is contained in:
Sun Yimin 2024-10-03 15:45:17 +08:00 committed by GitHub
parent d9e452d386
commit 9dc7633a48
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 224 additions and 58 deletions

View File

@ -249,22 +249,49 @@ GLOBL rcon<>(SB), RODATA, $160
// RESTORE_LFSR_2 rotates the 64 bytes at addr left by 8 bytes (two 32-bit
// words): bytes 8..63 are moved down to 0..55 and the original first 8 bytes
// are written to 56..63. Clobbers V0-V2 and tmpR1-tmpR3.
// Each line below shows the old and the new text of this change side by side;
// the description follows the new text, which loads the index offsets
// (8/24/40 and 16/32) as immediates instead of reading them from memory and
// stores the two scalar halves with 64-bit MOVD instead of MOVW.
#define RESTORE_LFSR_2(addr, tmpR1, tmpR2, tmpR3) \ #define RESTORE_LFSR_2(addr, tmpR1, tmpR2, tmpR3) \
MOVD (addr), tmpR1 \ MOVD (addr), tmpR1 \
MOVD 8(addr), tmpR2 \ MOVD $8, tmpR2 \
LXVD2X (tmpR2)(addr), V0 \ LXVD2X (tmpR2)(addr), V0 \
MOVD 24(addr), tmpR2 \ MOVD $24, tmpR2 \
LXVD2X (tmpR2)(addr), V1 \ LXVD2X (tmpR2)(addr), V1 \
MOVD 40(addr), tmpR2 \ MOVD $40, tmpR2 \
LXVD2X (tmpR2)(addr), V2 \ LXVD2X (tmpR2)(addr), V2 \
MOVD 56(addr), tmpR3 \ MOVD 56(addr), tmpR3 \
\ \
STXVD2X V0, (addr) \ STXVD2X V0, (addr) \
MOVD 16(addr), tmpR2 \ MOVD $16, tmpR2 \
STXVD2X V1, (tmpR2)(addr) \ STXVD2X V1, (tmpR2)(addr) \
MOVD 32(addr), tmpR2 \ MOVD $32, tmpR2 \
STXVD2X V2, (tmpR2)(addr) \ STXVD2X V2, (tmpR2)(addr) \
MOVW tmpR3, 48(addr) \ MOVD tmpR3, 48(addr) \
MOVW tmpR1, 56(addr) MOVD tmpR1, 56(addr)
// RESTORE_LFSR_4 rotates the 64 bytes at addr left by 16 bytes (four 32-bit
// words). All four 16-byte quarters are loaded into V0-V3 first, then stored
// back shifted by one quarter: V1 -> +0, V2 -> +16, V3 -> +32, V0 -> +48.
// Clobbers V0-V3; tmpR1, tmpR2 and tmpR3 are left holding 16, 32 and 48.
// Used after a 4-round tail — presumably to bring the LFSR words back to
// their canonical order (the round macros are defined outside this view).
#define RESTORE_LFSR_4(addr, tmpR1, tmpR2, tmpR3) \
LXVD2X (addr), V0 \
MOVD $16, tmpR1 \
LXVD2X (tmpR1)(addr), V1 \
MOVD $32, tmpR2 \
LXVD2X (tmpR2)(addr), V2 \
MOVD $48, tmpR3 \
LXVD2X (tmpR3)(addr), V3 \
\
STXVD2X V1, (addr) \
STXVD2X V2, (tmpR1)(addr) \
STXVD2X V3, (tmpR2)(addr) \
STXVD2X V0, (tmpR3)(addr)
// RESTORE_LFSR_8 rotates the 64 bytes at addr by 32 bytes (eight 32-bit
// words), i.e. it swaps the two 32-byte halves. All four 16-byte quarters are
// loaded into V0-V3 first, then stored back as V2 -> +0, V3 -> +16, V0 -> +32,
// V1 -> +48.
// Clobbers V0-V3; tmpR1, tmpR2 and tmpR3 are left holding 16, 32 and 48.
// Used after an 8-round tail — presumably to bring the LFSR words back to
// their canonical order (the round macros are defined outside this view).
#define RESTORE_LFSR_8(addr, tmpR1, tmpR2, tmpR3) \
LXVD2X (addr), V0 \
MOVD $16, tmpR1 \
LXVD2X (tmpR1)(addr), V1 \
MOVD $32, tmpR2 \
LXVD2X (tmpR2)(addr), V2 \
MOVD $48, tmpR3 \
LXVD2X (tmpR3)(addr), V3 \
\
STXVD2X V2, (addr) \
STXVD2X V3, (tmpR1)(addr) \
STXVD2X V0, (tmpR2)(addr) \
STXVD2X V1, (tmpR3)(addr)
// func genKeywordAsm(s *zucState32) uint32 // func genKeywordAsm(s *zucState32) uint32
TEXT ·genKeywordAsm(SB),NOSPLIT,$0 TEXT ·genKeywordAsm(SB),NOSPLIT,$0
@ -286,10 +313,190 @@ TEXT ·genKeywordAsm(SB),NOSPLIT,$0
RET RET
// ONEROUND produces one 32-bit keyword and advances the LFSR by one step.
//   idx   - compile-time round index; also selects the output slot dst+idx*4
//   addr  - register pointing at the state (passed through to BITS_REORG and
//           LFSR_UPDT)
//   dst   - register pointing at the output buffer
//   W     - register that receives the keyword
//   tmpR1..tmpR4 - scratch registers
// Steps: BITS_REORG and NONLIN_FUN leave a value in W, BRC_X3 is XORed into
// it, and the result is stored as a 32-bit word with MOVW. W is then cleared
// (XOR W, W) before it is handed to LFSR_UPDT.
// NOTE(review): BITS_REORG, NONLIN_FUN, LFSR_UPDT and BRC_X3 are defined
// outside this view; their exact register usage is assumed, not verified here.
#define ONEROUND(idx, addr, dst, W, tmpR1, tmpR2, tmpR3, tmpR4) \
BITS_REORG(idx, addr, W, tmpR1, tmpR2, tmpR3) \
NONLIN_FUN(W, tmpR1, tmpR2, tmpR3) \
XOR BRC_X3, W \
MOVW W, (idx*4)(dst) \
XOR W, W \
LFSR_UPDT(idx, addr, W, tmpR1, tmpR2, tmpR3, tmpR4)
// ONEROUND_REV32 is the same round as ONEROUND except that the keyword is
// written to dst+idx*4 with MOVWBR (byte-reversed 32-bit store) instead of
// MOVW, so the bytes land in memory in the opposite order.
//   idx   - compile-time round index; also selects the output slot dst+idx*4
//   addr  - register pointing at the state
//   dst   - register pointing at the output buffer
//   W     - register that receives the keyword; cleared (XOR W, W) before it
//           is handed to LFSR_UPDT
//   tmpR1..tmpR4 - scratch registers
// NOTE(review): BITS_REORG, NONLIN_FUN, LFSR_UPDT and BRC_X3 are defined
// outside this view; their exact register usage is assumed, not verified here.
#define ONEROUND_REV32(idx, addr, dst, W, tmpR1, tmpR2, tmpR3, tmpR4) \
BITS_REORG(idx, addr, W, tmpR1, tmpR2, tmpR3) \
NONLIN_FUN(W, tmpR1, tmpR2, tmpR3) \
XOR BRC_X3, W \
MOVWBR W, (idx*4)(dst) \
XOR W, W \
LFSR_UPDT(idx, addr, W, tmpR1, tmpR2, tmpR3, tmpR4)
// func genKeyStreamAsm(keyStream []uint32, pState *zucState32)
//
// Writes one 32-bit keyword per element of keyStream and updates *pState.
//
// Register use:
//   R3      - output cursor (keyStream base, advanced as words are written)
//   R4      - pState
//   R5      - number of keywords still to generate
//   R6      - loop count, then len % 16
//   R14-R18 - scratch for the round macros
//
// Full groups of 16 rounds run in the CTR loop. The remainder is handled in
// chunks of 8, 4, 2 and 1 rounds, each followed by the matching
// RESTORE_LFSR_n rotation of the 64-byte LFSR area. No rotation follows a
// 16-round group, presumably because 16 steps wrap the 16-word LFSR back to
// its starting layout. Every exit goes through zucRet so SAVE_STATE runs.
TEXT ·genKeyStreamAsm(SB),NOSPLIT,$0
	LOAD_CONSTS

	// Argument layout: slice base +0, len +8, cap +16, then pState at +24.
	MOVD ks+0(FP), R3
	MOVD ks_len+8(FP), R5
	MOVD pState+24(FP), R4

	LOAD_STATE(R4)

	CMP R5, $16
	BLT zucOctet

preloop16:
	SRD $4, R5, R6 // Set up loop counter: number of 16-keyword groups
	MOVD R6, CTR
	ANDCC $15, R5, R6 // Trailing keywords (len % 16); CR0 is tested after the loop
	PCALIGN $16

zucSixteens:
	ONEROUND(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(1, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(2, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(3, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(4, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(5, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(6, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(7, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(8, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(9, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(10, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(11, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(12, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(13, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(14, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(15, R4, R3, R14, R15, R16, R17, R18)
	ADD $64, R3
	BDNZ zucSixteens
	// No trailing keywords: finish via zucRet (not a bare return through LR)
	// so that SAVE_STATE still writes the updated state back.
	BEQ zucRet
	MOVD R6, R5

zucOctet:
	CMP R5, $8
	BLT zucNibble
	ONEROUND(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(1, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(2, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(3, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(4, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(5, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(6, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(7, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_8(R4, R14, R15, R16)
	ADD $32, R3
	ADD $-8, R5

zucNibble:
	CMP R5, $4
	BLT zucDouble
	ONEROUND(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(1, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(2, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(3, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_4(R4, R14, R15, R16)
	ADD $16, R3
	ADD $-4, R5

zucDouble:
	CMP R5, $2
	BLT zucSingle
	ONEROUND(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND(1, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_2(R4, R14, R15, R16)
	ADD $8, R3
	ADD $-2, R5

zucSingle:
	CMP R5, $1
	BLT zucRet
	ONEROUND(0, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_0(R4, R14, R15, R16, R17)

zucRet:
	SAVE_STATE(R4)
	RET
// func genKeyStreamRev32Asm(keyStream []byte, pState *zucState32)
//
// Fills keyStream with 32-bit keywords written by byte-reversed stores
// (ONEROUND_REV32) and updates *pState. keyStream is a byte slice, so its
// length is divided by 4 to get the number of keywords; a trailing partial
// word (len % 4 bytes) is not written.
//
// Register use:
//   R3      - output cursor (keyStream base, advanced as words are written)
//   R4      - pState
//   R5      - number of keywords still to generate
//   R6      - loop count, then keywords % 16
//   R14-R18 - scratch for the round macros
//
// Full groups of 16 rounds run in the CTR loop. The remainder is handled in
// chunks of 8, 4, 2 and 1 rounds, each followed by the matching
// RESTORE_LFSR_n rotation of the 64-byte LFSR area. Every exit goes through
// zucRet so SAVE_STATE runs.
TEXT ·genKeyStreamRev32Asm(SB),NOSPLIT,$0
	LOAD_CONSTS

	// Argument layout: slice base +0, len +8, cap +16, then pState at +24.
	MOVD ks+0(FP), R3
	MOVD ks_len+8(FP), R5
	MOVD pState+24(FP), R4

	// Each round stores 4 bytes: convert the byte length to a keyword count
	// so the routine never writes past the end of keyStream.
	SRD $2, R5, R5

	LOAD_STATE(R4)

	CMP R5, $16
	BLT zucOctet

preloop16:
	SRD $4, R5, R6 // Set up loop counter: number of 16-keyword groups
	MOVD R6, CTR
	ANDCC $15, R5, R6 // Trailing keywords (count % 16); CR0 is tested after the loop
	PCALIGN $16

zucSixteens:
	ONEROUND_REV32(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(1, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(2, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(3, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(4, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(5, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(6, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(7, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(8, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(9, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(10, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(11, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(12, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(13, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(14, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(15, R4, R3, R14, R15, R16, R17, R18)
	ADD $64, R3
	BDNZ zucSixteens
	// No trailing keywords: finish via zucRet (not a bare return through LR)
	// so that SAVE_STATE still writes the updated state back.
	BEQ zucRet
	MOVD R6, R5

zucOctet:
	CMP R5, $8
	BLT zucNibble
	ONEROUND_REV32(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(1, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(2, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(3, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(4, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(5, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(6, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(7, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_8(R4, R14, R15, R16)
	ADD $32, R3
	ADD $-8, R5

zucNibble:
	CMP R5, $4
	BLT zucDouble
	ONEROUND_REV32(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(1, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(2, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(3, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_4(R4, R14, R15, R16)
	ADD $16, R3
	ADD $-4, R5

zucDouble:
	CMP R5, $2
	BLT zucSingle
	ONEROUND_REV32(0, R4, R3, R14, R15, R16, R17, R18)
	ONEROUND_REV32(1, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_2(R4, R14, R15, R16)
	ADD $8, R3
	ADD $-2, R5

zucSingle:
	CMP R5, $1
	BLT zucRet
	ONEROUND_REV32(0, R4, R3, R14, R15, R16, R17, R18)
	RESTORE_LFSR_0(R4, R14, R15, R16, R17)

zucRet:
	SAVE_STATE(R4)
	RET

View File

@ -1,4 +1,4 @@
//go:build (amd64 || arm64) && !purego //go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
package zuc package zuc

View File

@ -1,4 +1,4 @@
//go:build purego || !(amd64 || arm64) //go:build purego || !(amd64 || arm64 || ppc64 || ppc64le)
package zuc package zuc

View File

@ -1,20 +0,0 @@
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build (ppc64 || ppc64le) && !purego
package zuc
// genKeywordAsm generates a single 32-bit keyword (4 bytes) from the state s
// and returns it. The declaration has no Go body; the routine is implemented
// in assembly.
//
//go:noescape
func genKeywordAsm(s *zucState32) uint32
// genKeyStreamAsm generates multiple keywords: one 32-bit keyword per element
// of keyStream, n*4 bytes for n elements. The declaration has no Go body; the
// routine is implemented in assembly.
//
//go:noescape
func genKeyStreamAsm(keyStream []uint32, pState *zucState32)
// genKeyStreamRev32Asm is the byte-slice counterpart of genKeyStreamAsm: the
// assembly implementation writes each 32-bit keyword into keyStream with a
// byte-reversed store. The declaration has no Go body.
//
//go:noescape
func genKeyStreamRev32Asm(keyStream []byte, pState *zucState32)

View File

@ -1,27 +0,0 @@
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build (ppc64 || ppc64le) && !purego
package zuc
import "testing"
// Test_genKeywordAsm_case1 runs the assembly keyword generator on a state
// built from an all-zero 16-byte key and an all-zero 16-byte IV. It checks the
// first keyword, the r1/r2 values left in the state after that first call, and
// then the second keyword.
func Test_genKeywordAsm_case1(t *testing.T) {
	// Expected values for the all-zero key/IV vector.
	const (
		wantZ1 = 0x27bede74
		wantR1 = 0xc7ee7f13
		wantR2 = 0xc0fa817
		wantZ2 = 0x018082da
	)

	key := make([]byte, 16)
	iv := make([]byte, 16)
	s, _ := newZUCState(key, iv)

	if got := genKeywordAsm(s); got != wantZ1 {
		t.Errorf("expected=%x, result=%x\n", wantZ1, got)
	}
	// The state registers are inspected between the two calls, before the
	// second keyword advances the state again.
	if s.r1 != wantR1 {
		t.Errorf("expected=%x, result=%x\n", wantR1, s.r1)
	}
	if s.r2 != wantR2 {
		t.Errorf("expected=%x, result=%x\n", wantR2, s.r2)
	}

	if got := genKeywordAsm(s); got != wantZ2 {
		t.Errorf("expected=%x, result=%x\n", wantZ2, got)
	}
}

View File

@ -11,6 +11,12 @@ func Test_genKeyword_case1(t *testing.T) {
if z1 != 0x27bede74 { if z1 != 0x27bede74 {
t.Errorf("expected=%x, result=%x\n", 0x27bede74, z1) t.Errorf("expected=%x, result=%x\n", 0x27bede74, z1)
} }
if s.r1 != 0xc7ee7f13 {
t.Errorf("expected=%x, result=%x\n", 0xc7ee7f13, s.r1)
}
if s.r2 != 0xc0fa817 {
t.Errorf("expected=%x, result=%x\n", 0xc0fa817, s.r2)
}
z2 := s.genKeyword() z2 := s.genKeyword()
if z2 != 0x018082da { if z2 != 0x018082da {
t.Errorf("expected=%x, result=%x\n", 0x018082da, z2) t.Errorf("expected=%x, result=%x\n", 0x018082da, z2)

View File

@ -1,4 +1,4 @@
//go:build (amd64 || arm64) && !purego //go:build (amd64 || arm64 || ppc64 || ppc64le) && !purego
package zuc package zuc

View File

@ -1,4 +1,4 @@
//go:build purego || !(amd64 || arm64) //go:build purego || !(amd64 || arm64 || ppc64 || ppc64le)
package zuc package zuc