mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 04:06:18 +08:00
75 lines
1.6 KiB
ArmAsm
75 lines
1.6 KiB
ArmAsm
// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
|
|
//go:build !purego
|
|
|
|
#include "textflag.h"
|
|
|
|
// func addMulVVW256(z, x *uint, y uint) (c uint)
//
// Entry point for 4-word operands: loads the word count into R0 and branches
// into the shared addMulVVWy body. The branch is a jump, not a call, so
// addMulVVWy reads z, x, y and stores c through this function's argument area.
TEXT ·addMulVVW256(SB), $0-32
	MOVD	$4, R0			// R0 = words for addMulVVWy to process
	B	addMulVVWy(SB)
|
|
|
|
// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Entry point for 16-word operands: loads the word count into R0 and branches
// into the shared addMulVVWy body. The branch is a jump, not a call, so
// addMulVVWy reads z, x, y and stores c through this function's argument area.
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$16, R0			// R0 = words for addMulVVWy to process
	B	addMulVVWy(SB)
|
|
|
|
// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Entry point for 24-word operands: loads the word count into R0 and branches
// into the shared addMulVVWy body. The branch is a jump, not a call, so
// addMulVVWy reads z, x, y and stores c through this function's argument area.
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$24, R0			// R0 = words for addMulVVWy to process
	B	addMulVVWy(SB)
|
|
|
|
// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Entry point for 32-word operands: loads the word count into R0 and branches
// into the shared addMulVVWy body. The branch is a jump, not a call, so
// addMulVVWy reads z, x, y and stores c through this function's argument area.
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$32, R0			// R0 = words for addMulVVWy to process
	B	addMulVVWy(SB)
|
|
|
|
// addMulVVWy is the shared body behind the ·addMulVVW* entry points in this
// file. It is reached by a jump rather than a call, with
//
//	R0 = number of words to process, preset by the entry point
//
// and it reads z, x, y and writes c through the entry point's argument area.
// Four words are consumed per pass and R0 is only compared against zero, so
// R0 has to be a multiple of 4 for the loop to terminate.
//
// Register roles:
//
//	R1  z cursor (advanced by the post-indexed stores)
//	R2  x cursor (advanced by the post-indexed loads)
//	R3  y
//	R4  carry word handed from one pass to the next; final value is c
//	R5-R8    current four words of x
//	R9-R12   current four words of z
//	R13-R16  low  halves of x[0..3]*y
//	R17, R21, R19, R20  high halves of x[0], x[1], x[2], x[3] times y
TEXT addMulVVWy(SB), NOFRAME|NOSPLIT, $0
	MOVD	z+0(FP), R1
	MOVD	x+8(FP), R2
	MOVD	y+16(FP), R3
	MOVD	ZR, R4			// no carry going into the first pass
	CBZ	R0, finish		// nothing to do for a zero word count

// Each pass handles one block of 4 words and computes
//   [R4:R12:R11:R10:R9] = R4 + R3 * [R8:R7:R6:R5] + [R12:R11:R10:R9]
// with R4 on the right being the carry word from the previous pass.
// The sum is formed as two carry chains; MUL, UMULH, LDP and STP sit between
// the flag-setting adds because they leave the carry flag untouched.
block4:
	LDP.P	16(R2), (R5, R6)	// x[0], x[1]; x cursor += 16
	LDP.P	16(R2), (R7, R8)	// x[2], x[3]; x cursor += 16

	// Chain 1: previous carry word plus the low product halves of x[1..3].
	LDP	(R1), (R9, R10)		// z[0], z[1]
	ADDS	R4, R9			// z[0] += carry word
	MUL	R6, R3, R14
	ADCS	R14, R10		// z[1] += lo(x[1]*y)
	MUL	R7, R3, R15
	LDP	16(R1), (R11, R12)	// z[2], z[3]
	ADCS	R15, R11		// z[2] += lo(x[2]*y)
	MUL	R8, R3, R16
	ADCS	R16, R12		// z[3] += lo(x[3]*y)
	UMULH	R8, R3, R20
	ADC	$0, R20			// fold chain 1's carry-out into hi(x[3]*y)

	// Chain 2: lo(x[0]*y) plus the high product halves of x[0..2].
	MUL	R5, R3, R13
	ADDS	R13, R9			// z[0] += lo(x[0]*y)
	UMULH	R5, R3, R17
	ADCS	R17, R10		// z[1] += hi(x[0]*y)
	UMULH	R6, R3, R21
	STP.P	(R9, R10), 16(R1)	// write back z[0], z[1]; z cursor += 16
	ADCS	R21, R11		// z[2] += hi(x[1]*y)
	UMULH	R7, R3, R19
	ADCS	R19, R12		// z[3] += hi(x[2]*y)
	STP.P	(R11, R12), 16(R1)	// write back z[2], z[3]; z cursor += 16
	ADC	$0, R20, R4		// carry word for the next pass

	SUB	$4, R0			// four words done
	CBNZ	R0, block4

finish:
	MOVD	R4, c+24(FP)
	RET
|