internal/nat: port loong64 & wasm

This commit is contained in:
Sun Yimin 2025-10-07 15:43:14 +08:00 committed by GitHub
parent 878e860bec
commit b264044294
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 164 additions and 1 deletions

View File

@ -0,0 +1,98 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// derived from crypto/internal/fips140/bigmod/nat_riscv64.s
//go:build !purego
#include "textflag.h"
// func addMulVVW256(z, x *uint, y uint) (c uint)
//
// Entry point for 256-bit operands. It loads the word count (4 words of
// 64 bits) into R8 and jumps to the shared loop addMulVVWx, which reads
// z, x and y from the argument frame and stores the result c.
TEXT ·addMulVVW256(SB),$0-32
MOVV $4, R8
JMP addMulVVWx(SB)
// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Entry point for 1024-bit operands. It loads the word count (16 words of
// 64 bits) into R8 and jumps to the shared loop addMulVVWx, which reads
// z, x and y from the argument frame and stores the result c.
TEXT ·addMulVVW1024(SB),$0-32
MOVV $16, R8
JMP addMulVVWx(SB)
// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Entry point for 1536-bit operands. It loads the word count (24 words of
// 64 bits) into R8 and jumps to the shared loop addMulVVWx, which reads
// z, x and y from the argument frame and stores the result c.
TEXT ·addMulVVW1536(SB),$0-32
MOVV $24, R8
JMP addMulVVWx(SB)
// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Entry point for 2048-bit operands. It loads the word count (32 words of
// 64 bits) into R8 and jumps to the shared loop addMulVVWx, which reads
// z, x and y from the argument frame and stores the result c.
TEXT ·addMulVVW2048(SB),$0-32
MOVV $32, R8
JMP addMulVVWx(SB)
// addMulVVWx is the shared body of the addMulVVW* entry points. It computes
// z[i] = low64(x[i]*y + z[i] + carry) for R8 consecutive 64-bit words and
// writes the final carry word to the result slot c.
//
// It is reached by JMP with the word count already in R8 and reads its
// arguments from the entry point's frame. The loop handles four words per
// iteration and only exits when R8 reaches exactly zero, so R8 must be a
// multiple of 4 (the entry points in this file pass 4, 16, 24 and 32).
//
// Register use:
//   R4  pointer into z (advanced by 32 bytes per iteration)
//   R6  pointer into x (advanced by 32 bytes per iteration)
//   R5  multiplier y
//   R7  running carry word
//   R8  remaining word count
//   R9-R12   z[0..3], later the four result words
//   R13-R16  x[0..3], later the low halves of x[i]*y
//   R17, R24-R26  high halves of x[i]*y
//   R18, R19  scratch: partial sum and carry bit
TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
MOVV z+0(FP), R4
MOVV x+8(FP), R6
MOVV y+16(FP), R5
// Start with a zero carry.
MOVV $0, R7
// Nothing to do for a zero word count; fall through to store c = 0.
BEQ R8, R0, done
loop:
// Load the next four words of z and x.
MOVV 0*8(R4), R9 // z[0]
MOVV 1*8(R4), R10 // z[1]
MOVV 2*8(R4), R11 // z[2]
MOVV 3*8(R4), R12 // z[3]
MOVV 0*8(R6), R13 // x[0]
MOVV 1*8(R6), R14 // x[1]
MOVV 2*8(R6), R15 // x[2]
MOVV 3*8(R6), R16 // x[3]
// Word 0: 128-bit product x[0]*y, then add z[0] and the incoming carry.
// After each 64-bit add, SGTU compares an addend with the sum to recover
// the carry bit (the add wrapped if the sum is below the addend), and that
// bit is folded into the high half.
MULHVU R13, R5, R17 // z_hi[0] = high 64 bits of x[0] * y
MULV R13, R5, R13 // z_lo[0] = low 64 bits of x[0] * y
ADDV R13, R9, R18 // z_lo[0] = x[0] * y + z[0]
SGTU R13, R18, R19 // carry bit out of the add above
ADDV R17, R19, R17 // z_hi[0] += carry bit
ADDV R18, R7, R9 // z_lo[0] = x[0] * y + z[0] + c
SGTU R18, R9, R19 // carry bit out of the add above
ADDV R17, R19, R7 // next c = z_hi[0] + carry bit
// Word 1: same sequence using x[1], z[1].
MULHVU R14, R5, R24 // z_hi[1] = high 64 bits of x[1] * y
MULV R14, R5, R14 // z_lo[1] = low 64 bits of x[1] * y
ADDV R14, R10, R18 // z_lo[1] = x[1] * y + z[1]
SGTU R14, R18, R19 // carry bit out of the add above
ADDV R24, R19, R24 // z_hi[1] += carry bit
ADDV R18, R7, R10 // z_lo[1] = x[1] * y + z[1] + c
SGTU R18, R10, R19 // carry bit out of the add above
ADDV R24, R19, R7 // next c = z_hi[1] + carry bit
// Word 2: same sequence using x[2], z[2].
MULHVU R15, R5, R25 // z_hi[2] = high 64 bits of x[2] * y
MULV R15, R5, R15 // z_lo[2] = low 64 bits of x[2] * y
ADDV R15, R11, R18 // z_lo[2] = x[2] * y + z[2]
SGTU R15, R18, R19 // carry bit out of the add above
ADDV R25, R19, R25 // z_hi[2] += carry bit
ADDV R18, R7, R11 // z_lo[2] = x[2] * y + z[2] + c
SGTU R18, R11, R19 // carry bit out of the add above
ADDV R25, R19, R7 // next c = z_hi[2] + carry bit
// Word 3: same sequence using x[3], z[3].
MULHVU R16, R5, R26 // z_hi[3] = high 64 bits of x[3] * y
MULV R16, R5, R16 // z_lo[3] = low 64 bits of x[3] * y
ADDV R16, R12, R18 // z_lo[3] = x[3] * y + z[3]
SGTU R16, R18, R19 // carry bit out of the add above
ADDV R26, R19, R26 // z_hi[3] += carry bit
ADDV R18, R7, R12 // z_lo[3] = x[3] * y + z[3] + c
SGTU R18, R12, R19 // carry bit out of the add above
ADDV R26, R19, R7 // next c = z_hi[3] + carry bit
// Store the four result words back to z.
MOVV R9, 0*8(R4) // z[0]
MOVV R10, 1*8(R4) // z[1]
MOVV R11, 2*8(R4) // z[2]
MOVV R12, 3*8(R4) // z[3]
// Advance both pointers by four words and count them off.
ADDV $32, R4
ADDV $32, R6
SUBV $4, R8
BNE R8, R0, loop
done:
// Return the final carry word.
MOVV R7, c+24(FP)
RET

View File

@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build purego || !(386 || amd64 || arm || arm64 || ppc64 || ppc64le || riscv64 || s390x)
//go:build purego || !(386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le || riscv64 || s390x || wasm)
package bigmod

View File

@ -0,0 +1,65 @@
// Copyright 2024 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//go:build !purego
package bigmod
import "unsafe"
// The generic implementation relies on 64x64->128 bit multiplication and
// 64-bit add-with-carry, which are compiler intrinsics on many architectures.
// Wasm doesn't support those. Here we implement it with 32x32->64 bit
// operations, which is more efficient on Wasm.
// idx returns a pointer to the word located i words past x, treating x as the
// first element of a contiguous run of 8-byte words. It performs no bounds
// checking; the caller must ensure that word i exists.
func idx(x *uint, i uintptr) *uint {
	const wordBytes = 8
	return (*uint)(unsafe.Add(unsafe.Pointer(x), i*wordBytes))
}
func addMulVVWWasm(z, x *uint, y uint, n uintptr) (carry uint) {
const mask32 = 1<<32 - 1
y0 := y & mask32
y1 := y >> 32
for i := range n {
xi := *idx(x, i)
x0 := xi & mask32
x1 := xi >> 32
zi := *idx(z, i)
z0 := zi & mask32
z1 := zi >> 32
c0 := carry & mask32
c1 := carry >> 32
w00 := x0*y0 + z0 + c0
l00 := w00 & mask32
h00 := w00 >> 32
w01 := x0*y1 + z1 + h00
l01 := w01 & mask32
h01 := w01 >> 32
w10 := x1*y0 + c1 + l01
h10 := w10 >> 32
carry = x1*y1 + h10 + h01
*idx(z, i) = w10<<32 + l00
}
return carry
}
func addMulVVW256(z, x *uint, y uint) (c uint) {
return addMulVVWWasm(z, x, y, 256/_W)
}
func addMulVVW1024(z, x *uint, y uint) (c uint) {
return addMulVVWWasm(z, x, y, 1024/_W)
}
func addMulVVW1536(z, x *uint, y uint) (c uint) {
return addMulVVWWasm(z, x, y, 1536/_W)
}
func addMulVVW2048(z, x *uint, y uint) (c uint) {
return addMulVVWWasm(z, x, y, 2048/_W)
}