mirror of
https://github.com/emmansun/gmsm.git
synced 2025-10-13 23:00:47 +08:00
internal/nat: port loong64 & wasm
This commit is contained in:
parent
878e860bec
commit
b264044294
98
internal/bigmod/nat_loong64.s
Normal file
98
internal/bigmod/nat_loong64.s
Normal file
@ -0,0 +1,98 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// derived from crypto/internal/fips140/bigmod/nat_riscv64.s
|
||||
|
||||
//go:build !purego
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
// func addMulVVW256(z, x *uint, y uint) (c uint)
//
// Sets the word count in R8 to 4 (256 bits as 64-bit words) and jumps to
// addMulVVWx, which reads z, x and y and writes c through the argument frame
// of this function.
TEXT ·addMulVVW256(SB),$0-32
	MOVV	$4, R8
	JMP	addMulVVWx(SB)
|
||||
|
||||
// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Sets the word count in R8 to 16 (1024 bits as 64-bit words) and jumps to
// addMulVVWx, which reads z, x and y and writes c through the argument frame
// of this function.
TEXT ·addMulVVW1024(SB),$0-32
	MOVV	$16, R8
	JMP	addMulVVWx(SB)
|
||||
|
||||
// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Sets the word count in R8 to 24 (1536 bits as 64-bit words) and jumps to
// addMulVVWx, which reads z, x and y and writes c through the argument frame
// of this function.
TEXT ·addMulVVW1536(SB),$0-32
	MOVV	$24, R8
	JMP	addMulVVWx(SB)
|
||||
|
||||
// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Sets the word count in R8 to 32 (2048 bits as 64-bit words) and jumps to
// addMulVVWx, which reads z, x and y and writes c through the argument frame
// of this function.
TEXT ·addMulVVW2048(SB),$0-32
	MOVV	$32, R8
	JMP	addMulVVWx(SB)
|
||||
|
||||
// addMulVVWx computes z += x * y over R8 64-bit words and stores the final
// carry word in c. It is reached by JMP from the addMulVVW* entry points
// rather than by a call: it is NOFRAME with a $0 frame and addresses z, x, y
// and c through FP, so it works on the argument frame of the function that
// jumped here.
//
// Register use:
//	R4            pointer into z, advanced by 32 bytes per iteration
//	R6            pointer into x, advanced by 32 bytes per iteration
//	R5            y
//	R7            running carry word c
//	R8            remaining word count, set by the caller; it is decremented
//	              by 4 per iteration and compared against zero, so it must
//	              be a multiple of 4 (0 skips the loop entirely)
//	R9-R12        z[0..3] on load, then the four result words
//	R13-R16       x[0..3] on load, then the low halves of x[i] * y
//	R17, R24-R26  high halves of x[i] * y
//	R18, R19      scratch sum and carry-out bit
TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
	MOVV	z+0(FP), R4
	MOVV	x+8(FP), R6
	MOVV	y+16(FP), R5
	MOVV	$0, R7		// c = 0

	BEQ	R8, R0, done
loop:
	// Process four words per iteration.
	MOVV	0*8(R4), R9	// z[0]
	MOVV	1*8(R4), R10	// z[1]
	MOVV	2*8(R4), R11	// z[2]
	MOVV	3*8(R4), R12	// z[3]

	MOVV	0*8(R6), R13	// x[0]
	MOVV	1*8(R6), R14	// x[1]
	MOVV	2*8(R6), R15	// x[2]
	MOVV	3*8(R6), R16	// x[3]

	MULHVU	R13, R5, R17	// hi = high 64 bits of x[0] * y
	MULV	R13, R5, R13	// lo = low 64 bits of x[0] * y
	ADDV	R13, R9, R18	// t = lo + z[0]
	SGTU	R13, R18, R19	// carry out of lo + z[0] (lo > t means the sum wrapped)
	ADDV	R17, R19, R17	// hi += carry
	ADDV	R18, R7, R9	// z[0] = t + c
	SGTU	R18, R9, R19	// carry out of t + c
	ADDV	R17, R19, R7	// next c = hi + carry

	MULHVU	R14, R5, R24	// hi = high 64 bits of x[1] * y
	MULV	R14, R5, R14	// lo = low 64 bits of x[1] * y
	ADDV	R14, R10, R18	// t = lo + z[1]
	SGTU	R14, R18, R19	// carry out of lo + z[1]
	ADDV	R24, R19, R24	// hi += carry
	ADDV	R18, R7, R10	// z[1] = t + c
	SGTU	R18, R10, R19	// carry out of t + c
	ADDV	R24, R19, R7	// next c = hi + carry

	MULHVU	R15, R5, R25	// hi = high 64 bits of x[2] * y
	MULV	R15, R5, R15	// lo = low 64 bits of x[2] * y
	ADDV	R15, R11, R18	// t = lo + z[2]
	SGTU	R15, R18, R19	// carry out of lo + z[2]
	ADDV	R25, R19, R25	// hi += carry
	ADDV	R18, R7, R11	// z[2] = t + c
	SGTU	R18, R11, R19	// carry out of t + c
	ADDV	R25, R19, R7	// next c = hi + carry

	MULHVU	R16, R5, R26	// hi = high 64 bits of x[3] * y
	MULV	R16, R5, R16	// lo = low 64 bits of x[3] * y
	ADDV	R16, R12, R18	// t = lo + z[3]
	SGTU	R16, R18, R19	// carry out of lo + z[3]
	ADDV	R26, R19, R26	// hi += carry
	ADDV	R18, R7, R12	// z[3] = t + c
	SGTU	R18, R12, R19	// carry out of t + c
	ADDV	R26, R19, R7	// next c = hi + carry

	MOVV	R9, 0*8(R4)	// z[0]
	MOVV	R10, 1*8(R4)	// z[1]
	MOVV	R11, 2*8(R4)	// z[2]
	MOVV	R12, 3*8(R4)	// z[3]

	ADDV	$32, R4		// advance z by four words
	ADDV	$32, R6		// advance x by four words

	SUBV	$4, R8
	BNE	R8, R0, loop

done:
	MOVV	R7, c+24(FP)
	RET
|
@ -2,7 +2,7 @@
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build purego || !(386 || amd64 || arm || arm64 || ppc64 || ppc64le || riscv64 || s390x)
|
||||
//go:build purego || !(386 || amd64 || arm || arm64 || loong64 || ppc64 || ppc64le || riscv64 || s390x || wasm)
|
||||
|
||||
package bigmod
|
||||
|
||||
|
65
internal/bigmod/nat_wasm.go
Normal file
65
internal/bigmod/nat_wasm.go
Normal file
@ -0,0 +1,65 @@
|
||||
// Copyright 2024 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:build !purego
|
||||
|
||||
package bigmod
|
||||
|
||||
import "unsafe"
|
||||
|
||||
// The generic implementation relies on 64x64->128 bit multiplication and
|
||||
// 64-bit add-with-carry, which are compiler intrinsics on many architectures.
|
||||
// Wasm doesn't support those. Here we implement it with 32x32->64 bit
|
||||
// operations, which is more efficient on Wasm.
|
||||
|
||||
// idx returns a pointer to the i-th word of the array that starts at x.
//
// No bounds check is performed: the caller must ensure that x points to at
// least i+1 words.
func idx(x *uint, i uintptr) *uint {
	// unsafe.Add keeps the arithmetic on an unsafe.Pointer instead of
	// round-tripping through uintptr, and unsafe.Sizeof replaces the
	// hard-coded 8-byte word size.
	return (*uint)(unsafe.Add(unsafe.Pointer(x), i*unsafe.Sizeof(*x)))
}
|
||||
|
||||
func addMulVVWWasm(z, x *uint, y uint, n uintptr) (carry uint) {
|
||||
const mask32 = 1<<32 - 1
|
||||
y0 := y & mask32
|
||||
y1 := y >> 32
|
||||
for i := range n {
|
||||
xi := *idx(x, i)
|
||||
x0 := xi & mask32
|
||||
x1 := xi >> 32
|
||||
zi := *idx(z, i)
|
||||
z0 := zi & mask32
|
||||
z1 := zi >> 32
|
||||
c0 := carry & mask32
|
||||
c1 := carry >> 32
|
||||
|
||||
w00 := x0*y0 + z0 + c0
|
||||
l00 := w00 & mask32
|
||||
h00 := w00 >> 32
|
||||
|
||||
w01 := x0*y1 + z1 + h00
|
||||
l01 := w01 & mask32
|
||||
h01 := w01 >> 32
|
||||
|
||||
w10 := x1*y0 + c1 + l01
|
||||
h10 := w10 >> 32
|
||||
|
||||
carry = x1*y1 + h10 + h01
|
||||
*idx(z, i) = w10<<32 + l00
|
||||
}
|
||||
return carry
|
||||
}
|
||||
|
||||
func addMulVVW256(z, x *uint, y uint) (c uint) {
|
||||
return addMulVVWWasm(z, x, y, 256/_W)
|
||||
}
|
||||
|
||||
func addMulVVW1024(z, x *uint, y uint) (c uint) {
|
||||
return addMulVVWWasm(z, x, y, 1024/_W)
|
||||
}
|
||||
|
||||
func addMulVVW1536(z, x *uint, y uint) (c uint) {
|
||||
return addMulVVWWasm(z, x, y, 1536/_W)
|
||||
}
|
||||
|
||||
func addMulVVW2048(z, x *uint, y uint) (c uint) {
|
||||
return addMulVVWWasm(z, x, y, 2048/_W)
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user