mirror of
https://github.com/emmansun/gmsm.git
synced 2025-04-26 20:26:19 +08:00
internal/sm2ec: use neon inst. for p256MovCond
This commit is contained in:
parent
39751be045
commit
06a310dd4d
2
.github/workflows/test_qemu.yml
vendored
2
.github/workflows/test_qemu.yml
vendored
@ -14,7 +14,7 @@ jobs:
|
|||||||
test:
|
test:
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
go-version: [1.21.x]
|
go-version: [1.16.x]
|
||||||
arch: [arm64]
|
arch: [arm64]
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
@ -99,41 +99,23 @@ TEXT ·p256MovCond(SB),NOSPLIT,$0
|
|||||||
MOVD b+16(FP), b_ptr
|
MOVD b+16(FP), b_ptr
|
||||||
MOVD cond+24(FP), R3
|
MOVD cond+24(FP), R3
|
||||||
|
|
||||||
CMP $0, R3
|
VEOR V0.B16, V0.B16, V0.B16
|
||||||
// Two remarks:
|
VMOV R3, V1.S4
|
||||||
// 1) Will want to revisit NEON, when support is better
|
VCMEQ V0.S4, V1.S4, V2.S4
|
||||||
// 2) CSEL might not be constant time on all ARM processors
|
|
||||||
LDP 0*16(a_ptr), (R4, R5)
|
|
||||||
LDP 1*16(a_ptr), (R6, R7)
|
|
||||||
LDP 2*16(a_ptr), (R8, R9)
|
|
||||||
LDP 0*16(b_ptr), (R16, R17)
|
|
||||||
LDP 1*16(b_ptr), (R19, R20)
|
|
||||||
LDP 2*16(b_ptr), (R21, R22)
|
|
||||||
CSEL EQ, R16, R4, R4
|
|
||||||
CSEL EQ, R17, R5, R5
|
|
||||||
CSEL EQ, R19, R6, R6
|
|
||||||
CSEL EQ, R20, R7, R7
|
|
||||||
CSEL EQ, R21, R8, R8
|
|
||||||
CSEL EQ, R22, R9, R9
|
|
||||||
STP (R4, R5), 0*16(res_ptr)
|
|
||||||
STP (R6, R7), 1*16(res_ptr)
|
|
||||||
STP (R8, R9), 2*16(res_ptr)
|
|
||||||
|
|
||||||
LDP 3*16(a_ptr), (R4, R5)
|
VLD1.P (48)(a_ptr), [V3.B16, V4.B16, V5.B16]
|
||||||
LDP 4*16(a_ptr), (R6, R7)
|
VLD1.P (48)(b_ptr), [V6.B16, V7.B16, V8.B16]
|
||||||
LDP 5*16(a_ptr), (R8, R9)
|
VBIT V2.B16, V6.B16, V3.B16
|
||||||
LDP 3*16(b_ptr), (R16, R17)
|
VBIT V2.B16, V7.B16, V4.B16
|
||||||
LDP 4*16(b_ptr), (R19, R20)
|
VBIT V2.B16, V8.B16, V5.B16
|
||||||
LDP 5*16(b_ptr), (R21, R22)
|
VST1.P [V3.B16, V4.B16, V5.B16], (48)(res_ptr)
|
||||||
CSEL EQ, R16, R4, R4
|
|
||||||
CSEL EQ, R17, R5, R5
|
VLD1 (a_ptr), [V3.B16, V4.B16, V5.B16]
|
||||||
CSEL EQ, R19, R6, R6
|
VLD1 (b_ptr), [V6.B16, V7.B16, V8.B16]
|
||||||
CSEL EQ, R20, R7, R7
|
VBIT V2.B16, V6.B16, V3.B16
|
||||||
CSEL EQ, R21, R8, R8
|
VBIT V2.B16, V7.B16, V4.B16
|
||||||
CSEL EQ, R22, R9, R9
|
VBIT V2.B16, V8.B16, V5.B16
|
||||||
STP (R4, R5), 3*16(res_ptr)
|
VST1 [V3.B16, V4.B16, V5.B16], (res_ptr)
|
||||||
STP (R6, R7), 4*16(res_ptr)
|
|
||||||
STP (R8, R9), 5*16(res_ptr)
|
|
||||||
|
|
||||||
RET
|
RET
|
||||||
/* ---------------------------------------*/
|
/* ---------------------------------------*/
|
||||||
|
Loading…
x
Reference in New Issue
Block a user