internal/sm2ec: use NEON instructions for p256MovCond

This commit is contained in:
Sun Yimin 2023-11-07 15:05:49 +08:00 committed by GitHub
parent 39751be045
commit 06a310dd4d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 35 deletions

View File

@ -14,7 +14,7 @@ jobs:
test: test:
strategy: strategy:
matrix: matrix:
go-version: [1.21.x] go-version: [1.16.x]
arch: [arm64] arch: [arm64]
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:

View File

@ -99,41 +99,23 @@ TEXT ·p256MovCond(SB),NOSPLIT,$0
MOVD b+16(FP), b_ptr MOVD b+16(FP), b_ptr
MOVD cond+24(FP), R3 MOVD cond+24(FP), R3
CMP $0, R3 VEOR V0.B16, V0.B16, V0.B16
// Two remarks: VMOV R3, V1.S4
// 1) Will want to revisit NEON, when support is better VCMEQ V0.S4, V1.S4, V2.S4
// 2) CSEL might not be constant time on all ARM processors
LDP 0*16(a_ptr), (R4, R5)
LDP 1*16(a_ptr), (R6, R7)
LDP 2*16(a_ptr), (R8, R9)
LDP 0*16(b_ptr), (R16, R17)
LDP 1*16(b_ptr), (R19, R20)
LDP 2*16(b_ptr), (R21, R22)
CSEL EQ, R16, R4, R4
CSEL EQ, R17, R5, R5
CSEL EQ, R19, R6, R6
CSEL EQ, R20, R7, R7
CSEL EQ, R21, R8, R8
CSEL EQ, R22, R9, R9
STP (R4, R5), 0*16(res_ptr)
STP (R6, R7), 1*16(res_ptr)
STP (R8, R9), 2*16(res_ptr)
LDP 3*16(a_ptr), (R4, R5) VLD1.P (48)(a_ptr), [V3.B16, V4.B16, V5.B16]
LDP 4*16(a_ptr), (R6, R7) VLD1.P (48)(b_ptr), [V6.B16, V7.B16, V8.B16]
LDP 5*16(a_ptr), (R8, R9) VBIT V2.B16, V6.B16, V3.B16
LDP 3*16(b_ptr), (R16, R17) VBIT V2.B16, V7.B16, V4.B16
LDP 4*16(b_ptr), (R19, R20) VBIT V2.B16, V8.B16, V5.B16
LDP 5*16(b_ptr), (R21, R22) VST1.P [V3.B16, V4.B16, V5.B16], (48)(res_ptr)
CSEL EQ, R16, R4, R4
CSEL EQ, R17, R5, R5 VLD1 (a_ptr), [V3.B16, V4.B16, V5.B16]
CSEL EQ, R19, R6, R6 VLD1 (b_ptr), [V6.B16, V7.B16, V8.B16]
CSEL EQ, R20, R7, R7 VBIT V2.B16, V6.B16, V3.B16
CSEL EQ, R21, R8, R8 VBIT V2.B16, V7.B16, V4.B16
CSEL EQ, R22, R9, R9 VBIT V2.B16, V8.B16, V5.B16
STP (R4, R5), 3*16(res_ptr) VST1 [V3.B16, V4.B16, V5.B16], (res_ptr)
STP (R6, R7), 4*16(res_ptr)
STP (R8, R9), 5*16(res_ptr)
RET RET
/* ---------------------------------------*/ /* ---------------------------------------*/