internal/subtle: use VLM/VSTM only outside the 64-byte loop (tail path)

This commit is contained in:
Sun Yimin 2024-08-28 13:18:42 +08:00 committed by GitHub
parent 40bd384f21
commit 45584ccaa6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -13,13 +13,22 @@ TEXT ·xorBytes(SB),NOSPLIT,$0-32
CMPBLT R4, $64, tail CMPBLT R4, $64, tail
loop_64: loop_64:
VLM 0(R2)(R5*1), V0, V3 VL 0(R2)(R5*1), V0
VLM 0(R3)(R5*1), V4, V7 VL 16(R2)(R5*1), V1
VL 32(R2)(R5*1), V2
VL 48(R2)(R5*1), V3
VL 0(R3)(R5*1), V4
VL 16(R3)(R5*1), V5
VL 32(R3)(R5*1), V6
VL 48(R3)(R5*1), V7
VX V0, V4, V4 VX V0, V4, V4
VX V1, V5, V5 VX V1, V5, V5
VX V2, V6, V6 VX V2, V6, V6
VX V3, V7, V7 VX V3, V7, V7
VSTM V4, V7, 0(R1)(R5*1) VST V4, 0(R1)(R5*1)
VST V5, 16(R1)(R5*1)
VST V6, 32(R1)(R5*1)
VST V7, 48(R1)(R5*1)
LAY 64(R5), R5 LAY 64(R5), R5
SUB $64, R4 SUB $64, R4
CMPBGE R4, $64, loop_64 CMPBGE R4, $64, loop_64
@ -27,14 +36,14 @@ loop_64:
tail: tail:
CMPBEQ R4, $0, done CMPBEQ R4, $0, done
CMPBLT R4, $32, less_than32 CMPBLT R4, $32, less_than32
VL 0(R2)(R5*1), V0 VLM 0(R2)(R5*1), V0, V1
VL 16(R2)(R5*1), V1 //VL 16(R2)(R5*1), V1
VL 0(R3)(R5*1), V2 VLM 0(R3)(R5*1), V2, V3
VL 16(R3)(R5*1), V3 //VL 16(R3)(R5*1), V3
VX V0, V2, V2 VX V0, V2, V2
VX V1, V3, V3 VX V1, V3, V3
VST V2, 0(R1)(R5*1) VSTM V2, V3, 0(R1)(R5*1)
VST V3, 16(R1)(R5*1) //VST V3, 16(R1)(R5*1)
LAY 32(R5), R5 LAY 32(R5), R5
SUB $32, R4 SUB $32, R4