gmsm/internal/subtle/xor_riscv64.s
// Copyright 2024 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// func xorBytes(dst, a, b *byte, n int)
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
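	// Register usage: X10 = dst, X11 = a, X12 = b, X13 = n (bytes remaining).
	// Strategy: when dst, a and b share the same 8-byte alignment, XOR in
	// 64-, 32- and 16-byte blocks using full-register loads; otherwise fall
	// back to the 4-byte and 1-byte loops at the end.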
	MOV dst+0(FP), X10
	MOV a+8(FP), X11
	MOV b+16(FP), X12
	MOV n+24(FP), X13
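	// Inputs shorter than 32 bytes are not worth aligning; use the byte loops.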
	MOV $32, X15
	BLT X13, X15, loop4_check
	// Check alignment - if alignment differs we have to do one byte at a time.
	AND $7, X10, X5
	AND $7, X11, X6
	AND $7, X12, X7
	BNE X5, X6, loop4_check
	BNE X5, X7, loop4_check
	BEQZ X5, loop64_check
	// XOR one byte at a time until we reach 8-byte alignment.
	MOV $8, X8
	SUB X5, X8
	SUB X8, X13
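	// X8 = number of leading bytes to XOR before reaching 8-byte alignment;
	// subtract them from n up front.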
align:
	MOVBU 0(X11), X16
	MOVBU 0(X12), X17
	XOR X16, X17
	MOVB X17, 0(X10)
	ADD $1, X10
	ADD $1, X11
	ADD $1, X12
	SUB $1, X8
	BNEZ X8, align
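	// Main loop: XOR 64 bytes per iteration while at least 64 bytes remain.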
loop64_check:
	MOV $64, X15
	BLT X13, X15, tail32_check
	PCALIGN $16
loop64:
	MOV 0(X11), X16
	MOV 0(X12), X17
	MOV 8(X11), X18
	MOV 8(X12), X19
	XOR X16, X17
	XOR X18, X19
	MOV X17, 0(X10)
	MOV X19, 8(X10)
	MOV 16(X11), X20
	MOV 16(X12), X21
	MOV 24(X11), X22
	MOV 24(X12), X23
	XOR X20, X21
	XOR X22, X23
	MOV X21, 16(X10)
	MOV X23, 24(X10)
	MOV 32(X11), X16
	MOV 32(X12), X17
	MOV 40(X11), X18
	MOV 40(X12), X19
	XOR X16, X17
	XOR X18, X19
	MOV X17, 32(X10)
	MOV X19, 40(X10)
	MOV 48(X11), X20
	MOV 48(X12), X21
	MOV 56(X11), X22
	MOV 56(X12), X23
	XOR X20, X21
	XOR X22, X23
	MOV X21, 48(X10)
	MOV X23, 56(X10)
	ADD $64, X10
	ADD $64, X11
	ADD $64, X12
	SUB $64, X13
	BGE X13, X15, loop64
	BEQZ X13, done
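	// XOR a remaining 32-byte block, if any.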
tail32_check:
	MOV $32, X15
	BLT X13, X15, tail16_check
	MOV 0(X11), X16
	MOV 0(X12), X17
	MOV 8(X11), X18
	MOV 8(X12), X19
	XOR X16, X17
	XOR X18, X19
	MOV X17, 0(X10)
	MOV X19, 8(X10)
	MOV 16(X11), X20
	MOV 16(X12), X21
	MOV 24(X11), X22
	MOV 24(X12), X23
	XOR X20, X21
	XOR X22, X23
	MOV X21, 16(X10)
	MOV X23, 24(X10)
	ADD $32, X10
	ADD $32, X11
	ADD $32, X12
	SUB $32, X13
	BEQZ X13, done
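	// XOR a remaining 16-byte block, if any.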
tail16_check:
	MOV $16, X15
	BLT X13, X15, loop4_check
	MOV 0(X11), X16
	MOV 0(X12), X17
	MOV 8(X11), X18
	MOV 8(X12), X19
	XOR X16, X17
	XOR X18, X19
	MOV X17, 0(X10)
	MOV X19, 8(X10)
	ADD $16, X10
	ADD $16, X11
	ADD $16, X12
	SUB $16, X13
	BEQZ X13, done
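	// 4-byte loop; also the entry point for short or misaligned inputs.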
loop4_check:
	MOV $4, X15
	BLT X13, X15, loop1
	PCALIGN $16
loop4:
	MOVBU 0(X11), X16
	MOVBU 0(X12), X17
	MOVBU 1(X11), X18
	MOVBU 1(X12), X19
	XOR X16, X17
	XOR X18, X19
	MOVB X17, 0(X10)
	MOVB X19, 1(X10)
	MOVBU 2(X11), X20
	MOVBU 2(X12), X21
	MOVBU 3(X11), X22
	MOVBU 3(X12), X23
	XOR X20, X21
	XOR X22, X23
	MOVB X21, 2(X10)
	MOVB X23, 3(X10)
	ADD $4, X10
	ADD $4, X11
	ADD $4, X12
	SUB $4, X13
	BGE X13, X15, loop4
	PCALIGN $16
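	// XOR any remaining bytes one at a time.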
loop1:
	BEQZ X13, done
	MOVBU 0(X11), X16
	MOVBU 0(X12), X17
	XOR X16, X17
	MOVB X17, 0(X10)
	ADD $1, X10
	ADD $1, X11
	ADD $1, X12
	SUB $1, X13
	JMP loop1
done:
	RET