2024-09-02 13:13:16 +08:00
|
|
|
// Copyright 2024 Sun Yimin. All rights reserved.
|
|
|
|
// Use of this source code is governed by an MIT-style
|
|
|
|
// license that can be found in the LICENSE file.
|
|
|
|
|
|
|
|
//go:build !purego
|
|
|
|
|
|
|
|
#include "textflag.h"
|
|
|
|
|
|
|
|
// Register aliases used by xorBytes.
// DST_PTR, A_PTR and B_PTR are the running cursors into dst, a and b.
#define DST_PTR X10
#define A_PTR X11
#define B_PTR X12
// REMAIN is the number of bytes that still have to be processed.
#define REMAIN X13
// CHUNK holds the size threshold of the stage currently being tested.
#define CHUNK X15

// func xorBytes(dst, a, b *byte, n int)
//
// Stores a[i] ^ b[i] into dst[i] for the n bytes starting at each pointer.
//
// Strategy:
//   - n < 32, or the three pointers do not share the same low three address
//     bits: everything is done with byte loads/stores (4 per iteration, then
//     1 per iteration).
//   - otherwise: single bytes are processed until the pointers are 8-byte
//     aligned, then 64-byte blocks, one optional 32-byte block and one
//     optional 16-byte block are processed with 8-byte loads/stores; whatever
//     is left falls through to the byte loops.
//
// Scratch registers: X5-X8 (alignment bookkeeping), X16-X23 (data).
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
	MOV	dst+0(FP), DST_PTR
	MOV	a+8(FP), A_PTR
	MOV	b+16(FP), B_PTR
	MOV	n+24(FP), REMAIN

	// Short inputs are not worth aligning; go straight to the byte loops.
	MOV	$32, CHUNK
	BLT	REMAIN, CHUNK, bytes4_check

	// Word accesses are only used when dst, a and b are equally
	// (mis)aligned relative to an 8-byte boundary.
	AND	$7, DST_PTR, X5
	AND	$7, A_PTR, X6
	AND	$7, B_PTR, X7
	BNE	X5, X6, bytes4_check
	BNE	X5, X7, bytes4_check
	BEQZ	X5, bulk64_check

	// X8 = 8 - misalignment = number of leading bytes to handle singly.
	// They are charged against REMAIN up front.
	MOV	$8, X8
	SUB	X5, X8
	SUB	X8, REMAIN
align_head:
	MOVBU	0(A_PTR), X16
	MOVBU	0(B_PTR), X17
	XOR	X16, X17, X17
	MOVB	X17, 0(DST_PTR)
	ADD	$1, DST_PTR
	ADD	$1, A_PTR
	ADD	$1, B_PTR
	SUB	$1, X8
	BNEZ	X8, align_head

bulk64_check:
	MOV	$64, CHUNK
	BLT	REMAIN, CHUNK, chunk32_check
	PCALIGN	$16
bulk64:
	// Bytes 0-15 of the block.
	MOV	0(A_PTR), X16
	MOV	0(B_PTR), X17
	MOV	8(A_PTR), X18
	MOV	8(B_PTR), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 0(DST_PTR)
	MOV	X19, 8(DST_PTR)
	// Bytes 16-31.
	MOV	16(A_PTR), X20
	MOV	16(B_PTR), X21
	MOV	24(A_PTR), X22
	MOV	24(B_PTR), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOV	X21, 16(DST_PTR)
	MOV	X23, 24(DST_PTR)
	// Bytes 32-47.
	MOV	32(A_PTR), X16
	MOV	32(B_PTR), X17
	MOV	40(A_PTR), X18
	MOV	40(B_PTR), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 32(DST_PTR)
	MOV	X19, 40(DST_PTR)
	// Bytes 48-63.
	MOV	48(A_PTR), X20
	MOV	48(B_PTR), X21
	MOV	56(A_PTR), X22
	MOV	56(B_PTR), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOV	X21, 48(DST_PTR)
	MOV	X23, 56(DST_PTR)
	ADD	$64, DST_PTR
	ADD	$64, A_PTR
	ADD	$64, B_PTR
	SUB	$64, REMAIN
	// CHUNK is still 64 here: keep going while a full block remains.
	BGE	REMAIN, CHUNK, bulk64
	BEQZ	REMAIN, finished

chunk32_check:
	MOV	$32, CHUNK
	BLT	REMAIN, CHUNK, chunk16_check
	MOV	0(A_PTR), X16
	MOV	0(B_PTR), X17
	MOV	8(A_PTR), X18
	MOV	8(B_PTR), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 0(DST_PTR)
	MOV	X19, 8(DST_PTR)
	MOV	16(A_PTR), X20
	MOV	16(B_PTR), X21
	MOV	24(A_PTR), X22
	MOV	24(B_PTR), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOV	X21, 16(DST_PTR)
	MOV	X23, 24(DST_PTR)
	ADD	$32, DST_PTR
	ADD	$32, A_PTR
	ADD	$32, B_PTR
	SUB	$32, REMAIN
	BEQZ	REMAIN, finished

chunk16_check:
	MOV	$16, CHUNK
	BLT	REMAIN, CHUNK, bytes4_check
	MOV	0(A_PTR), X16
	MOV	0(B_PTR), X17
	MOV	8(A_PTR), X18
	MOV	8(B_PTR), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 0(DST_PTR)
	MOV	X19, 8(DST_PTR)
	ADD	$16, DST_PTR
	ADD	$16, A_PTR
	ADD	$16, B_PTR
	SUB	$16, REMAIN
	BEQZ	REMAIN, finished

	// Byte-granular path: entered directly for short or differently
	// aligned inputs, and by fall-through for the sub-16-byte remainder.
bytes4_check:
	MOV	$4, CHUNK
	BLT	REMAIN, CHUNK, bytes1
	PCALIGN	$16
bytes4:
	MOVBU	0(A_PTR), X16
	MOVBU	0(B_PTR), X17
	MOVBU	1(A_PTR), X18
	MOVBU	1(B_PTR), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOVB	X17, 0(DST_PTR)
	MOVB	X19, 1(DST_PTR)
	MOVBU	2(A_PTR), X20
	MOVBU	2(B_PTR), X21
	MOVBU	3(A_PTR), X22
	MOVBU	3(B_PTR), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOVB	X21, 2(DST_PTR)
	MOVB	X23, 3(DST_PTR)
	ADD	$4, DST_PTR
	ADD	$4, A_PTR
	ADD	$4, B_PTR
	SUB	$4, REMAIN
	BGE	REMAIN, CHUNK, bytes4

	PCALIGN	$16
bytes1:
	// Top-tested loop: exits as soon as no bytes remain.
	BEQZ	REMAIN, finished
	MOVBU	0(A_PTR), X16
	MOVBU	0(B_PTR), X17
	XOR	X16, X17, X17
	MOVB	X17, 0(DST_PTR)
	ADD	$1, DST_PTR
	ADD	$1, A_PTR
	ADD	$1, B_PTR
	SUB	$1, REMAIN
	JMP	bytes1

finished:
	RET

#undef DST_PTR
#undef A_PTR
#undef B_PTR
#undef REMAIN
#undef CHUNK
|