// gmsm/internal/sm2ec/p256_asm_loong64.s
// (listing metadata, not part of the program: 240 lines, 5.0 KiB, highlighted as ArmAsm)

// Copyright 2025 Sun Yimin. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build !purego
#include "textflag.h"
// Register aliases used by the routines in this file.
//   res_ptr, x_ptr, y_ptr : pointer arguments (x_ptr and y_ptr are also reused
//                           as plain data scratch by p256NegCond).
//   acc0..acc5            : working 64-bit words.
//   t0..t4                : temporaries; t0 holds the `cond` argument in the
//                           conditional routines.
// NOTE(review): R29 and R30 are believed to have special roles in the Go
// loong64 toolchain (closure context and assembler temporary respectively) —
// confirm before adding instructions that the assembler may expand into
// multi-instruction sequences while x_ptr/res_ptr hold live values.
#define res_ptr R29
#define x_ptr R30
#define y_ptr R31
#define acc0 R8
#define acc1 R9
#define acc2 R10
#define acc3 R11
#define acc4 R12
#define acc5 R13
#define t0 R14
#define t1 R15
#define t2 R16
#define t3 R17
#define t4 R18
// p256p: four 64-bit words, offset 0x00 first. p256NegCond uses words 1 and 3
// of this table (and the literal -1 for words 0 and 2) as the value it
// subtracts from. Presumably the field prime p, least-significant word
// first — confirm.
DATA p256p<>+0x00(SB)/8, $0xffffffffffffffff
DATA p256p<>+0x08(SB)/8, $0xffffffff00000000
DATA p256p<>+0x10(SB)/8, $0xffffffffffffffff
DATA p256p<>+0x18(SB)/8, $0xfffffffeffffffff
// p256ordK0 and p256ord are not referenced by any routine visible in this
// part of the file. Presumably the group order (least-significant word first)
// and its Montgomery reduction constant — confirm against their users.
DATA p256ordK0<>+0x00(SB)/8, $0x327f9e8872350975
DATA p256ord<>+0x00(SB)/8, $0x53bbf40939d54123
DATA p256ord<>+0x08(SB)/8, $0x7203df6b21c6052b
DATA p256ord<>+0x10(SB)/8, $0xffffffffffffffff
DATA p256ord<>+0x18(SB)/8, $0xfffffffeffffffff
// p256one: read as a 256-bit integer with word 0 least significant,
// p256one + p256p == 2^256, i.e. this is 2^256 mod p256p. Presumably the
// Montgomery representation of 1 — confirm. Not referenced by any routine
// visible in this part of the file.
DATA p256one<>+0x00(SB)/8, $0x0000000000000001
DATA p256one<>+0x08(SB)/8, $0x00000000ffffffff
DATA p256one<>+0x10(SB)/8, $0x0000000000000000
DATA p256one<>+0x18(SB)/8, $0x0000000100000000
// All four tables are file-local (<>) read-only data.
GLOBL p256p<>(SB), RODATA, $32
GLOBL p256ordK0<>(SB), RODATA, $8
GLOBL p256ord<>(SB), RODATA, $32
GLOBL p256one<>(SB), RODATA, $32
/* ---------------------------------------*/
// func p256OrdLittleToBig(res *[32]byte, in *p256OrdElement)
//
// Thin alias: control is transferred straight to p256BigToLittle, which
// reads the same (res, in) argument slots. No frame is allocated here ($0),
// so the callee returns directly to this routine's caller.
TEXT ·p256OrdLittleToBig(SB),NOSPLIT,$0
	JMP	·p256BigToLittle(SB)
/* ---------------------------------------*/
// func p256OrdBigToLittle(res *p256OrdElement, in *[32]byte)
//
// Thin alias: control is transferred straight to p256BigToLittle, which
// reads the same (res, in) argument slots. No frame is allocated here ($0),
// so the callee returns directly to this routine's caller.
TEXT ·p256OrdBigToLittle(SB),NOSPLIT,$0
	JMP	·p256BigToLittle(SB)
/* ---------------------------------------*/
// func p256LittleToBig(res *[32]byte, in *p256Element)
//
// Thin alias: control is transferred straight to p256BigToLittle, which
// reads the same (res, in) argument slots. No frame is allocated here ($0),
// so the callee returns directly to this routine's caller.
TEXT ·p256LittleToBig(SB),NOSPLIT,$0
	JMP	·p256BigToLittle(SB)
/* ---------------------------------------*/
// func p256BigToLittle(res *p256Element, in *[32]byte)
//
// Writes to res the 32 bytes found at in, in fully reversed byte order.
// The reversal is done as four 64-bit words: every word is byte-swapped and
// the word order is mirrored (input word 0 becomes output word 3, and so on).
// Applying the routine twice restores the original bytes, which is why the
// other conversion entry points in this file simply jump here.
//
// All four input words are read before the first output word is written.
TEXT ·p256BigToLittle(SB),NOSPLIT,$0
	MOVV	res+0(FP), res_ptr
	MOVV	in+8(FP), x_ptr

	// Fetch each word and reverse its bytes right away.
	MOVV	(8*0)(x_ptr), acc0
	REVBV	acc0, acc0
	MOVV	(8*1)(x_ptr), acc1
	REVBV	acc1, acc1
	MOVV	(8*2)(x_ptr), acc2
	REVBV	acc2, acc2
	MOVV	(8*3)(x_ptr), acc3
	REVBV	acc3, acc3

	// Emit the words in mirrored positions.
	MOVV	acc0, (8*3)(res_ptr)
	MOVV	acc1, (8*2)(res_ptr)
	MOVV	acc2, (8*1)(res_ptr)
	MOVV	acc3, (8*0)(res_ptr)
	RET
/* ---------------------------------------*/
// func p256MovCond(res, a, b *SM2P256Point, cond int)
//
// Branch-free select over twelve 64-bit words (byte offsets 0..88):
//   cond != 0  ->  res receives the words of a
//   cond == 0  ->  res receives the words of b
// The words are handled in three groups of four, labelled x, y and z below.
// Both inputs are always read in full; the choice is made with
// MASKNEZ/MASKEQZ/OR instead of a conditional jump. Within each group every
// load is issued before the first store of that group.
TEXT ·p256MovCond(SB),NOSPLIT,$0
	MOVV	res+0(FP), res_ptr
	MOVV	a+8(FP), x_ptr
	MOVV	b+16(FP), y_ptr
	MOVV	cond+24(FP), t0

	// ---- x: words 0..3 ----
	MOVV	(8*0)(x_ptr), acc0
	MOVV	(8*0)(y_ptr), t1
	MOVV	(8*1)(x_ptr), acc1
	MOVV	(8*1)(y_ptr), t2
	MOVV	(8*2)(x_ptr), acc2
	MOVV	(8*2)(y_ptr), t3
	MOVV	(8*3)(x_ptr), acc3
	MOVV	(8*3)(y_ptr), t4
	// b words survive only when cond == 0 ...
	MASKNEZ	t0, t1, t1
	MASKNEZ	t0, t2, t2
	MASKNEZ	t0, t3, t3
	MASKNEZ	t0, t4, t4
	// ... a words survive only when cond != 0 ...
	MASKEQZ	t0, acc0, acc0
	MASKEQZ	t0, acc1, acc1
	MASKEQZ	t0, acc2, acc2
	MASKEQZ	t0, acc3, acc3
	// ... so exactly one side of each OR is non-masked.
	OR	t1, acc0
	OR	t2, acc1
	OR	t3, acc2
	OR	t4, acc3
	MOVV	acc0, (8*0)(res_ptr)
	MOVV	acc1, (8*1)(res_ptr)
	MOVV	acc2, (8*2)(res_ptr)
	MOVV	acc3, (8*3)(res_ptr)

	// ---- y: words 4..7 ----
	MOVV	(8*4)(x_ptr), acc0
	MOVV	(8*4)(y_ptr), t1
	MOVV	(8*5)(x_ptr), acc1
	MOVV	(8*5)(y_ptr), t2
	MOVV	(8*6)(x_ptr), acc2
	MOVV	(8*6)(y_ptr), t3
	MOVV	(8*7)(x_ptr), acc3
	MOVV	(8*7)(y_ptr), t4
	MASKNEZ	t0, t1, t1
	MASKNEZ	t0, t2, t2
	MASKNEZ	t0, t3, t3
	MASKNEZ	t0, t4, t4
	MASKEQZ	t0, acc0, acc0
	MASKEQZ	t0, acc1, acc1
	MASKEQZ	t0, acc2, acc2
	MASKEQZ	t0, acc3, acc3
	OR	t1, acc0
	OR	t2, acc1
	OR	t3, acc2
	OR	t4, acc3
	MOVV	acc0, (8*4)(res_ptr)
	MOVV	acc1, (8*5)(res_ptr)
	MOVV	acc2, (8*6)(res_ptr)
	MOVV	acc3, (8*7)(res_ptr)

	// ---- z: words 8..11 ----
	MOVV	(8*8)(x_ptr), acc0
	MOVV	(8*8)(y_ptr), t1
	MOVV	(8*9)(x_ptr), acc1
	MOVV	(8*9)(y_ptr), t2
	MOVV	(8*10)(x_ptr), acc2
	MOVV	(8*10)(y_ptr), t3
	MOVV	(8*11)(x_ptr), acc3
	MOVV	(8*11)(y_ptr), t4
	MASKNEZ	t0, t1, t1
	MASKNEZ	t0, t2, t2
	MASKNEZ	t0, t3, t3
	MASKNEZ	t0, t4, t4
	MASKEQZ	t0, acc0, acc0
	MASKEQZ	t0, acc1, acc1
	MASKEQZ	t0, acc2, acc2
	MASKEQZ	t0, acc3, acc3
	OR	t1, acc0
	OR	t2, acc1
	OR	t3, acc2
	OR	t4, acc3
	MOVV	acc0, (8*8)(res_ptr)
	MOVV	acc1, (8*9)(res_ptr)
	MOVV	acc2, (8*10)(res_ptr)
	MOVV	acc3, (8*11)(res_ptr)
	RET
/* ---------------------------------------*/
// func p256NegCond(val *p256Element, cond int)
//
// In-place conditional negation of the four 64-bit words at val (word 0 is
// the least significant):
//   cond != 0  ->  val = p256p - val
//   cond == 0  ->  val is rewritten with its original words
// The subtraction is always performed; MASKNEZ/MASKEQZ/OR then pick between
// the original and the subtracted words without a branch.
// No reduction follows the subtraction, so an all-zero val with cond != 0
// yields p256p itself. The borrow out of the top word is discarded —
// assumes val <= p256p; TODO confirm against callers.
TEXT ·p256NegCond(SB),NOSPLIT,$0
MOVV val+0(FP), res_ptr
MOVV cond+8(FP), t0
// acc3:acc0 = p256p. Words 0 and 2 of p256p are all ones, so the immediate
// -1 is used for them instead of a memory load.
// NOTE(review): these symbol-relative loads happen before x_ptr (R30) is
// given a live value below; R30 may be used as a temporary by the assembler
// for such loads — confirm before reordering.
MOVV $-1, acc0
MOVV p256p<>+0x08(SB), acc1
MOVV $-1, acc2
MOVV p256p<>+0x18(SB), acc3
// Load the original value. x_ptr and y_ptr are reused here as plain data
// registers for words 1 and 2; they do not hold pointers in this routine.
MOVV (8*0)(res_ptr), acc4
MOVV (8*1)(res_ptr), x_ptr
MOVV (8*2)(res_ptr), y_ptr
MOVV (8*3)(res_ptr), acc5
// Speculatively subtract: acc = p256p - val, computed regardless of cond.
// Word 0: all-ones minus anything never borrows.
SUBV acc4, acc0
// Word 1: t1 = 1 if val[1] > p256p[1], i.e. this word borrows.
SGTU x_ptr, acc1, t1
SUBV x_ptr, acc1
// Word 2: all-ones minus val[2] cannot borrow by itself; a borrow leaves
// this word only if the incoming borrow t1 exceeds the partial result
// (t1 == 1 and the partial result is 0). t2 records that.
SUBV y_ptr, acc2
SGTU t1, acc2, t2
SUBV t1, acc2
// Word 3: subtract val[3] and the incoming borrow; the outgoing borrow is
// not tracked.
SUBV acc5, acc3
SUBV t2, acc3
// Per word: keep the original (acc4, x_ptr, y_ptr, acc5) when cond == 0,
// keep the difference (acc0..acc3) when cond != 0, then merge with OR.
MASKNEZ t0, acc4, acc4
MASKEQZ t0, acc0, acc0
OR acc4, acc0
MASKNEZ t0, x_ptr, x_ptr
MASKEQZ t0, acc1, acc1
OR x_ptr, acc1
MASKNEZ t0, y_ptr, y_ptr
MASKEQZ t0, acc2, acc2
OR y_ptr, acc2
MASKNEZ t0, acc5, acc5
MASKEQZ t0, acc3, acc3
OR acc5, acc3
// Write the selected words back over val.
MOVV acc0, (8*0)(res_ptr)
MOVV acc1, (8*1)(res_ptr)
MOVV acc2, (8*2)(res_ptr)
MOVV acc3, (8*3)(res_ptr)
RET