diff --git a/internal/sm2ec/p256_asm_loong64.s b/internal/sm2ec/p256_asm_loong64.s
index 095cbe1..58ff2f2 100644
--- a/internal/sm2ec/p256_asm_loong64.s
+++ b/internal/sm2ec/p256_asm_loong64.s
@@ -1621,7 +1621,7 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-48
 	STx(h)
 
 	LDy(z1in)
-	CALL	p256MulInternal<>(SB)    // z3 = h * z1
+	CALL	sm2P256MulInternal<>(SB)    // z3 = h * z1
 
 	// iff select == 0, z3 = z1
 	MOVV (8*8)(a_ptr), acc0
@@ -1666,35 +1666,35 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-48
 	MOVV y3, (8*11)(t0)
 
 	LDy(z1sqr)
-	CALL	p256MulInternal<>(SB)    // z1 ^ 3
+	CALL	sm2P256MulInternal<>(SB)    // z1 ^ 3
 
 	LDx(y2in)
-	CALL	p256MulInternal<>(SB)    // s2 = y2 * z1ˆ3
+	CALL	sm2P256MulInternal<>(SB)    // s2 = y2 * z1ˆ3
 	STy(s2)
 
 	LDx(y1in)
-	CALL	p256SubInternal<>(SB)    // r = s2 - s1
+	CALL	sm2P256Subinternal<>(SB)    // r = s2 - s1
 	STx(r)
 
-	CALL	p256SqrInternal<>(SB)    // rsqr = rˆ2
+	CALL	sm2P256SqrInternal<>(SB)    // rsqr = rˆ2
 	STy	(rsqr)
 
 	LDx(h)
-	CALL	p256SqrInternal<>(SB)    // hsqr = hˆ2
+	CALL	sm2P256SqrInternal<>(SB)    // hsqr = hˆ2
 	STy(hsqr)
 
-	CALL	p256MulInternal<>(SB)    // hcub = hˆ3
+	CALL	sm2P256MulInternal<>(SB)    // hcub = hˆ3
 	STy(hcub)
 
 	LDx(y1in)
-	CALL	p256MulInternal<>(SB)    // y1 * hˆ3
+	CALL	sm2P256MulInternal<>(SB)    // y1 * hˆ3
 	STy(s2)
 
 	MOVV hsqr(0*8), x0
 	MOVV hsqr(1*8), x1
 	MOVV hsqr(2*8), x2
 	MOVV hsqr(3*8), x3
-	CALL	p256MulInternal<>(SB)    // hsqr * u1
+	CALL	sm2P256MulInternal<>(SB)    // hsqr * u1
 	MOVV y0, h(0*8)
 	MOVV y1, h(1*8)
 	MOVV y2, h(2*8)
@@ -1703,14 +1703,14 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-48
 	p256MulBy2Inline               // u1 * hˆ2 * 2, inline
 
 	LDy(rsqr)
-	CALL	p256SubInternal<>(SB)    // rˆ2 - u1 * hˆ2 * 2
+	CALL	sm2P256Subinternal<>(SB)    // rˆ2 - u1 * hˆ2 * 2
 
 	MOVV x0, y0 
 	MOVV x1, y1
 	MOVV x2, y2
 	MOVV x3, y3
 	LDy(hcub)
-	CALL	p256SubInternal<>(SB)
+	CALL	sm2P256Subinternal<>(SB)
 
 	MOVV (8*0)(a_ptr), acc0
 	MOVV (8*1)(a_ptr), acc1
@@ -1757,19 +1757,19 @@ TEXT ·p256PointAddAffineAsm(SB),0,$264-48
 	MOVV h(1*8), y1
 	MOVV h(2*8), y2
 	MOVV h(3*8), y3
-	CALL	p256SubInternal<>(SB)
+	CALL	sm2P256Subinternal<>(SB)
 
 	MOVV r(0*8), y0 
 	MOVV r(1*8), y1
 	MOVV r(2*8), y2
 	MOVV r(3*8), y3
-	CALL	p256MulInternal<>(SB)
+	CALL	sm2P256MulInternal<>(SB)
 
 	MOVV s2(0*8), x0 
 	MOVV s2(1*8), x1
 	MOVV s2(2*8), x2
 	MOVV s2(3*8), x3
-	CALL	p256SubInternal<>(SB)
+	CALL	sm2P256Subinternal<>(SB)
 
 	MOVV (8*4)(a_ptr), acc0
 	MOVV (8*5)(a_ptr), acc1
diff --git a/internal/sm2ec/sm2p256_asm.go b/internal/sm2ec/sm2p256_asm.go
index 4e85dbf..c6108b6 100644
--- a/internal/sm2ec/sm2p256_asm.go
+++ b/internal/sm2ec/sm2p256_asm.go
@@ -310,6 +310,11 @@ var supportBMI2 = cpu.X86.HasADX && cpu.X86.HasBMI2
 
 var supportAVX2 = cpu.X86.HasAVX2
 
+var ( // loong64 CPU feature flags, copied from cpu.Loong64 at package initialization
+	supportLSX  = cpu.Loong64.HasLSX  // true when the cpu package reports the LSX extension
+	supportLASX = cpu.Loong64.HasLASX // true when the cpu package reports the LASX extension
+)
+
 // Montgomery multiplication. Sets res = in1 * in2 * R⁻¹ mod p.
 //
 //go:noescape