From 7c832f65e78cbc648a416af6225aafc825ce695d Mon Sep 17 00:00:00 2001 From: Sun Yimin Date: Tue, 21 May 2024 17:38:04 +0800 Subject: [PATCH] kdf-sm3: optimize arm64 --- sm3/sm3blocks_arm64.s | 12 ++++++------ sm3/sm3blocks_test.go | 8 -------- 2 files changed, 6 insertions(+), 14 deletions(-) diff --git a/sm3/sm3blocks_arm64.s b/sm3/sm3blocks_arm64.s index 02f0303..29e0ac7 100644 --- a/sm3/sm3blocks_arm64.s +++ b/sm3/sm3blocks_arm64.s @@ -197,7 +197,9 @@ TEXT ·blockMultBy4(SB), NOSPLIT, $0 MOVD (srcPtrPtr), srcPtr4 loop: + // reset wordPtr MOVD wordStart, wordPtr + // load message block prepare4Words prepare4Words @@ -271,9 +273,8 @@ loop: ROUND_16_63(62, T30, c, d, e, f, g, h, a, b) ROUND_16_63(63, T31, b, c, d, e, f, g, h, a) - MOVD statePtr, R20 - VLD1.P 64(R20), [V8.S4, V9.S4, V10.S4, V11.S4] - VLD1 (R20), [V12.S4, V13.S4, V14.S4, V15.S4] + VLD1.P 64(statePtr), [V8.S4, V9.S4, V10.S4, V11.S4] + VLD1 (statePtr), [V12.S4, V13.S4, V14.S4, V15.S4] VEOR a.B16, V8.B16, a.B16 VEOR b.B16, V9.B16, b.B16 VEOR c.B16, V10.B16, c.B16 @@ -282,9 +283,8 @@ loop: VEOR f.B16, V13.B16, f.B16 VEOR g.B16, V14.B16, g.B16 VEOR h.B16, V15.B16, h.B16 - MOVD statePtr, R20 - VST1.P [a.S4, b.S4, c.S4, d.S4], 64(R20) - VST1 [e.S4, f.S4, g.S4, h.S4], (R20) + VST1.P [e.S4, f.S4, g.S4, h.S4], -64(statePtr) + VST1 [a.S4, b.S4, c.S4, d.S4], (statePtr) SUB $1, blockCount CBNZ blockCount, loop diff --git a/sm3/sm3blocks_test.go b/sm3/sm3blocks_test.go index 63c5bae..5e78ce3 100644 --- a/sm3/sm3blocks_test.go +++ b/sm3/sm3blocks_test.go @@ -3,7 +3,6 @@ package sm3 import ( - "encoding/binary" "fmt" "testing" ) @@ -54,13 +53,6 @@ func TestBlockMultBy4(t *testing.T) { buffer := make([]byte, preallocSize) blockMultBy4(&digs[0], &p[0], &buffer[0], 1) expected := "[66c7f0f4 62eeedd9 d1f2d46b dc10e4e2 4167c487 5cf2f7a2 297da02b 8f4ba8e0]" - for i := 0; i < 128+68*4*4; i += 64 { - fmt.Printf("%08x %08x %08x %08x ", binary.LittleEndian.Uint32(buffer[i:]), binary.LittleEndian.Uint32(buffer[i+4:]), binary.LittleEndian.Uint32(buffer[i+8:]), binary.LittleEndian.Uint32(buffer[i+12:])) - fmt.Printf("%08x %08x %08x %08x ", binary.LittleEndian.Uint32(buffer[i+16:]), binary.LittleEndian.Uint32(buffer[i+20:]), binary.LittleEndian.Uint32(buffer[i+24:]), binary.LittleEndian.Uint32(buffer[i+28:])) - fmt.Printf("%08x %08x %08x %08x ", binary.LittleEndian.Uint32(buffer[i+32:]), binary.LittleEndian.Uint32(buffer[i+36:]), binary.LittleEndian.Uint32(buffer[i+40:]), binary.LittleEndian.Uint32(buffer[i+44:])) - fmt.Printf("%08x %08x %08x %08x ", binary.LittleEndian.Uint32(buffer[i+48:]), binary.LittleEndian.Uint32(buffer[i+52:]), binary.LittleEndian.Uint32(buffer[i+56:]), binary.LittleEndian.Uint32(buffer[i+60:])) - fmt.Println() - } s := fmt.Sprintf("%x", digs[0][:]) if s != expected { t.Errorf("digs[0] got %s", s)