From: Kevin Bracey <kevin@bracey.fi>
To: <linux-arm-kernel@lists.infradead.org>
Cc: Kevin Bracey <kevin@bracey.fi>,
Catalin Marinas <catalin.marinas@arm.com>,
Will Deacon <will@kernel.org>, Ard Biesheuvel <ardb@kernel.org>
Subject: [PATCH 4/4] arm64: accelerate crc32_be
Date: Thu, 13 Jan 2022 08:41:18 +0200 [thread overview]
Message-ID: <20220113064118.1580916-5-kevin@bracey.fi> (raw)
In-Reply-To: <20220113064118.1580916-1-kevin@bracey.fi>
It makes no sense to leave crc32_be using the generic code while we
only accelerate the little-endian ops.
Even though the big-endian form doesn't fit as smoothly into the arm64,
we can speed it up and avoid hitting the D cache.
Tested on Cortex-A53. Without acceleration:
crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
crc32: self tests passed, processed 225944 bytes in 192240 nsec
crc32c: CRC_LE_BITS = 64
crc32c: self tests passed, processed 112972 bytes in 21360 nsec
With acceleration:
crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
crc32: self tests passed, processed 225944 bytes in 53480 nsec
crc32c: CRC_LE_BITS = 64
crc32c: self tests passed, processed 112972 bytes in 21480 nsec
Signed-off-by: Kevin Bracey <kevin@bracey.fi>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Will Deacon <will@kernel.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
---
arch/arm64/lib/crc32.S | 87 +++++++++++++++++++++++++++++++++++-------
1 file changed, 73 insertions(+), 14 deletions(-)
diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S
index 0f9e10ecda23..906210441d76 100644
--- a/arch/arm64/lib/crc32.S
+++ b/arch/arm64/lib/crc32.S
@@ -11,7 +11,44 @@
.arch armv8-a+crc
- .macro __crc32, c
+ .macro byteorder, reg, be
+ .if \be
+CPU_LE( rev \reg, \reg )
+ .else
+CPU_BE( rev \reg, \reg )
+ .endif
+ .endm
+
+ .macro byteorder16, reg, be
+ .if \be
+CPU_LE( rev16 \reg, \reg )
+ .else
+CPU_BE( rev16 \reg, \reg )
+ .endif
+ .endm
+
+ .macro bitorder, reg, be
+ .if \be
+ rbit \reg, \reg
+ .endif
+ .endm
+
+ .macro bitorder16, reg, be
+ .if \be
+ rbit \reg, \reg
+ lsr \reg, \reg, #16
+ .endif
+ .endm
+
+ .macro bitorder8, reg, be
+ .if \be
+ rbit \reg, \reg
+ lsr \reg, \reg, #24
+ .endif
+ .endm
+
+ .macro __crc32, c, be=0
+ bitorder w0, \be
cmp x2, #16
b.lt 8f // less than 16 bytes
@@ -24,10 +61,14 @@
add x8, x8, x1
add x1, x1, x7
ldp x5, x6, [x8]
-CPU_BE( rev x3, x3 )
-CPU_BE( rev x4, x4 )
-CPU_BE( rev x5, x5 )
-CPU_BE( rev x6, x6 )
+ byteorder x3, \be
+ byteorder x4, \be
+ byteorder x5, \be
+ byteorder x6, \be
+ bitorder x3, \be
+ bitorder x4, \be
+ bitorder x5, \be
+ bitorder x6, \be
tst x7, #8
crc32\c\()x w8, w0, x3
@@ -55,33 +96,43 @@ CPU_BE( rev x6, x6 )
32: ldp x3, x4, [x1], #32
sub x2, x2, #32
ldp x5, x6, [x1, #-16]
-CPU_BE( rev x3, x3 )
-CPU_BE( rev x4, x4 )
-CPU_BE( rev x5, x5 )
-CPU_BE( rev x6, x6 )
+ byteorder x3, \be
+ byteorder x4, \be
+ byteorder x5, \be
+ byteorder x6, \be
+ bitorder x3, \be
+ bitorder x4, \be
+ bitorder x5, \be
+ bitorder x6, \be
crc32\c\()x w0, w0, x3
crc32\c\()x w0, w0, x4
crc32\c\()x w0, w0, x5
crc32\c\()x w0, w0, x6
cbnz x2, 32b
-0: ret
+0: bitorder w0, \be
+ ret
8: tbz x2, #3, 4f
ldr x3, [x1], #8
-CPU_BE( rev x3, x3 )
+ byteorder x3, \be
+ bitorder x3, \be
crc32\c\()x w0, w0, x3
4: tbz x2, #2, 2f
ldr w3, [x1], #4
-CPU_BE( rev w3, w3 )
+ byteorder w3, \be
+ bitorder w3, \be
crc32\c\()w w0, w0, w3
2: tbz x2, #1, 1f
ldrh w3, [x1], #2
-CPU_BE( rev16 w3, w3 )
+ byteorder16 w3, \be
+ bitorder16 w3, \be
crc32\c\()h w0, w0, w3
1: tbz x2, #0, 0f
ldrb w3, [x1]
+ bitorder8 w3, \be
crc32\c\()b w0, w0, w3
-0: ret
+0: bitorder w0, \be
+ ret
.endm
.align 5
@@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32
alternative_else_nop_endif
__crc32 c
SYM_FUNC_END(__crc32c_le)
+
+ .align 5
+SYM_FUNC_START(crc32_be)
+alternative_if_not ARM64_HAS_CRC32
+ b crc32_be_base
+alternative_else_nop_endif
+ __crc32 , 1
+SYM_FUNC_END(crc32_be)
--
2.25.1
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2022-01-13 6:43 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-01-13 6:41 [PATCH 0/4] arm64: accelerate crc32_be Kevin Bracey
2022-01-13 6:41 ` [PATCH 1/4] lib/crc32.c: remove unneeded casts Kevin Bracey
2022-01-13 6:41 ` [PATCH 2/4] lib/crc32.c: Make crc32_be weak for arch override Kevin Bracey
2022-01-13 6:41 ` [PATCH 3/4] lib/crc32test.c: correct printed bytes count Kevin Bracey
2022-01-13 6:41 ` Kevin Bracey [this message]
2022-01-13 11:56 ` [PATCH 4/4] arm64: accelerate crc32_be Ard Biesheuvel
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220113064118.1580916-5-kevin@bracey.fi \
--to=kevin@bracey.fi \
--cc=ardb@kernel.org \
--cc=catalin.marinas@arm.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=will@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.