[PATCH] lib/bch.c: increase bitrev single conversion length

From: John Sanpe <sanpeqf@gmail.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
	rdunlap@infradead.org, torvalds@linux-foundation.org,
	unixbhaskar@gmail.com
Cc: mm-commits@vger.kernel.org, linux-kernel@vger.kernel.org,
	John Sanpe <sanpeqf@gmail.com>
Subject: [PATCH] lib/bch.c: increase bitrev single conversion length
Date: Sun,  4 Feb 2024 16:51:55 +0800	[thread overview]
Message-ID: <20240204085155.6745-1-sanpeqf@gmail.com> (raw)

Optimized the performance of the three functions (load_ecc8 store_ecc8
and bch_encode) using a larger calculation length.

Signed-off-by: John Sanpe <sanpeqf@gmail.com>
---
 lib/bch.c | 87 +++++++++++++++++++++++++++++++------------------------
 1 file changed, 49 insertions(+), 38 deletions(-)

diff --git a/lib/bch.c b/lib/bch.c
index 5f71fd76eca8..e9a9517e7b1b 100644
--- a/lib/bch.c
+++ b/lib/bch.c
@@ -73,6 +73,7 @@
 #include <linux/bitops.h>
 #include <linux/bitrev.h>
 #include <asm/byteorder.h>
+#include <asm/unaligned.h>
 #include <linux/bch.h>
 
 #if defined(CONFIG_BCH_CONST_PARAMS)
@@ -115,14 +116,6 @@ struct gf_poly_deg1 {
 	unsigned int   c[2];
 };
 
-static u8 swap_bits(struct bch_control *bch, u8 in)
-{
-	if (!bch->swap_bits)
-		return in;
-
-	return bitrev8(in);
-}
-
 /*
  * same as bch_encode(), but process input data one byte at a time
  */
@@ -135,7 +128,10 @@ static void bch_encode_unaligned(struct bch_control *bch,
 	const int l = BCH_ECC_WORDS(bch)-1;
 
 	while (len--) {
-		u8 tmp = swap_bits(bch, *data++);
+		u8 tmp = *data++;
+
+		if (bch->swap_bits)
+			tmp = bitrev8(tmp);
 
 		p = bch->mod8_tab + (l+1)*(((ecc[0] >> 24)^(tmp)) & 0xff);
 
@@ -152,20 +148,27 @@ static void bch_encode_unaligned(struct bch_control *bch,
 static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 		      const uint8_t *src)
 {
-	uint8_t pad[4] = {0, 0, 0, 0};
 	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
+	uint32_t value, bytes = 0;
+
+	for (i = 0; i < nwords; i++, src += 4) {
+		if (!bch->swap_bits)
+			value = get_unaligned_be32(src);
+		else {
+			value = get_unaligned_le32(src);
+			value = bitrev32(value);
+		}
+		dst[i] = value;
+	}
 
-	for (i = 0; i < nwords; i++, src += 4)
-		dst[i] = ((u32)swap_bits(bch, src[0]) << 24) |
-			((u32)swap_bits(bch, src[1]) << 16) |
-			((u32)swap_bits(bch, src[2]) << 8) |
-			swap_bits(bch, src[3]);
-
-	memcpy(pad, src, BCH_ECC_BYTES(bch)-4*nwords);
-	dst[nwords] = ((u32)swap_bits(bch, pad[0]) << 24) |
-		((u32)swap_bits(bch, pad[1]) << 16) |
-		((u32)swap_bits(bch, pad[2]) << 8) |
-		swap_bits(bch, pad[3]);
+	memcpy(&bytes, src, BCH_ECC_BYTES(bch)-4*nwords);
+	if (!bch->swap_bits)
+		value = be32_to_cpup((__be32 *)&bytes);
+	else {
+		value = le32_to_cpup((__le32 *)&bytes);
+		value = bitrev32(value);
+	}
+	dst[nwords] = value;
 }
 
 /*
@@ -174,20 +177,27 @@ static void load_ecc8(struct bch_control *bch, uint32_t *dst,
 static void store_ecc8(struct bch_control *bch, uint8_t *dst,
 		       const uint32_t *src)
 {
-	uint8_t pad[4];
 	unsigned int i, nwords = BCH_ECC_WORDS(bch)-1;
+	uint32_t value, bytes;
+
+	for (i = 0; i < nwords; i++, dst += 4) {
+		value = src[i];
+		if (!bch->swap_bits)
+			put_unaligned_be32(value, dst);
+		else {
+			value = bitrev32(value);
+			put_unaligned_le32(value, dst);
+		}
+	}
 
-	for (i = 0; i < nwords; i++) {
-		*dst++ = swap_bits(bch, src[i] >> 24);
-		*dst++ = swap_bits(bch, src[i] >> 16);
-		*dst++ = swap_bits(bch, src[i] >> 8);
-		*dst++ = swap_bits(bch, src[i]);
+	value = src[nwords];
+	if (!bch->swap_bits)
+		*(__be32 *)&bytes = cpu_to_be32(value);
+	else {
+		value = bitrev32(value);
+		*(__le32 *)&bytes = cpu_to_le32(value);
 	}
-	pad[0] = swap_bits(bch, src[nwords] >> 24);
-	pad[1] = swap_bits(bch, src[nwords] >> 16);
-	pad[2] = swap_bits(bch, src[nwords] >> 8);
-	pad[3] = swap_bits(bch, src[nwords]);
-	memcpy(dst, pad, BCH_ECC_BYTES(bch)-4*nwords);
+	memcpy(dst, &bytes, BCH_ECC_BYTES(bch)-4*nwords);
 }
 
 /**
@@ -257,12 +267,13 @@ void bch_encode(struct bch_control *bch, const uint8_t *data,
 	 */
 	while (mlen--) {
 		/* input data is read in big-endian format */
-		w = cpu_to_be32(*pdata++);
-		if (bch->swap_bits)
-			w = (u32)swap_bits(bch, w) |
-			    ((u32)swap_bits(bch, w >> 8) << 8) |
-			    ((u32)swap_bits(bch, w >> 16) << 16) |
-			    ((u32)swap_bits(bch, w >> 24) << 24);
+		if (!bch->swap_bits)
+			w = be32_to_cpup((__be32 *)pdata++);
+		else {
+			w = le32_to_cpup((__le32 *)pdata++);
+			w = bitrev32(w);
+		}
+
 		w ^= r[0];
 		p0 = tab0 + (l+1)*((w >>  0) & 0xff);
 		p1 = tab1 + (l+1)*((w >>  8) & 0xff);
-- 
2.43.0