All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code
@ 2014-05-30  5:35 George Spelvin
  2014-05-30  5:39 ` [PATCH 2/3] lib: crc32: mark test data __initconst George Spelvin
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: George Spelvin @ 2014-05-30  5:35 UTC (permalink / raw)
  To: davem, dborkman; +Cc: linux, linux-kernel

There's no need for a full 32x32 matrix, when rows before the last are
just shifted copies of the rows after them.

There's still room for improvement (especially on X86 processors with
CRC32 and PCLMUL instructions), but this is a large step in the
right direction.

The internal primitive is now called crc32_generic_shift and takes one
less argument; the XOR with crc2 is done in inline wrappers.

Signed-off-by: George Spelvin <linux@horizon.com>
---
This is the significant patch; there other two patches are minor
cleanups that I did while in the area.

Tested extensively in user-space (the "power" variable is identical
to the last row of the current even/odd matrix in the previous code)
and with CONFIG_CRC32_SELFTEST:

[    0.587152] crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
[    0.587186] crc32: self tests passed, processed 225944 bytes in 115727 nsec
[    0.587341] crc32c: CRC_LE_BITS = 64
[    0.587373] crc32c: self tests passed, processed 225944 bytes in 59505 nsec
[    0.596827] crc32_combine: 8373 self tests passed
[    0.606259] crc32c_combine: 8373 self tests passed

Code shrunk; comments increased to match.
Pulling 2x32x32 bits = 256 bytes off the stack is a nice bonus.

 include/linux/crc32.h |  12 ++++-
 lib/crc32.c           | 142 +++++++++++++++++++++++---------------------------
 2 files changed, 76 insertions(+), 78 deletions(-)

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 7d275c4f..bcb8e9d3 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -29,7 +29,11 @@ extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
  * 	   with the same initializer as crc1, and crc2 seed was 0. See
  * 	   also crc32_combine_test().
  */
-extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
+u32  crc32_le_shift(u32 crc, size_t len) __attribute_const__;
+static inline u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+	return crc32_le_shift(crc1, len2) ^ crc2;
+}
 
 extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
 
@@ -51,7 +55,11 @@ extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
  * 	   seeded with the same initializer as crc1, and crc2 seed
  * 	   was 0. See also crc32c_combine_test().
  */
-extern u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);
+u32  __crc32c_le_shift(u32 crc, size_t len) __attribute_const__;
+static inline u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+	return __crc32c_le_shift(crc1, len2) ^ crc2;
+}
 
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
 
diff --git a/lib/crc32.c b/lib/crc32.c
index 70f00ca5..a4a576f3 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -50,29 +50,6 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
-#define GF2_DIM		32
-
-static u32 gf2_matrix_times(u32 *mat, u32 vec)
-{
-	u32 sum = 0;
-
-	while (vec) {
-		if (vec & 1)
-			sum ^= *mat;
-		vec >>= 1;
-		mat++;
-	}
-
-	return sum;
-}
-
-static void gf2_matrix_square(u32 *square, u32 *mat)
-{
-	int i;
-
-	for (i = 0; i < GF2_DIM; i++)
-		square[i] = gf2_matrix_times(mat, mat[i]);
-}
 
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
@@ -155,51 +132,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 }
 #endif
 
-/* For conditions of distribution and use, see copyright notice in zlib.h */
-static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
-				 u32 polynomial)
-{
-	u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
-	u32 odd[GF2_DIM];  /* Odd-power-of-two zeros operator  */
-	u32 row;
-	int i;
-
-	if (len2 <= 0)
-		return crc1;
-
-	/* Put operator for one zero bit in odd */
-	odd[0] = polynomial;
-	row = 1;
-	for (i = 1; i < GF2_DIM; i++) {
-		odd[i] = row;
-		row <<= 1;
-	}
-
-	gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
-	gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
-
-	/* Apply len2 zeros to crc1 (first square will put the operator for one
-	 * zero byte, eight zero bits, in even).
-	 */
-	do {
-		/* Apply zeros operator for this bit of len2 */
-		gf2_matrix_square(even, odd);
-		if (len2 & 1)
-			crc1 = gf2_matrix_times(even, crc1);
-		len2 >>= 1;
-		/* If no more bits set, then done */
-		if (len2 == 0)
-			break;
-		/* Another iteration of the loop with odd and even swapped */
-		gf2_matrix_square(odd, even);
-		if (len2 & 1)
-			crc1 = gf2_matrix_times(odd, crc1);
-		len2 >>= 1;
-	} while (len2 != 0);
-
-	crc1 ^= crc2;
-	return crc1;
-}
 
 /**
  * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
@@ -271,19 +203,77 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
 			(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
-u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(__crc32c_le);
+
+/*
+ * This multiplies the polynomials x and y modulo the given modulus.
+ * This follows the "little-endian" CRC convention that the lsbit
+ * represents the highest power of x, and the msbit represents x^0.
+ */
+static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
 {
-	return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
+	int i;
+	u32 product = x & 1 ? y : 0;
+
+	for (i = 0; i < 31; i++) {
+		product = (product >> 1) ^ (product & 1 ? modulus : 0);
+		x >>= 1;
+		product ^= x & 1 ? y : 0;
+	}
+	return product;
 }
 
-u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+/**
+ * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
+ * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
+ * @len: The number of bytes.  @crc is multiplied by x^(8*@len)
+ # @polynomial: The modulus used to reduce the result to 32 bits.
+ *
+ * It's possible to parallelize CRC computations by computing a CRC
+ * over separate ranges of a buffer, then summing them.
+ * This shifts the given CRC by 8*len bits (i.e. produces the same effect
+ * as appending len bytes of zero to the data), in time proportional
+ * to log(len).
+ */
+static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
+				 u32 polynomial)
 {
-	return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
+	u32 power = polynomial;	/* CRC of x^32 */
+	int i;
+
+	/* Shift up to 32 bits in the simple linear way */
+	for (i = 0; i < 8*(int)(len & 3); i++)
+		crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0);
+	len >>= 2;
+	if (!len)
+		return crc;
+
+	for (;;) {
+		/* "power" is x^(2^i), modulo the polynomial */
+		if (len & 1)
+			crc = gf2_multiply(crc, power, polynomial);
+		len >>= 1;
+		if (!len)
+			break;
+		/* Square power, advancing to x^(2^(i+1)) */
+		power = gf2_multiply(power, power, polynomial);
+	}
+	return crc;
 }
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(crc32_le_combine);
-EXPORT_SYMBOL(__crc32c_le);
-EXPORT_SYMBOL(__crc32c_le_combine);
+
+u32 crc32_le_shift(u32 crc, size_t len)
+{
+	return crc32_generic_shift(crc, len, CRCPOLY_LE);
+}
+
+u32 __crc32c_le_shift(u32 crc, size_t len)
+{
+	return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
+}
+
+EXPORT_SYMBOL(crc32_le_shift);
+EXPORT_SYMBOL(__crc32c_le_shift);
 
 /**
  * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
-- 
1.9.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/3] lib: crc32: mark test data __initconst
  2014-05-30  5:35 [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code George Spelvin
@ 2014-05-30  5:39 ` George Spelvin
  2014-05-30  5:41 ` [PATCH 3/3] lib: crc32: Add some additional __pure annotations George Spelvin
  2014-06-04 12:46 ` [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code Daniel Borkmann
  2 siblings, 0 replies; 7+ messages in thread
From: George Spelvin @ 2014-05-30  5:39 UTC (permalink / raw)
  To: davem, dborkman; +Cc: linux-kernel, linux

>From 1ecab5281e3cfc8c2a61648410c8b25ba2654fcb Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@horizon.com>
Date: Thu, 29 May 2014 00:08:03 -0400
Subject: [PATCH 2/3] lib: crc32: mark test data __initconst

So it gets discarded after the selftest.

Signed-off-by: George Spelvin <linux@horizon.com>
---
This is the patch I started with, when I was looking for lib/ self-test
code to emulate for my still-pending glob.c code.  But curiosity about
the combine code led me notice problems with it, and to study the
arch/x86/crypto code, where I also have a proposed patch.

Aren't tangents fun?

 lib/crc32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/crc32.c b/lib/crc32.c
index a4a576f3..977167d6 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -341,7 +341,7 @@ EXPORT_SYMBOL(crc32_be);
 #ifdef CONFIG_CRC32_SELFTEST
 
 /* 4096 random bytes */
-static u8 __attribute__((__aligned__(8))) test_buf[] =
+static u8 const __aligned(8) test_buf[] __initconst =
 {
 	0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
 	0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
@@ -865,7 +865,7 @@ static struct crc_test {
 	u32 crc_le;	/* expected crc32_le result */
 	u32 crc_be;	/* expected crc32_be result */
 	u32 crc32c_le;	/* expected crc32c_le result */
-} test[] =
+} const test[] __initconst =
 {
 	{0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
 	{0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
-- 
1.9.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/3] lib: crc32: Add some additional __pure annotations
  2014-05-30  5:35 [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code George Spelvin
  2014-05-30  5:39 ` [PATCH 2/3] lib: crc32: mark test data __initconst George Spelvin
@ 2014-05-30  5:41 ` George Spelvin
  2014-06-04 12:46 ` [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code Daniel Borkmann
  2 siblings, 0 replies; 7+ messages in thread
From: George Spelvin @ 2014-05-30  5:41 UTC (permalink / raw)
  To: davem, dborkman; +Cc: linux-kernel, linux

In case they help the compiler.

Signed-off-by: George Spelvin <linux@horizon.com>
---
As long as I'm messing with it.  I also have a large patch to do this
to a number of lib/ headers if anyone wants.

Redundant "extern" removed to keep things within 80 columns.

 include/linux/crc32.h | 6 +++---
 lib/crc32.c           | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index bcb8e9d3..25e57f3a 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -8,8 +8,8 @@
 #include <linux/types.h>
 #include <linux/bitrev.h>
 
-extern u32  crc32_le(u32 crc, unsigned char const *p, size_t len);
-extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
+u32  crc32_le(u32 crc, unsigned char const *p, size_t len) __pure;
+u32  crc32_be(u32 crc, unsigned char const *p, size_t len) __pure;
 
 /**
  * crc32_le_combine - Combine two crc32 check values into one. For two
@@ -35,7 +35,7 @@ static inline u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
 	return crc32_le_shift(crc1, len2) ^ crc2;
 }
 
-extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
+u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len) __pure;
 
 /**
  * __crc32c_le_combine - Combine two crc32c check values into one. For two
diff --git a/lib/crc32.c b/lib/crc32.c
index 977167d6..cf0c8e08 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -54,7 +54,7 @@ MODULE_LICENSE("GPL");
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
 /* implements slicing-by-4 or slicing-by-8 algorithm */
-static inline u32
+static inline u32 __pure
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
-- 
1.9.2


^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code
  2014-05-30  5:35 [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code George Spelvin
  2014-05-30  5:39 ` [PATCH 2/3] lib: crc32: mark test data __initconst George Spelvin
  2014-05-30  5:41 ` [PATCH 3/3] lib: crc32: Add some additional __pure annotations George Spelvin
@ 2014-06-04 12:46 ` Daniel Borkmann
  2014-06-04 18:32   ` George Spelvin
  2 siblings, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2014-06-04 12:46 UTC (permalink / raw)
  To: George Spelvin; +Cc: davem, linux-kernel, akpm

Sorry for the late answer, this slipped somehow through ...

I think you might want to cc Andrew Morton <akpm@linux-foundation.org>
to let this go via akpm's tree for misc changes, perhaps?

The work from my side went via Dave's net-next tree as it had some
follow-up SCTP work that depended on these below bits.

No objections from my side if you really want this to go through
<netdev@vger.kernel.org>, though.

Anyway, some more comments inline:

On 05/30/2014 07:35 AM, George Spelvin wrote:
> There's no need for a full 32x32 matrix, when rows before the last are
> just shifted copies of the rows after them.
>
> There's still room for improvement (especially on X86 processors with
> CRC32 and PCLMUL instructions), but this is a large step in the
> right direction.
>
> The internal primitive is now called crc32_generic_shift and takes one
> less argument; the XOR with crc2 is done in inline wrappers.
>
> Signed-off-by: George Spelvin <linux@horizon.com>
> ---
> This is the significant patch; there other two patches are minor
> cleanups that I did while in the area.
>
> Tested extensively in user-space (the "power" variable is identical
> to the last row of the current even/odd matrix in the previous code)
> and with CONFIG_CRC32_SELFTEST:
>
> [    0.587152] crc32: CRC_LE_BITS = 64, CRC_BE BITS = 64
> [    0.587186] crc32: self tests passed, processed 225944 bytes in 115727 nsec
> [    0.587341] crc32c: CRC_LE_BITS = 64
> [    0.587373] crc32c: self tests passed, processed 225944 bytes in 59505 nsec
> [    0.596827] crc32_combine: 8373 self tests passed
> [    0.606259] crc32c_combine: 8373 self tests passed
>
> Code shrunk; comments increased to match.
> Pulling 2x32x32 bits = 256 bytes off the stack is a nice bonus.

Looks good to me! Do you have any performance numbers to share?

Some very minor nit-picking:

>   include/linux/crc32.h |  12 ++++-
>   lib/crc32.c           | 142 +++++++++++++++++++++++---------------------------
>   2 files changed, 76 insertions(+), 78 deletions(-)
>
> diff --git a/include/linux/crc32.h b/include/linux/crc32.h
> index 7d275c4f..bcb8e9d3 100644
> --- a/include/linux/crc32.h
> +++ b/include/linux/crc32.h
> @@ -29,7 +29,11 @@ extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
>    * 	   with the same initializer as crc1, and crc2 seed was 0. See
>    * 	   also crc32_combine_test().
>    */
> -extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
> +u32  crc32_le_shift(u32 crc, size_t len) __attribute_const__;

Perhaps a newline here.

> +static inline u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
> +{
> +	return crc32_le_shift(crc1, len2) ^ crc2;
> +}
>
>   extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
>
> @@ -51,7 +55,11 @@ extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
>    * 	   seeded with the same initializer as crc1, and crc2 seed
>    * 	   was 0. See also crc32c_combine_test().
>    */
> -extern u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);
> +u32  __crc32c_le_shift(u32 crc, size_t len) __attribute_const__;

Ditto. Or, put both *_shift() helper signatures before the crc32_le_combine
kdoc comment.

> +static inline u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
> +{
> +	return __crc32c_le_shift(crc1, len2) ^ crc2;
> +}
>
>   #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
>
> diff --git a/lib/crc32.c b/lib/crc32.c
> index 70f00ca5..a4a576f3 100644
...
>
> -u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
> +/**
> + * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
> + * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
> + * @len: The number of bytes.  @crc is multiplied by x^(8*@len)
> + # @polynomial: The modulus used to reduce the result to 32 bits.

     ^^ seems this should have been a '*'

> + *
> + * It's possible to parallelize CRC computations by computing a CRC
> + * over separate ranges of a buffer, then summing them.
> + * This shifts the given CRC by 8*len bits (i.e. produces the same effect
> + * as appending len bytes of zero to the data), in time proportional
> + * to log(len).
> + */
> +static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
> +				 u32 polynomial)

u32 polynomial is not correctly aligned to the opening '(' from the previous line.

>   {
> -	return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
> +	u32 power = polynomial;	/* CRC of x^32 */
> +	int i;
> +
> +	/* Shift up to 32 bits in the simple linear way */
...

The other two patches are fine, imho. Thanks George! :)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code
  2014-06-04 12:46 ` [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code Daniel Borkmann
@ 2014-06-04 18:32   ` George Spelvin
  2014-06-04 21:07     ` Daniel Borkmann
  0 siblings, 1 reply; 7+ messages in thread
From: George Spelvin @ 2014-06-04 18:32 UTC (permalink / raw)
  To: dborkman, linux; +Cc: akpm, davem, linux-kernel

Thanks for the nitpicks!

> I think you might want to cc Andrew Morton <akpm@linux-foundation.org>
> to let this go via akpm's tree for misc changes, perhaps?

I don't care, but akpm is fine by me.  I'll send out a v2 after I resolve
one minor point with you; see below.

Once that's done, may I add a Reviewed-by: or Acked-by: line from you?

> Looks good to me! Do you have any performance numbers to share?

Actually, I didn't bother benchmarking it because the improvement was
so obvious, but here's a quick test showing a 35.5x performance gain.

     Old       New      Delta
  0: 83005684  2314192 (-80691492)
  1: 82730196  2313836 (-80416360)
  2: 82805636  2312736 (-80492900)
  3: 82648160  2344304 (-80303856)
  4: 82531928  2314940 (-80216988)
  5: 82669440  2312976 (-80356464)
  6: 82528792  2313984 (-80214808)
  7: 82415116  2313796 (-80101320)
  8: 82451620  2314000 (-80137620)
  9: 82811052  2329708 (-80481344)
 10: 82903344  2311120 (-80592224)
 11: 82549032  2313540 (-80235492)
 12: 82564660  2330260 (-80234400)
 13: 82289788  2312972 (-79976816)
 14: 82535828  2312036 (-80223792)
 15: 82664040  2313284 (-80350756)
 16: 82629476  2309744 (-80319732)
 17: 82806812  2329628 (-80477184)
 18: 82379284  2312876 (-80066408)
 19: 82483400  2313004 (-80170396)
 20: 82651232  2314244 (-80336988)
 21: 82327508  2330456 (-79997052)
 22: 82641324  2330664 (-80310660)
 23: 82538192  2314024 (-80224168)
MIN: 82289788  2309744 (-79980044)

Here's the test loop.  Although it's subject to compiler rearrangements,
I tried to charge the loop overhead to the new code.

static void
do_test(uint64_t times[2])
{
	uint32_t crc0 = 1, crc1 = 1;
	uint64_t t0, t1, sum0 = 0, sum1 = 0;
	int i;

	t1 = t0 = rdtsc();
	for (i = 1024; i < 2048; i++) {
		sum0 += t1;
		crc0 = crc32_generic_shift(crc0, i, CRC32C_POLY_LE);
		sum1 += rdtsc();
		crc1 = crc32_generic_combine(crc1, 0, i, CRC32C_POLY_LE);
		t1 = rdtsc();
	}
	times[0] = sum0 + t1 - sum1 - t0;	// Old code
	times[1] = sum1 - sum0;			// New code
	if (crc0 != crc1)
		printf("Mismatch!  %08x != %08x\n", crc0, crc1);
}

It's possible to do a bit better with more effort (exploiting
the precomputed CRC tables for modular reduction) but this fruit
was hanging *so* low I couldn't resist grabbing it.

>> -extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
>> +u32  crc32_le_shift(u32 crc, size_t len) __attribute_const__;

> Perhaps a newline here.

Question: where do you think a newline should go?  It's not obvious
to me.  My style has been to keep as much of a declaration on one line
as possible so "git grep <function> include" is as informative as possible.

>> -extern u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);
>> +u32  __crc32c_le_shift(u32 crc, size_t len) __attribute_const__;

> Ditto. Or, put both *_shift() helper signatures before the crc32_le_combine
> kdoc comment.

Um, same basic question.  I agree that putting a declaration
between the kdoc and the function is strange, but that doesn't
seem to be what you're commenting in...

Now that I've gotten an ack, I'm happy to be more aggressive about
tweaking comments.  I just wanted to focus the diff on the code changes.

>> +/**
>> + * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
>> + * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
>> + * @len: The number of bytes.  @crc is multiplied by x^(8*@len)
>> + # @polynomial: The modulus used to reduce the result to 32 bits.

>      ^^ seems this should have been a '*'

Yes, obviously.  Thanks for catching that.

>> +static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
>> +				 u32 polynomial)

> u32 polynomial is not correctly aligned to the opening '(' from the previous line.

Thanks again.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code
  2014-06-04 18:32   ` George Spelvin
@ 2014-06-04 21:07     ` Daniel Borkmann
  2014-06-04 23:12       ` George Spelvin
  0 siblings, 1 reply; 7+ messages in thread
From: Daniel Borkmann @ 2014-06-04 21:07 UTC (permalink / raw)
  To: George Spelvin; +Cc: akpm, davem, linux-kernel

On 06/04/2014 08:32 PM, George Spelvin wrote:
> Thanks for the nitpicks!
>
>> I think you might want to cc Andrew Morton <akpm@linux-foundation.org>
>> to let this go via akpm's tree for misc changes, perhaps?
>
> I don't care, but akpm is fine by me.  I'll send out a v2 after I resolve
> one minor point with you; see below.
>
> Once that's done, may I add a Reviewed-by: or Acked-by: line from you?

Yes, feel free to add my Reviewed-by tag and keep me in Cc.

>> Looks good to me! Do you have any performance numbers to share?
>
> Actually, I didn't bother benchmarking it because the improvement was
> so obvious, but here's a quick test showing a 35.5x performance gain.

That's great!

...
>>> -extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
>>> +u32  crc32_le_shift(u32 crc, size_t len) __attribute_const__;
>
>> Perhaps a newline here.
>
> Question: where do you think a newline should go?  It's not obvious
> to me.  My style has been to keep as much of a declaration on one line
> as possible so "git grep <function> include" is as informative as possible.

It's just nit, but since you've asked, end result like this:

--snip--
u32 crc32_le_shift(u32 crc, size_t len) __attribute_const__;

static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
{
	return crc32_le_shift(crc1, len2) ^ crc2;
}
--snap--

> Now that I've gotten an ack, I'm happy to be more aggressive about
> tweaking comments.  I just wanted to focus the diff on the code changes.

Sounds good, thanks!

>>> +/**
>>> + * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
>>> + * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
>>> + * @len: The number of bytes.  @crc is multiplied by x^(8*@len)
>>> + # @polynomial: The modulus used to reduce the result to 32 bits.
>
>>       ^^ seems this should have been a '*'
>
> Yes, obviously.  Thanks for catching that.
>
>>> +static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
>>> +				 u32 polynomial)
>
>> u32 polynomial is not correctly aligned to the opening '(' from the previous line.
>
> Thanks again.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code
  2014-06-04 21:07     ` Daniel Borkmann
@ 2014-06-04 23:12       ` George Spelvin
  0 siblings, 0 replies; 7+ messages in thread
From: George Spelvin @ 2014-06-04 23:12 UTC (permalink / raw)
  To: dborkman, linux; +Cc: akpm, davem, linux-kernel

>>> Perhaps a newline here.

>> Question: where do you think a newline should go?  It's not obvious
>> to me.  My style has been to keep as much of a declaration on one line
>> as possible so "git grep <function> include" is as informative as possible.

> It's just nit, but since you've asked, end result like this:
> 
> --snip--
> u32 crc32_le_shift(u32 crc, size_t len) __attribute_const__;
> 
> static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
> {
> 	return crc32_le_shift(crc1, len2) ^ crc2;
> }
> --snap--

Ah, got it!  I couldn't figure out where it would make sense to insert
a newline into the middle of one line, breaking it into two.  Adding a
blank line makes sense, and makes your other comment make sense.

Good suggestion; I'll do it.

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2014-06-04 23:12 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-05-30  5:35 [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code George Spelvin
2014-05-30  5:39 ` [PATCH 2/3] lib: crc32: mark test data __initconst George Spelvin
2014-05-30  5:41 ` [PATCH 3/3] lib: crc32: Add some additional __pure annotations George Spelvin
2014-06-04 12:46 ` [PATCH 1/3] lib: crc32: Greatly shrink CRC combining code Daniel Borkmann
2014-06-04 18:32   ` George Spelvin
2014-06-04 21:07     ` Daniel Borkmann
2014-06-04 23:12       ` George Spelvin

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.