All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
@ 2015-07-16 12:40 Denys Vlasenko
  2015-07-16 14:26 ` Alexander Duyck
  2015-07-16 15:43 ` Tom Herbert
  0 siblings, 2 replies; 9+ messages in thread
From: Denys Vlasenko @ 2015-07-16 12:40 UTC (permalink / raw)
  To: Thomas Graf
  Cc: Denys Vlasenko, Alexander Duyck, Jozsef Kadlecsik, Herbert Xu,
	netdev, linux-kernel

This patch deinlines jhash, jhash2 and __jhash_nwords.

It also removes rhashtable_jhash2(key, length, seed)
because it was merely calling jhash2(key, length, seed).

With this .config: http://busybox.net/~vda/kernel_config,
after deinlining these functions have sizes and callsite counts
as follows:

__jhash_nwords: 72 bytes, 75 calls
jhash: 297 bytes, 111 calls
jhash2: 205 bytes, 136 calls

Total size decrease is about 38,000 bytes:

    text     data      bss       dec     hex filename
90663567 17221960 36659200 144544727 89d93d7 vmlinux5
90625577 17221864 36659200 144506641 89cff11 vmlinux.after

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
CC: Thomas Graf <tgraf@suug.ch>
CC: Alexander Duyck <alexander.h.duyck@redhat.com>
CC: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
CC: Herbert Xu <herbert@gondor.apana.org.au>
CC: netdev@vger.kernel.org
CC: linux-kernel@vger.kernel.org
---
Changes in v2: created a new source file, jhash.c

 include/linux/jhash.h | 123 +----------------------------------------
 lib/Makefile          |   2 +-
 lib/jhash.c           | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/rhashtable.c      |  13 +++--
 4 files changed, 160 insertions(+), 127 deletions(-)
 create mode 100644 lib/jhash.c

diff --git a/include/linux/jhash.h b/include/linux/jhash.h
index 348c6f4..0b3f55d 100644
--- a/include/linux/jhash.h
+++ b/include/linux/jhash.h
@@ -31,131 +31,14 @@
 /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
 #define jhash_mask(n)   (jhash_size(n)-1)
 
-/* __jhash_mix -- mix 3 32-bit values reversibly. */
-#define __jhash_mix(a, b, c)			\
-{						\
-	a -= c;  a ^= rol32(c, 4);  c += b;	\
-	b -= a;  b ^= rol32(a, 6);  a += c;	\
-	c -= b;  c ^= rol32(b, 8);  b += a;	\
-	a -= c;  a ^= rol32(c, 16); c += b;	\
-	b -= a;  b ^= rol32(a, 19); a += c;	\
-	c -= b;  c ^= rol32(b, 4);  b += a;	\
-}
-
-/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
-#define __jhash_final(a, b, c)			\
-{						\
-	c ^= b; c -= rol32(b, 14);		\
-	a ^= c; a -= rol32(c, 11);		\
-	b ^= a; b -= rol32(a, 25);		\
-	c ^= b; c -= rol32(b, 16);		\
-	a ^= c; a -= rol32(c, 4);		\
-	b ^= a; b -= rol32(a, 14);		\
-	c ^= b; c -= rol32(b, 24);		\
-}
-
 /* An arbitrary initial parameter */
 #define JHASH_INITVAL		0xdeadbeef
 
-/* jhash - hash an arbitrary key
- * @k: sequence of bytes as key
- * @length: the length of the key
- * @initval: the previous hash, or an arbitray value
- *
- * The generic version, hashes an arbitrary sequence of bytes.
- * No alignment or length assumptions are made about the input key.
- *
- * Returns the hash value of the key. The result depends on endianness.
- */
-static inline u32 jhash(const void *key, u32 length, u32 initval)
-{
-	u32 a, b, c;
-	const u8 *k = key;
-
-	/* Set up the internal state */
-	a = b = c = JHASH_INITVAL + length + initval;
-
-	/* All but the last block: affect some 32 bits of (a,b,c) */
-	while (length > 12) {
-		a += __get_unaligned_cpu32(k);
-		b += __get_unaligned_cpu32(k + 4);
-		c += __get_unaligned_cpu32(k + 8);
-		__jhash_mix(a, b, c);
-		length -= 12;
-		k += 12;
-	}
-	/* Last block: affect all 32 bits of (c) */
-	/* All the case statements fall through */
-	switch (length) {
-	case 12: c += (u32)k[11]<<24;
-	case 11: c += (u32)k[10]<<16;
-	case 10: c += (u32)k[9]<<8;
-	case 9:  c += k[8];
-	case 8:  b += (u32)k[7]<<24;
-	case 7:  b += (u32)k[6]<<16;
-	case 6:  b += (u32)k[5]<<8;
-	case 5:  b += k[4];
-	case 4:  a += (u32)k[3]<<24;
-	case 3:  a += (u32)k[2]<<16;
-	case 2:  a += (u32)k[1]<<8;
-	case 1:  a += k[0];
-		 __jhash_final(a, b, c);
-	case 0: /* Nothing left to add */
-		break;
-	}
-
-	return c;
-}
-
-/* jhash2 - hash an array of u32's
- * @k: the key which must be an array of u32's
- * @length: the number of u32's in the key
- * @initval: the previous hash, or an arbitray value
- *
- * Returns the hash value of the key.
- */
-static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
-{
-	u32 a, b, c;
-
-	/* Set up the internal state */
-	a = b = c = JHASH_INITVAL + (length<<2) + initval;
-
-	/* Handle most of the key */
-	while (length > 3) {
-		a += k[0];
-		b += k[1];
-		c += k[2];
-		__jhash_mix(a, b, c);
-		length -= 3;
-		k += 3;
-	}
-
-	/* Handle the last 3 u32's: all the case statements fall through */
-	switch (length) {
-	case 3: c += k[2];
-	case 2: b += k[1];
-	case 1: a += k[0];
-		__jhash_final(a, b, c);
-	case 0:	/* Nothing left to add */
-		break;
-	}
-
-	return c;
-}
-
+u32 jhash(const void *key, u32 length, u32 initval);
+u32 jhash2(const u32 *k, u32 length, u32 initval);
 
 /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
-static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
-{
-	a += initval;
-	b += initval;
-	c += initval;
-
-	__jhash_final(a, b, c);
-
-	return c;
-}
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval);
 
 static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
 {
diff --git a/lib/Makefile b/lib/Makefile
index 6897b52..978be53 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
 	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
-	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
+	 percpu-refcount.o percpu_ida.o jhash.o rhashtable.o reciprocal_div.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += hexdump.o
diff --git a/lib/jhash.c b/lib/jhash.c
new file mode 100644
index 0000000..cf3c277
--- /dev/null
+++ b/lib/jhash.c
@@ -0,0 +1,149 @@
+/* Jenkins hash support.
+ *
+ * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
+ *
+ * http://burtleburtle.net/bob/hash/
+ *
+ * These are the credits from Bob's sources:
+ *
+ * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
+ *
+ * These are functions for producing 32-bit hashes for hash table lookup.
+ * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
+ * are externally useful functions.  Routines to test the hash are included
+ * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
+ * the public domain.  It has no warranty.
+ *
+ * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
+ *
+ * I've modified Bob's hash to be useful in the Linux kernel, and
+ * any bugs present are my fault.
+ * Jozsef
+ */
+#include <linux/export.h>
+#include <linux/jhash.h>
+
+/* __jhash_mix -- mix 3 32-bit values reversibly. */
+#define __jhash_mix(a, b, c)			\
+{						\
+	a -= c;  a ^= rol32(c, 4);  c += b;	\
+	b -= a;  b ^= rol32(a, 6);  a += c;	\
+	c -= b;  c ^= rol32(b, 8);  b += a;	\
+	a -= c;  a ^= rol32(c, 16); c += b;	\
+	b -= a;  b ^= rol32(a, 19); a += c;	\
+	c -= b;  c ^= rol32(b, 4);  b += a;	\
+}
+
+/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
+#define __jhash_final(a, b, c)			\
+{						\
+	c ^= b; c -= rol32(b, 14);		\
+	a ^= c; a -= rol32(c, 11);		\
+	b ^= a; b -= rol32(a, 25);		\
+	c ^= b; c -= rol32(b, 16);		\
+	a ^= c; a -= rol32(c, 4);		\
+	b ^= a; b -= rol32(a, 14);		\
+	c ^= b; c -= rol32(b, 24);		\
+}
+
+/* jhash - hash an arbitrary key
+ * @k: sequence of bytes as key
+ * @length: the length of the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * The generic version, hashes an arbitrary sequence of bytes.
+ * No alignment or length assumptions are made about the input key.
+ *
+ * Returns the hash value of the key. The result depends on endianness.
+ */
+u32 jhash(const void *key, u32 length, u32 initval)
+{
+	u32 a, b, c;
+	const u8 *k = key;
+
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + length + initval;
+
+	/* All but the last block: affect some 32 bits of (a,b,c) */
+	while (length > 12) {
+		a += __get_unaligned_cpu32(k);
+		b += __get_unaligned_cpu32(k + 4);
+		c += __get_unaligned_cpu32(k + 8);
+		__jhash_mix(a, b, c);
+		length -= 12;
+		k += 12;
+	}
+	/* Last block: affect all 32 bits of (c) */
+	/* All the case statements fall through */
+	switch (length) {
+	case 12: c += (u32)k[11]<<24;
+	case 11: c += (u32)k[10]<<16;
+	case 10: c += (u32)k[9]<<8;
+	case 9:  c += k[8];
+	case 8:  b += (u32)k[7]<<24;
+	case 7:  b += (u32)k[6]<<16;
+	case 6:  b += (u32)k[5]<<8;
+	case 5:  b += k[4];
+	case 4:  a += (u32)k[3]<<24;
+	case 3:  a += (u32)k[2]<<16;
+	case 2:  a += (u32)k[1]<<8;
+	case 1:  a += k[0];
+		 __jhash_final(a, b, c);
+	case 0: /* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+EXPORT_SYMBOL_GPL(jhash);
+
+/* jhash2 - hash an array of u32's
+ * @k: the key which must be an array of u32's
+ * @length: the number of u32's in the key
+ * @initval: the previous hash, or an arbitray value
+ *
+ * Returns the hash value of the key.
+ */
+u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+	u32 a, b, c;
+
+	/* Set up the internal state */
+	a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+	/* Handle most of the key */
+	while (length > 3) {
+		a += k[0];
+		b += k[1];
+		c += k[2];
+		__jhash_mix(a, b, c);
+		length -= 3;
+		k += 3;
+	}
+
+	/* Handle the last 3 u32's: all the case statements fall through */
+	switch (length) {
+	case 3: c += k[2];
+	case 2: b += k[1];
+	case 1: a += k[0];
+		__jhash_final(a, b, c);
+	case 0:	/* Nothing left to add */
+		break;
+	}
+
+	return c;
+}
+EXPORT_SYMBOL_GPL(jhash2);
+
+/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
+u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+	a += initval;
+	b += initval;
+	c += initval;
+
+	__jhash_final(a, b, c);
+
+	return c;
+}
+EXPORT_SYMBOL_GPL(__jhash_nwords);
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index cc0c697..cf429c5 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -663,11 +663,6 @@ static size_t rounded_hashtable_size(const struct rhashtable_params *params)
 		   (unsigned long)params->min_size);
 }
 
-static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
-{
-	return jhash2(key, length, seed);
-}
-
 /**
  * rhashtable_init - initialize a new hash table
  * @ht:		hash table to be initialized
@@ -773,8 +768,14 @@ int rhashtable_init(struct rhashtable *ht,
 		ht->p.hashfn = jhash;
 
 		if (!(ht->key_len & (sizeof(u32) - 1))) {
+			typedef u32 (*hashfunc)(const void *k, u32 length, u32 initval);
+
 			ht->key_len /= sizeof(u32);
-			ht->p.hashfn = rhashtable_jhash2;
+			/*
+			 * jhash2() 1st param is const u32*,
+			 * p.hashfn wants const void*. Hence the cast:
+			 */
+			ht->p.hashfn = (hashfunc)jhash2;
 		}
 	}
 
-- 
1.8.1.4


^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-16 12:40 [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords Denys Vlasenko
@ 2015-07-16 14:26 ` Alexander Duyck
  2015-07-16 15:43 ` Tom Herbert
  1 sibling, 0 replies; 9+ messages in thread
From: Alexander Duyck @ 2015-07-16 14:26 UTC (permalink / raw)
  To: Denys Vlasenko, Thomas Graf
  Cc: Alexander Duyck, Jozsef Kadlecsik, Herbert Xu, netdev, linux-kernel

On 07/16/2015 05:40 AM, Denys Vlasenko wrote:
> This patch deinlines jhash, jhash2 and __jhash_nwords.
>
> It also removes rhashtable_jhash2(key, length, seed)
> because it was merely calling jhash2(key, length, seed).
>
> With this .config: http://busybox.net/~vda/kernel_config,
> after deinlining these functions have sizes and callsite counts
> as follows:
>
> __jhash_nwords: 72 bytes, 75 calls
> jhash: 297 bytes, 111 calls
> jhash2: 205 bytes, 136 calls
>
> Total size decrease is about 38,000 bytes:
>
>      text     data      bss       dec     hex filename
> 90663567 17221960 36659200 144544727 89d93d7 vmlinux5
> 90625577 17221864 36659200 144506641 89cff11 vmlinux.after
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Thomas Graf <tgraf@suug.ch>
> CC: Alexander Duyck <alexander.h.duyck@redhat.com>
> CC: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
> CC: Herbert Xu <herbert@gondor.apana.org.au>
> CC: netdev@vger.kernel.org
> CC: linux-kernel@vger.kernel.org
> ---
> Changes in v2: created a new source file, jhash.c
>
>   include/linux/jhash.h | 123 +----------------------------------------
>   lib/Makefile          |   2 +-
>   lib/jhash.c           | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++
>   lib/rhashtable.c      |  13 +++--
>   4 files changed, 160 insertions(+), 127 deletions(-)
>   create mode 100644 lib/jhash.c
>
> diff --git a/include/linux/jhash.h b/include/linux/jhash.h
> index 348c6f4..0b3f55d 100644
> --- a/include/linux/jhash.h
> +++ b/include/linux/jhash.h
> @@ -31,131 +31,14 @@
>   /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
>   #define jhash_mask(n)   (jhash_size(n)-1)
>   
> -/* __jhash_mix -- mix 3 32-bit values reversibly. */
> -#define __jhash_mix(a, b, c)			\
> -{						\
> -	a -= c;  a ^= rol32(c, 4);  c += b;	\
> -	b -= a;  b ^= rol32(a, 6);  a += c;	\
> -	c -= b;  c ^= rol32(b, 8);  b += a;	\
> -	a -= c;  a ^= rol32(c, 16); c += b;	\
> -	b -= a;  b ^= rol32(a, 19); a += c;	\
> -	c -= b;  c ^= rol32(b, 4);  b += a;	\
> -}
> -
> -/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> -#define __jhash_final(a, b, c)			\
> -{						\
> -	c ^= b; c -= rol32(b, 14);		\
> -	a ^= c; a -= rol32(c, 11);		\
> -	b ^= a; b -= rol32(a, 25);		\
> -	c ^= b; c -= rol32(b, 16);		\
> -	a ^= c; a -= rol32(c, 4);		\
> -	b ^= a; b -= rol32(a, 14);		\
> -	c ^= b; c -= rol32(b, 24);		\
> -}
> -
>   /* An arbitrary initial parameter */
>   #define JHASH_INITVAL		0xdeadbeef
>   
> -/* jhash - hash an arbitrary key
> - * @k: sequence of bytes as key
> - * @length: the length of the key
> - * @initval: the previous hash, or an arbitray value
> - *
> - * The generic version, hashes an arbitrary sequence of bytes.
> - * No alignment or length assumptions are made about the input key.
> - *
> - * Returns the hash value of the key. The result depends on endianness.
> - */
> -static inline u32 jhash(const void *key, u32 length, u32 initval)
> -{
> -	u32 a, b, c;
> -	const u8 *k = key;
> -
> -	/* Set up the internal state */
> -	a = b = c = JHASH_INITVAL + length + initval;
> -
> -	/* All but the last block: affect some 32 bits of (a,b,c) */
> -	while (length > 12) {
> -		a += __get_unaligned_cpu32(k);
> -		b += __get_unaligned_cpu32(k + 4);
> -		c += __get_unaligned_cpu32(k + 8);
> -		__jhash_mix(a, b, c);
> -		length -= 12;
> -		k += 12;
> -	}
> -	/* Last block: affect all 32 bits of (c) */
> -	/* All the case statements fall through */
> -	switch (length) {
> -	case 12: c += (u32)k[11]<<24;
> -	case 11: c += (u32)k[10]<<16;
> -	case 10: c += (u32)k[9]<<8;
> -	case 9:  c += k[8];
> -	case 8:  b += (u32)k[7]<<24;
> -	case 7:  b += (u32)k[6]<<16;
> -	case 6:  b += (u32)k[5]<<8;
> -	case 5:  b += k[4];
> -	case 4:  a += (u32)k[3]<<24;
> -	case 3:  a += (u32)k[2]<<16;
> -	case 2:  a += (u32)k[1]<<8;
> -	case 1:  a += k[0];
> -		 __jhash_final(a, b, c);
> -	case 0: /* Nothing left to add */
> -		break;
> -	}
> -
> -	return c;
> -}
> -
> -/* jhash2 - hash an array of u32's
> - * @k: the key which must be an array of u32's
> - * @length: the number of u32's in the key
> - * @initval: the previous hash, or an arbitray value
> - *
> - * Returns the hash value of the key.
> - */
> -static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
> -{
> -	u32 a, b, c;
> -
> -	/* Set up the internal state */
> -	a = b = c = JHASH_INITVAL + (length<<2) + initval;
> -
> -	/* Handle most of the key */
> -	while (length > 3) {
> -		a += k[0];
> -		b += k[1];
> -		c += k[2];
> -		__jhash_mix(a, b, c);
> -		length -= 3;
> -		k += 3;
> -	}
> -
> -	/* Handle the last 3 u32's: all the case statements fall through */
> -	switch (length) {
> -	case 3: c += k[2];
> -	case 2: b += k[1];
> -	case 1: a += k[0];
> -		__jhash_final(a, b, c);
> -	case 0:	/* Nothing left to add */
> -		break;
> -	}
> -
> -	return c;
> -}
> -
> +u32 jhash(const void *key, u32 length, u32 initval);
> +u32 jhash2(const u32 *k, u32 length, u32 initval);
>   
>   /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
> -static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
> -{
> -	a += initval;
> -	b += initval;
> -	c += initval;
> -
> -	__jhash_final(a, b, c);
> -
> -	return c;
> -}
> +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval);
>   
>   static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
>   {
> diff --git a/lib/Makefile b/lib/Makefile
> index 6897b52..978be53 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
>   	 bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
>   	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
>   	 bsearch.o find_bit.o llist.o memweight.o kfifo.o \
> -	 percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
> +	 percpu-refcount.o percpu_ida.o jhash.o rhashtable.o reciprocal_div.o
>   obj-y += string_helpers.o
>   obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
>   obj-y += hexdump.o
> diff --git a/lib/jhash.c b/lib/jhash.c
> new file mode 100644
> index 0000000..cf3c277
> --- /dev/null
> +++ b/lib/jhash.c
> @@ -0,0 +1,149 @@
> +/* Jenkins hash support.
> + *
> + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
> + *
> + * http://burtleburtle.net/bob/hash/
> + *
> + * These are the credits from Bob's sources:
> + *
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions.  Routines to test the hash are included
> + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> + * the public domain.  It has no warranty.
> + *
> + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
> + *
> + * I've modified Bob's hash to be useful in the Linux kernel, and
> + * any bugs present are my fault.
> + * Jozsef
> + */
> +#include <linux/export.h>
> +#include <linux/jhash.h>
> +
> +/* __jhash_mix -- mix 3 32-bit values reversibly. */
> +#define __jhash_mix(a, b, c)			\
> +{						\
> +	a -= c;  a ^= rol32(c, 4);  c += b;	\
> +	b -= a;  b ^= rol32(a, 6);  a += c;	\
> +	c -= b;  c ^= rol32(b, 8);  b += a;	\
> +	a -= c;  a ^= rol32(c, 16); c += b;	\
> +	b -= a;  b ^= rol32(a, 19); a += c;	\
> +	c -= b;  c ^= rol32(b, 4);  b += a;	\
> +}
> +
> +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> +#define __jhash_final(a, b, c)			\
> +{						\
> +	c ^= b; c -= rol32(b, 14);		\
> +	a ^= c; a -= rol32(c, 11);		\
> +	b ^= a; b -= rol32(a, 25);		\
> +	c ^= b; c -= rol32(b, 16);		\
> +	a ^= c; a -= rol32(c, 4);		\
> +	b ^= a; b -= rol32(a, 14);		\
> +	c ^= b; c -= rol32(b, 24);		\
> +}
> +
> +/* jhash - hash an arbitrary key
> + * @k: sequence of bytes as key
> + * @length: the length of the key
> + * @initval: the previous hash, or an arbitray value
> + *
> + * The generic version, hashes an arbitrary sequence of bytes.
> + * No alignment or length assumptions are made about the input key.
> + *
> + * Returns the hash value of the key. The result depends on endianness.
> + */
> +u32 jhash(const void *key, u32 length, u32 initval)
> +{
> +	u32 a, b, c;
> +	const u8 *k = key;
> +
> +	/* Set up the internal state */
> +	a = b = c = JHASH_INITVAL + length + initval;
> +
> +	/* All but the last block: affect some 32 bits of (a,b,c) */
> +	while (length > 12) {
> +		a += __get_unaligned_cpu32(k);
> +		b += __get_unaligned_cpu32(k + 4);
> +		c += __get_unaligned_cpu32(k + 8);
> +		__jhash_mix(a, b, c);
> +		length -= 12;
> +		k += 12;
> +	}
> +	/* Last block: affect all 32 bits of (c) */
> +	/* All the case statements fall through */
> +	switch (length) {
> +	case 12: c += (u32)k[11]<<24;
> +	case 11: c += (u32)k[10]<<16;
> +	case 10: c += (u32)k[9]<<8;
> +	case 9:  c += k[8];
> +	case 8:  b += (u32)k[7]<<24;
> +	case 7:  b += (u32)k[6]<<16;
> +	case 6:  b += (u32)k[5]<<8;
> +	case 5:  b += k[4];
> +	case 4:  a += (u32)k[3]<<24;
> +	case 3:  a += (u32)k[2]<<16;
> +	case 2:  a += (u32)k[1]<<8;
> +	case 1:  a += k[0];
> +		 __jhash_final(a, b, c);
> +	case 0: /* Nothing left to add */
> +		break;
> +	}
> +
> +	return c;
> +}
> +EXPORT_SYMBOL_GPL(jhash);
> +

I think this should really be EXPORT_SYMBOL, not EXPORT_SYMBOL_GPL. The 
fact is the code this is based on is public domain so there isn't much 
point in requiring GPL as they could just go grab Bob's original code 
and have a hash function put together pretty quick.

> +/* jhash2 - hash an array of u32's
> + * @k: the key which must be an array of u32's
> + * @length: the number of u32's in the key
> + * @initval: the previous hash, or an arbitray value
> + *
> + * Returns the hash value of the key.
> + */
> +u32 jhash2(const u32 *k, u32 length, u32 initval)
> +{
> +	u32 a, b, c;
> +
> +	/* Set up the internal state */
> +	a = b = c = JHASH_INITVAL + (length<<2) + initval;
> +
> +	/* Handle most of the key */
> +	while (length > 3) {
> +		a += k[0];
> +		b += k[1];
> +		c += k[2];
> +		__jhash_mix(a, b, c);
> +		length -= 3;
> +		k += 3;
> +	}
> +
> +	/* Handle the last 3 u32's: all the case statements fall through */
> +	switch (length) {
> +	case 3: c += k[2];
> +	case 2: b += k[1];
> +	case 1: a += k[0];
> +		__jhash_final(a, b, c);
> +	case 0:	/* Nothing left to add */
> +		break;
> +	}
> +
> +	return c;
> +}
> +EXPORT_SYMBOL_GPL(jhash2);
> +

Same thing here.

> +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
> +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
> +{
> +	a += initval;
> +	b += initval;
> +	c += initval;
> +
> +	__jhash_final(a, b, c);
> +
> +	return c;
> +}
> +EXPORT_SYMBOL_GPL(__jhash_nwords);

This one too.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-16 12:40 [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords Denys Vlasenko
  2015-07-16 14:26 ` Alexander Duyck
@ 2015-07-16 15:43 ` Tom Herbert
  2015-07-16 18:17   ` David Miller
  1 sibling, 1 reply; 9+ messages in thread
From: Tom Herbert @ 2015-07-16 15:43 UTC (permalink / raw)
  To: Denys Vlasenko
  Cc: Thomas Graf, Alexander Duyck, Jozsef Kadlecsik, Herbert Xu,
	Linux Kernel Network Developers, LKML

On Thu, Jul 16, 2015 at 5:40 AM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> This patch deinlines jhash, jhash2 and __jhash_nwords.
>
> It also removes rhashtable_jhash2(key, length, seed)
> because it was merely calling jhash2(key, length, seed).
>
> With this .config: http://busybox.net/~vda/kernel_config,
> after deinlining these functions have sizes and callsite counts
> as follows:
>
> __jhash_nwords: 72 bytes, 75 calls
> jhash: 297 bytes, 111 calls
> jhash2: 205 bytes, 136 calls
>
jhash is used in several places in the critical data path. Does the
decrease in text size justify performance impact of not inlining it?

Tom

> Total size decrease is about 38,000 bytes:
>
>     text     data      bss       dec     hex filename
> 90663567 17221960 36659200 144544727 89d93d7 vmlinux5
> 90625577 17221864 36659200 144506641 89cff11 vmlinux.after
>
> Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
> CC: Thomas Graf <tgraf@suug.ch>
> CC: Alexander Duyck <alexander.h.duyck@redhat.com>
> CC: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
> CC: Herbert Xu <herbert@gondor.apana.org.au>
> CC: netdev@vger.kernel.org
> CC: linux-kernel@vger.kernel.org
> ---
> Changes in v2: created a new source file, jhash.c
>
>  include/linux/jhash.h | 123 +----------------------------------------
>  lib/Makefile          |   2 +-
>  lib/jhash.c           | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  lib/rhashtable.c      |  13 +++--
>  4 files changed, 160 insertions(+), 127 deletions(-)
>  create mode 100644 lib/jhash.c
>
> diff --git a/include/linux/jhash.h b/include/linux/jhash.h
> index 348c6f4..0b3f55d 100644
> --- a/include/linux/jhash.h
> +++ b/include/linux/jhash.h
> @@ -31,131 +31,14 @@
>  /* Mask the hash value, i.e (value & jhash_mask(n)) instead of (value % n) */
>  #define jhash_mask(n)   (jhash_size(n)-1)
>
> -/* __jhash_mix -- mix 3 32-bit values reversibly. */
> -#define __jhash_mix(a, b, c)                   \
> -{                                              \
> -       a -= c;  a ^= rol32(c, 4);  c += b;     \
> -       b -= a;  b ^= rol32(a, 6);  a += c;     \
> -       c -= b;  c ^= rol32(b, 8);  b += a;     \
> -       a -= c;  a ^= rol32(c, 16); c += b;     \
> -       b -= a;  b ^= rol32(a, 19); a += c;     \
> -       c -= b;  c ^= rol32(b, 4);  b += a;     \
> -}
> -
> -/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> -#define __jhash_final(a, b, c)                 \
> -{                                              \
> -       c ^= b; c -= rol32(b, 14);              \
> -       a ^= c; a -= rol32(c, 11);              \
> -       b ^= a; b -= rol32(a, 25);              \
> -       c ^= b; c -= rol32(b, 16);              \
> -       a ^= c; a -= rol32(c, 4);               \
> -       b ^= a; b -= rol32(a, 14);              \
> -       c ^= b; c -= rol32(b, 24);              \
> -}
> -
>  /* An arbitrary initial parameter */
>  #define JHASH_INITVAL          0xdeadbeef
>
> -/* jhash - hash an arbitrary key
> - * @k: sequence of bytes as key
> - * @length: the length of the key
> - * @initval: the previous hash, or an arbitray value
> - *
> - * The generic version, hashes an arbitrary sequence of bytes.
> - * No alignment or length assumptions are made about the input key.
> - *
> - * Returns the hash value of the key. The result depends on endianness.
> - */
> -static inline u32 jhash(const void *key, u32 length, u32 initval)
> -{
> -       u32 a, b, c;
> -       const u8 *k = key;
> -
> -       /* Set up the internal state */
> -       a = b = c = JHASH_INITVAL + length + initval;
> -
> -       /* All but the last block: affect some 32 bits of (a,b,c) */
> -       while (length > 12) {
> -               a += __get_unaligned_cpu32(k);
> -               b += __get_unaligned_cpu32(k + 4);
> -               c += __get_unaligned_cpu32(k + 8);
> -               __jhash_mix(a, b, c);
> -               length -= 12;
> -               k += 12;
> -       }
> -       /* Last block: affect all 32 bits of (c) */
> -       /* All the case statements fall through */
> -       switch (length) {
> -       case 12: c += (u32)k[11]<<24;
> -       case 11: c += (u32)k[10]<<16;
> -       case 10: c += (u32)k[9]<<8;
> -       case 9:  c += k[8];
> -       case 8:  b += (u32)k[7]<<24;
> -       case 7:  b += (u32)k[6]<<16;
> -       case 6:  b += (u32)k[5]<<8;
> -       case 5:  b += k[4];
> -       case 4:  a += (u32)k[3]<<24;
> -       case 3:  a += (u32)k[2]<<16;
> -       case 2:  a += (u32)k[1]<<8;
> -       case 1:  a += k[0];
> -                __jhash_final(a, b, c);
> -       case 0: /* Nothing left to add */
> -               break;
> -       }
> -
> -       return c;
> -}
> -
> -/* jhash2 - hash an array of u32's
> - * @k: the key which must be an array of u32's
> - * @length: the number of u32's in the key
> - * @initval: the previous hash, or an arbitray value
> - *
> - * Returns the hash value of the key.
> - */
> -static inline u32 jhash2(const u32 *k, u32 length, u32 initval)
> -{
> -       u32 a, b, c;
> -
> -       /* Set up the internal state */
> -       a = b = c = JHASH_INITVAL + (length<<2) + initval;
> -
> -       /* Handle most of the key */
> -       while (length > 3) {
> -               a += k[0];
> -               b += k[1];
> -               c += k[2];
> -               __jhash_mix(a, b, c);
> -               length -= 3;
> -               k += 3;
> -       }
> -
> -       /* Handle the last 3 u32's: all the case statements fall through */
> -       switch (length) {
> -       case 3: c += k[2];
> -       case 2: b += k[1];
> -       case 1: a += k[0];
> -               __jhash_final(a, b, c);
> -       case 0: /* Nothing left to add */
> -               break;
> -       }
> -
> -       return c;
> -}
> -
> +u32 jhash(const void *key, u32 length, u32 initval);
> +u32 jhash2(const u32 *k, u32 length, u32 initval);
>
>  /* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
> -static inline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
> -{
> -       a += initval;
> -       b += initval;
> -       c += initval;
> -
> -       __jhash_final(a, b, c);
> -
> -       return c;
> -}
> +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval);
>
>  static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval)
>  {
> diff --git a/lib/Makefile b/lib/Makefile
> index 6897b52..978be53 100644
> --- a/lib/Makefile
> +++ b/lib/Makefile
> @@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
>          bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \
>          gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \
>          bsearch.o find_bit.o llist.o memweight.o kfifo.o \
> -        percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o
> +        percpu-refcount.o percpu_ida.o jhash.o rhashtable.o reciprocal_div.o
>  obj-y += string_helpers.o
>  obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
>  obj-y += hexdump.o
> diff --git a/lib/jhash.c b/lib/jhash.c
> new file mode 100644
> index 0000000..cf3c277
> --- /dev/null
> +++ b/lib/jhash.c
> @@ -0,0 +1,149 @@
> +/* Jenkins hash support.
> + *
> + * Copyright (C) 2006. Bob Jenkins (bob_jenkins@burtleburtle.net)
> + *
> + * http://burtleburtle.net/bob/hash/
> + *
> + * These are the credits from Bob's sources:
> + *
> + * lookup3.c, by Bob Jenkins, May 2006, Public Domain.
> + *
> + * These are functions for producing 32-bit hashes for hash table lookup.
> + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final()
> + * are externally useful functions.  Routines to test the hash are included
> + * if SELF_TEST is defined.  You can use this free for any purpose.  It's in
> + * the public domain.  It has no warranty.
> + *
> + * Copyright (C) 2009-2010 Jozsef Kadlecsik (kadlec@blackhole.kfki.hu)
> + *
> + * I've modified Bob's hash to be useful in the Linux kernel, and
> + * any bugs present are my fault.
> + * Jozsef
> + */
> +#include <linux/export.h>
> +#include <linux/jhash.h>
> +
> +/* __jhash_mix -- mix 3 32-bit values reversibly. */
> +#define __jhash_mix(a, b, c)                   \
> +{                                              \
> +       a -= c;  a ^= rol32(c, 4);  c += b;     \
> +       b -= a;  b ^= rol32(a, 6);  a += c;     \
> +       c -= b;  c ^= rol32(b, 8);  b += a;     \
> +       a -= c;  a ^= rol32(c, 16); c += b;     \
> +       b -= a;  b ^= rol32(a, 19); a += c;     \
> +       c -= b;  c ^= rol32(b, 4);  b += a;     \
> +}
> +
> +/* __jhash_final - final mixing of 3 32-bit values (a,b,c) into c */
> +#define __jhash_final(a, b, c)                 \
> +{                                              \
> +       c ^= b; c -= rol32(b, 14);              \
> +       a ^= c; a -= rol32(c, 11);              \
> +       b ^= a; b -= rol32(a, 25);              \
> +       c ^= b; c -= rol32(b, 16);              \
> +       a ^= c; a -= rol32(c, 4);               \
> +       b ^= a; b -= rol32(a, 14);              \
> +       c ^= b; c -= rol32(b, 24);              \
> +}
> +
> +/* jhash - hash an arbitrary key
> + * @k: sequence of bytes as key
> + * @length: the length of the key
> + * @initval: the previous hash, or an arbitray value
> + *
> + * The generic version, hashes an arbitrary sequence of bytes.
> + * No alignment or length assumptions are made about the input key.
> + *
> + * Returns the hash value of the key. The result depends on endianness.
> + */
> +u32 jhash(const void *key, u32 length, u32 initval)
> +{
> +       u32 a, b, c;
> +       const u8 *k = key;
> +
> +       /* Set up the internal state */
> +       a = b = c = JHASH_INITVAL + length + initval;
> +
> +       /* All but the last block: affect some 32 bits of (a,b,c) */
> +       while (length > 12) {
> +               a += __get_unaligned_cpu32(k);
> +               b += __get_unaligned_cpu32(k + 4);
> +               c += __get_unaligned_cpu32(k + 8);
> +               __jhash_mix(a, b, c);
> +               length -= 12;
> +               k += 12;
> +       }
> +       /* Last block: affect all 32 bits of (c) */
> +       /* All the case statements fall through */
> +       switch (length) {
> +       case 12: c += (u32)k[11]<<24;
> +       case 11: c += (u32)k[10]<<16;
> +       case 10: c += (u32)k[9]<<8;
> +       case 9:  c += k[8];
> +       case 8:  b += (u32)k[7]<<24;
> +       case 7:  b += (u32)k[6]<<16;
> +       case 6:  b += (u32)k[5]<<8;
> +       case 5:  b += k[4];
> +       case 4:  a += (u32)k[3]<<24;
> +       case 3:  a += (u32)k[2]<<16;
> +       case 2:  a += (u32)k[1]<<8;
> +       case 1:  a += k[0];
> +                __jhash_final(a, b, c);
> +       case 0: /* Nothing left to add */
> +               break;
> +       }
> +
> +       return c;
> +}
> +EXPORT_SYMBOL_GPL(jhash);
> +
> +/* jhash2 - hash an array of u32's
> + * @k: the key which must be an array of u32's
> + * @length: the number of u32's in the key
> + * @initval: the previous hash, or an arbitray value
> + *
> + * Returns the hash value of the key.
> + */
> +u32 jhash2(const u32 *k, u32 length, u32 initval)
> +{
> +       u32 a, b, c;
> +
> +       /* Set up the internal state */
> +       a = b = c = JHASH_INITVAL + (length<<2) + initval;
> +
> +       /* Handle most of the key */
> +       while (length > 3) {
> +               a += k[0];
> +               b += k[1];
> +               c += k[2];
> +               __jhash_mix(a, b, c);
> +               length -= 3;
> +               k += 3;
> +       }
> +
> +       /* Handle the last 3 u32's: all the case statements fall through */
> +       switch (length) {
> +       case 3: c += k[2];
> +       case 2: b += k[1];
> +       case 1: a += k[0];
> +               __jhash_final(a, b, c);
> +       case 0: /* Nothing left to add */
> +               break;
> +       }
> +
> +       return c;
> +}
> +EXPORT_SYMBOL_GPL(jhash2);
> +
> +/* __jhash_nwords - hash exactly 3, 2 or 1 word(s) */
> +u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
> +{
> +       a += initval;
> +       b += initval;
> +       c += initval;
> +
> +       __jhash_final(a, b, c);
> +
> +       return c;
> +}
> +EXPORT_SYMBOL_GPL(__jhash_nwords);
> diff --git a/lib/rhashtable.c b/lib/rhashtable.c
> index cc0c697..cf429c5 100644
> --- a/lib/rhashtable.c
> +++ b/lib/rhashtable.c
> @@ -663,11 +663,6 @@ static size_t rounded_hashtable_size(const struct rhashtable_params *params)
>                    (unsigned long)params->min_size);
>  }
>
> -static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed)
> -{
> -       return jhash2(key, length, seed);
> -}
> -
>  /**
>   * rhashtable_init - initialize a new hash table
>   * @ht:                hash table to be initialized
> @@ -773,8 +768,14 @@ int rhashtable_init(struct rhashtable *ht,
>                 ht->p.hashfn = jhash;
>
>                 if (!(ht->key_len & (sizeof(u32) - 1))) {
> +                       typedef u32 (*hashfunc)(const void *k, u32 length, u32 initval);
> +
>                         ht->key_len /= sizeof(u32);
> -                       ht->p.hashfn = rhashtable_jhash2;
> +                       /*
> +                        * jhash2() 1st param is const u32*,
> +                        * p.hashfn wants const void*. Hence the cast:
> +                        */
> +                       ht->p.hashfn = (hashfunc)jhash2;
>                 }
>         }
>
> --
> 1.8.1.4
>
> --
> To unsubscribe from this list: send the line "unsubscribe netdev" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-16 15:43 ` Tom Herbert
@ 2015-07-16 18:17   ` David Miller
  2015-07-16 19:23     ` Joe Perches
  2015-07-19 15:14     ` Denys Vlasenko
  0 siblings, 2 replies; 9+ messages in thread
From: David Miller @ 2015-07-16 18:17 UTC (permalink / raw)
  To: tom
  Cc: dvlasenk, tgraf, alexander.h.duyck, kadlec, herbert, netdev,
	linux-kernel

From: Tom Herbert <tom@herbertland.com>
Date: Thu, 16 Jul 2015 08:43:25 -0700

> On Thu, Jul 16, 2015 at 5:40 AM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>> This patch deinlines jhash, jhash2 and __jhash_nwords.
>>
>> It also removes rhashtable_jhash2(key, length, seed)
>> because it was merely calling jhash2(key, length, seed).
>>
>> With this .config: http://busybox.net/~vda/kernel_config,
>> after deinlining these functions have sizes and callsite counts
>> as follows:
>>
>> __jhash_nwords: 72 bytes, 75 calls
>> jhash: 297 bytes, 111 calls
>> jhash2: 205 bytes, 136 calls
>>
> jhash is used in several places in the critical data path. Does the
> decrease in text size justify performance impact of not inlining it?

Tom took the words right out of my mouth.

Denys, you keep making deinlining changes like this all the time, like
a robot.  But I never see you make any effort to look into the performance
nor code generation ramifications of your changes.

And frankly that makes your patches quite tiring to deal with.

Your changes potentially have large performance implications, yet you
don't put any effort into considering that aspect at all.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-16 18:17   ` David Miller
@ 2015-07-16 19:23     ` Joe Perches
  2015-07-17 13:44       ` Hagen Paul Pfeifer
  2015-07-19 15:14     ` Denys Vlasenko
  1 sibling, 1 reply; 9+ messages in thread
From: Joe Perches @ 2015-07-16 19:23 UTC (permalink / raw)
  To: David Miller
  Cc: tom, dvlasenk, tgraf, alexander.h.duyck, kadlec, herbert, netdev,
	linux-kernel

On Thu, 2015-07-16 at 11:17 -0700, David Miller wrote:
> From: Tom Herbert <tom@herbertland.com>
> Date: Thu, 16 Jul 2015 08:43:25 -0700
> 
> > On Thu, Jul 16, 2015 at 5:40 AM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
> >> This patch deinlines jhash, jhash2 and __jhash_nwords.
> >>
> >> It also removes rhashtable_jhash2(key, length, seed)
> >> because it was merely calling jhash2(key, length, seed).
> >>
> >> With this .config: http://busybox.net/~vda/kernel_config,
> >> after deinlining these functions have sizes and callsite counts
> >> as follows:
> >>
> >> __jhash_nwords: 72 bytes, 75 calls
> >> jhash: 297 bytes, 111 calls
> >> jhash2: 205 bytes, 136 calls
> >>
> > jhash is used in several places in the critical data path. Does the
> > decrease in text size justify performance impact of not inlining it?
> 
> Tom took the words right out of my mouth.
> 
> Denys, you keep making deinlining changes like this all the time, like
> a robot.  But I never see you make any effort to look into the performance
> nor code generation ramifications of your changes.
> 
> And frankly that makes your patches quite tiring to deal with.
> 
> Your changes potentially have large performance implications, yet you
> don't put any effort into considering that aspect at all.

It might be useful to have these performance impacting
changes guarded by something like CONFIG_CC_OPTIMIZE_FOR_SIZE
with another static __always_inline __<func> and a function &
EXPORT_SYMBOL or just a static inline so that where code size
is critical it's uninlined.

Though even for tiny embedded uses, the additional code
complexity might not be worth it.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-16 19:23     ` Joe Perches
@ 2015-07-17 13:44       ` Hagen Paul Pfeifer
  2015-07-17 22:53         ` Joe Perches
  0 siblings, 1 reply; 9+ messages in thread
From: Hagen Paul Pfeifer @ 2015-07-17 13:44 UTC (permalink / raw)
  To: Joe Perches, David Miller
  Cc: tom, herbert, tgraf, netdev, linux-kernel, dvlasenk,
	alexander.h.duyck, kadlec

> On July 16, 2015 at 9:23 PM Joe Perches <joe@perches.com> wrote:
> 
> It might be useful to have these performance impacting
> changes guarded by something like CONFIG_CC_OPTIMIZE_FOR_SIZE
> with another static __always_inline __<func> and a function &
> EXPORT_SYMBOL or just a static inline so that where code size
> is critical it's uninlined.

But keep in mind that jhash, jhash2 and __jhash_nwords are *not*
one-instruction long functions. We duplicate code over and over resulting
probably in more cache misses. __always_inline__ is probably too strict
and a vanilla inline is already for 99% of all distribution builds a
 __always_inline__, see ARCH_SUPPORTS_OPTIMIZED_INLINING and
CONFIG_CC_OPTIMIZE_FOR_SIZE.

The answer depends on the specific workload. Sometimes an enforced inline
perform better and sometimes a call is the better solution (read: less
cache misses). General purpose vendors with a larger working set size
should reduce cache misses by deinline many functions. For
high-performance special fast-path operations a strong inlined kernel
build is probably faster. __always_inline__ makes it impossible for the
user to deinline functions or not.

Hagen

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-17 13:44       ` Hagen Paul Pfeifer
@ 2015-07-17 22:53         ` Joe Perches
  0 siblings, 0 replies; 9+ messages in thread
From: Joe Perches @ 2015-07-17 22:53 UTC (permalink / raw)
  To: Hagen Paul Pfeifer
  Cc: David Miller, tom, herbert, tgraf, netdev, linux-kernel,
	dvlasenk, alexander.h.duyck, kadlec

On Fri, 2015-07-17 at 15:44 +0200, Hagen Paul Pfeifer wrote:
> > On July 16, 2015 at 9:23 PM Joe Perches <joe@perches.com> wrote:
> > 
> > It might be useful to have these performance impacting
> > changes guarded by something like CONFIG_CC_OPTIMIZE_FOR_SIZE
> > with another static __always_inline __<func> and a function &
> > EXPORT_SYMBOL or just a static inline so that where code size
> > is critical it's uninlined.
> 
> But keep in mind that jhash, jhash2 and __jhash_nwords are *not*
> one-instruction long functions. We duplicate code over and over resulting
> probably in more cache misses. __always_inline__ is probably too strict
> and a vanilla inline is already for 99% of all distribution builds a
>  __always_inline__, see ARCH_SUPPORTS_OPTIMIZED_INLINING and
> CONFIG_CC_OPTIMIZE_FOR_SIZE.

Hello.

Perhaps I wasn't clear/explicit enough.

I tried to suggest using a single __always_inline like this:

Two #ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE (or some other
CONFIG_FOO) functions, one in a .h for the existing behavior
and another in a .c for more code space compact uses.

jhash.h

static __always_inline u32 __jhash(const void *key, u32 length, u32 initval)
{
	[current jhash implementation...]
}

#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
u32 jhash(const void *key, u32 length, u32 initval);
#else
static inline u32 jhash(const void *key, u32 length, u32 initval)
{
	return __jhash(key, length, initval);	
}
#endif

jhash.c:

#ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
u32 jhash(const void *key, u32 length, u32 initval)
{
	return __jhash(key, length, initval);	
}
EXPORT_SYMBOL(jhash)
#endif

etc...

Perhaps the additional code complexity is not worth it.



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-16 18:17   ` David Miller
  2015-07-16 19:23     ` Joe Perches
@ 2015-07-19 15:14     ` Denys Vlasenko
  2015-07-19 18:40       ` David Miller
  1 sibling, 1 reply; 9+ messages in thread
From: Denys Vlasenko @ 2015-07-19 15:14 UTC (permalink / raw)
  To: David Miller, tom
  Cc: tgraf, alexander.h.duyck, kadlec, herbert, netdev, linux-kernel

On 07/16/2015 08:17 PM, David Miller wrote:
> From: Tom Herbert <tom@herbertland.com>
> Date: Thu, 16 Jul 2015 08:43:25 -0700
> 
>> On Thu, Jul 16, 2015 at 5:40 AM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>>> This patch deinlines jhash, jhash2 and __jhash_nwords.
>>>
>>> It also removes rhashtable_jhash2(key, length, seed)
>>> because it was merely calling jhash2(key, length, seed).
>>>
>>> With this .config: http://busybox.net/~vda/kernel_config,
>>> after deinlining these functions have sizes and callsite counts
>>> as follows:
>>>
>>> __jhash_nwords: 72 bytes, 75 calls
>>> jhash: 297 bytes, 111 calls
>>> jhash2: 205 bytes, 136 calls
>>>
>> jhash is used in several places in the critical data path. Does the
>> decrease in text size justify performance impact of not inlining it?
> 
> Tom took the words right out of my mouth.
> 
> Denys, you keep making deinlining changes like this all the time, like
> a robot.  But I never see you make any effort to look into the performance
> nor code generation ramifications of your changes.

The performance impact of the call/ret pair on modern x86
CPUs is only 5 cycles. To make a real difference in execution
speed, the function has to be less than 100 bytes of code.

jhash and jhash2, at more than 200 bytes of code, are definitely
far too large for inlining. Here is the smaller of two, jhash2:

<jhash2>:
       8d 84 b2 ef be ad de    lea    -0x21524111(%rdx,%rsi,4),%eax
       55                      push   %rbp
       89 c1                   mov    %eax,%ecx
       48 89 e5                mov    %rsp,%rbp
       89 c2                   mov    %eax,%edx
       eb 62                   jmp    ffffffff81a0d204 <jhash2+0x73>
       03 47 08                add    0x8(%rdi),%eax
       03 17                   add    (%rdi),%edx
       83 ee 03                sub    $0x3,%esi
       03 4f 04                add    0x4(%rdi),%ecx
       48 83 c7 0c             add    $0xc,%rdi
       41 89 c0                mov    %eax,%r8d
       29 c2                   sub    %eax,%edx
       41 c1 c8 1c             ror    $0x1c,%r8d
       01 c8                   add    %ecx,%eax
       44 31 c2                xor    %r8d,%edx
       41 89 d0                mov    %edx,%r8d
       29 d1                   sub    %edx,%ecx
       01 c2                   add    %eax,%edx
       41 c1 c8 1a             ror    $0x1a,%r8d
       41 31 c8                xor    %ecx,%r8d
       44 89 c1                mov    %r8d,%ecx
       44 29 c0                sub    %r8d,%eax
       41 01 d0                add    %edx,%r8d
       c1 c9 18                ror    $0x18,%ecx
       31 c1                   xor    %eax,%ecx
       89 c8                   mov    %ecx,%eax
       29 ca                   sub    %ecx,%edx
       46 8d 0c 01             lea    (%rcx,%r8,1),%r9d
       c1 c8 10                ror    $0x10,%eax
       31 d0                   xor    %edx,%eax
       89 c1                   mov    %eax,%ecx
       41 29 c0                sub    %eax,%r8d
       42 8d 14 08             lea    (%rax,%r9,1),%edx
       c1 c9 0d                ror    $0xd,%ecx
       44 31 c1                xor    %r8d,%ecx
       89 c8                   mov    %ecx,%eax
       41 29 c9                sub    %ecx,%r9d
       01 d1                   add    %edx,%ecx
       c1 c8 1c                ror    $0x1c,%eax
       44 31 c8                xor    %r9d,%eax
       83 fe 03                cmp    $0x3,%esi
       77 99                   ja     ffffffff81a0d1a2 <jhash2+0x11>
       83 fe 02                cmp    $0x2,%esi
       74 0e                   je     ffffffff81a0d21c <jhash2+0x8b>
       83 fe 03                cmp    $0x3,%esi
       74 06                   je     ffffffff81a0d219 <jhash2+0x88>
       ff ce                   dec    %esi
       75 45                   jne    ffffffff81a0d25c <jhash2+0xcb>
       eb 06                   jmp    ffffffff81a0d21f <jhash2+0x8e>
       03 47 08                add    0x8(%rdi),%eax
       03 4f 04                add    0x4(%rdi),%ecx
       89 ce                   mov    %ecx,%esi
       03 17                   add    (%rdi),%edx
       31 c8                   xor    %ecx,%eax
       c1 ce 12                ror    $0x12,%esi
       29 f0                   sub    %esi,%eax
       89 c6                   mov    %eax,%esi
       31 c2                   xor    %eax,%edx
       c1 ce 15                ror    $0x15,%esi
       29 f2                   sub    %esi,%edx
       89 d6                   mov    %edx,%esi
       31 d1                   xor    %edx,%ecx
       c1 ce 07                ror    $0x7,%esi
       29 f1                   sub    %esi,%ecx
       89 ce                   mov    %ecx,%esi
       31 c8                   xor    %ecx,%eax
       c1 ce 10                ror    $0x10,%esi
       29 f0                   sub    %esi,%eax
       89 c6                   mov    %eax,%esi
       31 c2                   xor    %eax,%edx
       c1 ce 1c                ror    $0x1c,%esi
       29 f2                   sub    %esi,%edx
       31 d1                   xor    %edx,%ecx
       c1 ca 12                ror    $0x12,%edx
       29 d1                   sub    %edx,%ecx
       31 c8                   xor    %ecx,%eax
       c1 c9 08                ror    $0x8,%ecx
       29 c8                   sub    %ecx,%eax
       5d                      pop    %rbp
       c3                      retq

Yes, I do think that this much code should not be inlined.

__jhash_nwords is smaller and it is used for hashing 2 or 3 32-bit words,
it may be reasonable to inline it.
I'll send a new patch which keeps it inlined.

> Your changes potentially have large performance implications, yet you
> don't put any effort into considering that aspect at all.

I don't know why you think I'm some sort of zealot hell-bent
on deinlining everything. I'm not. I do listen to other people.

For example:
One of my patches was deninlining net_generic(), you proposed
to drop some BUG_ONs in it instead.
I wasn't insisting to have it "my way".
I sent you a patch which removed BUG_ON but left net_generic()
inlined, and you took the patch.


^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords
  2015-07-19 15:14     ` Denys Vlasenko
@ 2015-07-19 18:40       ` David Miller
  0 siblings, 0 replies; 9+ messages in thread
From: David Miller @ 2015-07-19 18:40 UTC (permalink / raw)
  To: dvlasenk
  Cc: tom, tgraf, alexander.h.duyck, kadlec, herbert, netdev, linux-kernel

From: Denys Vlasenko <dvlasenk@redhat.com>
Date: Sun, 19 Jul 2015 17:14:53 +0200

> On 07/16/2015 08:17 PM, David Miller wrote:
>> From: Tom Herbert <tom@herbertland.com>
>> Date: Thu, 16 Jul 2015 08:43:25 -0700
>> 
>>> On Thu, Jul 16, 2015 at 5:40 AM, Denys Vlasenko <dvlasenk@redhat.com> wrote:
>>>> This patch deinlines jhash, jhash2 and __jhash_nwords.
>>>>
>>>> It also removes rhashtable_jhash2(key, length, seed)
>>>> because it was merely calling jhash2(key, length, seed).
>>>>
>>>> With this .config: http://busybox.net/~vda/kernel_config,
>>>> after deinlining these functions have sizes and callsite counts
>>>> as follows:
>>>>
>>>> __jhash_nwords: 72 bytes, 75 calls
>>>> jhash: 297 bytes, 111 calls
>>>> jhash2: 205 bytes, 136 calls
>>>>
>>> jhash is used in several places in the critical data path. Does the
>>> decrease in text size justify performance impact of not inlining it?
>> 
>> Tom took the words right out of my mouth.
>> 
>> Denys, you keep making deinlining changes like this all the time, like
>> a robot.  But I never see you make any effort to look into the performance
>> nor code generation ramifications of your changes.
> 
> The performance impact of the call/ret pair on modern x86
> CPUs is only 5 cycles. To make a real difference in execution
> speed, the function has to be less than 100 bytes of code.

What performance metrics have you collected to assert that deinlining
doesn't matter?  What networking tests have you done which stress the
socket demux path?

You still aren't addressing any of my concerns.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2015-07-19 18:40 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-16 12:40 [PATCH v2] jhash: Deinline jhash, jhash2 and __jhash_nwords Denys Vlasenko
2015-07-16 14:26 ` Alexander Duyck
2015-07-16 15:43 ` Tom Herbert
2015-07-16 18:17   ` David Miller
2015-07-16 19:23     ` Joe Perches
2015-07-17 13:44       ` Hagen Paul Pfeifer
2015-07-17 22:53         ` Joe Perches
2015-07-19 15:14     ` Denys Vlasenko
2015-07-19 18:40       ` David Miller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.