[PATCH 05/10] Eliminate bad hash multipliers from hash_32() and hash_64()

From: "George Spelvin" <linux@sciencehorizons.net>
To: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org
Cc: linux@sciencehorizons.net, tglx@linutronix.de
Subject: [PATCH 05/10] Eliminate bad hash multipliers from hash_32() and hash_64()
Date: 25 May 2016 03:29:33 -0400	[thread overview]
Message-ID: <20160525072933.5483.qmail@ns.sciencehorizons.net> (raw)
In-Reply-To: <CA+55aFxPSW+84KfQ1N_WmND-wtvgj2zQm8nFPkRcc+gyU=uing@mail.gmail.com>

To avoid the inefficiency of a 64x64-bit multiply on 32-bit systems,
hash_64() there is changed to use a different algorithm.  It makes two
32-bit hash calls (__hash_32() followed by hash_32()) instead.

Signed-off-by: George Spelvin <linux@sciencehorizons.net>
---
 include/linux/hash.h | 100 ++++++++++++++++++++++-----------------------------
 1 file changed, 43 insertions(+), 57 deletions(-)

diff --git a/include/linux/hash.h b/include/linux/hash.h
index b9201c33..8926f369 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -3,91 +3,76 @@
 /* Fast hashing routine for ints,  longs and pointers.
    (C) 2002 Nadia Yvette Chambers, IBM */
 
-/*
- * Knuth recommends primes in approximately golden ratio to the maximum
- * integer representable by a machine word for multiplicative hashing.
- * Chuck Lever verified the effectiveness of this technique:
- * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
- *
- * These primes are chosen to be bit-sparse, that is operations on
- * them can use shifts and additions instead of multiplications for
- * machines where multiplications are slow.
- */
-
 #include <asm/types.h>
 #include <linux/compiler.h>
 
-/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */
-#define GOLDEN_RATIO_PRIME_32 0x9e370001UL
-/*  2^63 + 2^61 - 2^57 + 2^54 - 2^51 - 2^18 + 1 */
-#define GOLDEN_RATIO_PRIME_64 0x9e37fffffffc0001UL
-
+/*
+ * The "GOLDEN_RATIO_PRIME" is used in fs/btrfs/btrfs_inode.h and
+ * fs/inode.c.  It's not actually prime any more (the previous primes
+ * were actively bad for hashing), but the name remains.
+ */
 #if BITS_PER_LONG == 32
-#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_32
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_32
 #define hash_long(val, bits) hash_32(val, bits)
 #elif BITS_PER_LONG == 64
 #define hash_long(val, bits) hash_64(val, bits)
-#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_PRIME_64
+#define GOLDEN_RATIO_PRIME GOLDEN_RATIO_64
 #else
 #error Wordsize not 32 or 64
 #endif
 
 /*
- * The above primes are actively bad for hashing, since they are
- * too sparse. The 32-bit one is mostly ok, the 64-bit one causes
- * real problems. Besides, the "prime" part is pointless for the
- * multiplicative hash.
+ * This hash multiplies the input by a large odd number and takes the
+ * high bits.  Since multiplication propagates changes to the most
+ * significant end only, it is essential that the high bits of the
+ * product be used for the hash value.
+ *
+ * Chuck Lever verified the effectiveness of this technique:
+ * http://www.citi.umich.edu/techreports/reports/citi-tr-00-1.pdf
  *
  * Although a random odd number will do, it turns out that the golden
  * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice
- * properties.
+ * properties.  (See Knuth vol 3, section 6.4, exercise 9.)
  *
- * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2.
- * (See Knuth vol 3, section 6.4, exercise 9.)
+ * These are the negative, (1 - phi) = phi**2 = (3 - sqrt(5))/2,
+ * which is very slightly easier to multiply by and makes no
+ * difference to the hash distribution.
  */
 #define GOLDEN_RATIO_32 0x61C88647
 #define GOLDEN_RATIO_64 0x61C8864680B583EBull
 
+static inline u32 __hash_32(u32 val)
+{
+	return val * GOLDEN_RATIO_32;
+}
+
+static inline u32 hash_32(u32 val, unsigned int bits)
+{
+	/* High bits are more random, so use them. */
+	return __hash_32(val) >> (32 - bits);
+}
+
 static __always_inline u32 hash_64(u64 val, unsigned int bits)
 {
-	u64 hash = val;
-
-#if BITS_PER_LONG == 64
-	hash = hash * GOLDEN_RATIO_64;
-#else
-	/*  Sigh, gcc can't optimise this alone like it does for 32 bits. */
-	u64 n = hash;
-	n <<= 18;
-	hash -= n;
-	n <<= 33;
-	hash -= n;
-	n <<= 3;
-	hash += n;
-	n <<= 3;
-	hash -= n;
-	n <<= 4;
-	hash += n;
-	n <<= 2;
-	hash += n;
-#endif
-
 	if (__builtin_constant_p(bits > 32 || bits == 0)) {
 		BUILD_BUG_ON(bits > 32 || bits == 0);
 	} else {
 		WARN_ON(bits > 32 || bits == 0);
 	}
 
-	/* High bits are more random, so use them. */
-	return (unsigned)(hash >> (64 - bits));
-}
-
-static inline u32 hash_32(u32 val, unsigned int bits)
-{
-	/* On some cpus multiply is faster, on others gcc will do shifts */
-	u32 hash = val * GOLDEN_RATIO_PRIME_32;
-
-	/* High bits are more random, so use them. */
-	return hash >> (32 - bits);
+#if BITS_PER_LONG == 64
+	/* 64x64-bit multiply is efficient on all 64-bit processors */
+	return val * GOLDEN_RATIO_64 >> (64 - bits);
+#else
+	/*
+	 * Hash 64 bits using only 32x32-bit multiply.	GOLDEN_RATIO is
+	 * phi**2 = 1-phi = 0.38196601.  The square of that is phi**4 =
+	 * 0.14589803 = 1/6.85, which is starting to have the low bits of
+	 * (val >> 32) not affect the high bits of the hash.  By subtracting,
+	 * we end up with phi**3 = 0.23606798, which is a bit better.
+	 */
+	return hash_32((u32)val - __hash_32(val >> 32), bits);
+#endif
 }
 
 static inline u32 hash_ptr(const void *ptr, unsigned int bits)
@@ -95,6 +80,7 @@ static inline u32 hash_ptr(const void *ptr, unsigned int bits)
 	return hash_long((unsigned long)ptr, bits);
 }
 
+/* This really should be called fold32_ptr; it does no hashing to speak of. */
 static inline u32 hash32_ptr(const void *ptr)
 {
 	unsigned long val = (unsigned long)ptr;
-- 
2.8.1