linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "George Spelvin" <linux@sciencehorizons.net>
To: linux-kernel@vger.kernel.org, torvalds@linux-foundation.org
Cc: linux@sciencehorizons.net, tglx@linutronix.de
Subject: [PATCH 01/10] Pull out string hash to <linux/stringhash.h>
Date: 25 May 2016 03:21:28 -0400	[thread overview]
Message-ID: <20160525072128.5153.qmail@ns.sciencehorizons.net> (raw)
In-Reply-To: <CA+55aFxPSW+84KfQ1N_WmND-wtvgj2zQm8nFPkRcc+gyU=uing@mail.gmail.com>

... so they can be used without the rest of <linux/dcache.h>

The hashlen_* macros will make sense in the next patch.

Signed-off-by: George Spelvin <linux@sciencehorizons.net>
---
 include/linux/dcache.h     | 27 +----------------
 include/linux/stringhash.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+), 26 deletions(-)
 create mode 100644 include/linux/stringhash.h

diff --git a/include/linux/dcache.h b/include/linux/dcache.h
index 7e9422cb..0f9a977c 100644
--- a/include/linux/dcache.h
+++ b/include/linux/dcache.h
@@ -10,6 +10,7 @@
 #include <linux/cache.h>
 #include <linux/rcupdate.h>
 #include <linux/lockref.h>
+#include <linux/stringhash.h>
 
 struct path;
 struct vfsmount;
@@ -52,9 +53,6 @@ struct qstr {
 };
 
 #define QSTR_INIT(n,l) { { { .len = l } }, .name = n }
-#define hashlen_hash(hashlen) ((u32) (hashlen))
-#define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
-#define hashlen_create(hash,len) (((u64)(len)<<32)|(u32)(hash))
 
 struct dentry_stat_t {
 	long nr_dentry;
@@ -65,29 +63,6 @@ struct dentry_stat_t {
 };
 extern struct dentry_stat_t dentry_stat;
 
-/* Name hashing routines. Initial hash value */
-/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
-#define init_name_hash()		0
-
-/* partial hash update function. Assume roughly 4 bits per character */
-static inline unsigned long
-partial_name_hash(unsigned long c, unsigned long prevhash)
-{
-	return (prevhash + (c << 4) + (c >> 4)) * 11;
-}
-
-/*
- * Finally: cut down the number of bits to a int value (and try to avoid
- * losing bits)
- */
-static inline unsigned long end_name_hash(unsigned long hash)
-{
-	return (unsigned int) hash;
-}
-
-/* Compute the hash for a name string. */
-extern unsigned int full_name_hash(const unsigned char *, unsigned int);
-
 /*
  * Try to keep struct dentry aligned on 64 byte cachelines (this will
  * give reasonable cacheline footprint with larger lines without the
diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h
new file mode 100644
index 00000000..144d8c0f
--- /dev/null
+++ b/include/linux/stringhash.h
@@ -0,0 +1,72 @@
+#ifndef __LINUX_STRINGHASH_H
+#define __LINUX_STRINGHASH_H
+
+#include <linux/types.h>
+
+/*
+ * Routines for hashing strings of bytes to a 32-bit hash value.
+ *
+ * These hash functions are NOT GUARANTEED STABLE between kernel
+ * versions, architectures, or even repeated boots of the same kernel.
+ * (E.g. they may depend on boot-time hardware detection or be
+ * deliberately randomized.)
+ *
+ * They are also not intended to be secure against collisions caused by
+ * malicious inputs; much slower hash functions are required for that.
+ *
+ * They are optimized for pathname components, meaning short strings.
+ * Even if a majority of files have longer names, the dynamic profile of
+ * pathname components skews short due to short directory names.
+ * (E.g. /usr/lib/libsesquipedalianism.so.3.141.)
+ */
+
+/*
+ * Version 1: one byte at a time.  Example of use:
+ *
+ * unsigned long hash = init_name_hash();
+ * while (*p)
+ *	hash = partial_name_hash(tolower(*p++), hash);
+ * hash = end_name_hash(hash);
+ *
+ * Although this is designed for bytes, fs/hfsplus/unicode.c
+ * abuses it to hash 16-bit values.
+ */
+
+/* Hash courtesy of the R5 hash in reiserfs modulo sign bits */
+#define init_name_hash()		0
+
+/* partial hash update function. Assume roughly 4 bits per character */
+static inline unsigned long
+partial_name_hash(unsigned long c, unsigned long prevhash)
+{
+	return (prevhash + (c << 4) + (c >> 4)) * 11;
+}
+
+/*
+ * Finally: cut down the number of bits to an int value (and try to avoid
+ * losing bits)
+ */
+static inline unsigned long end_name_hash(unsigned long hash)
+{
+	return (unsigned int)hash;
+}
+
+/*
+ * Version 2: One word (32 or 64 bits) at a time.
+ * If CONFIG_DCACHE_WORD_ACCESS is defined (meaning <asm/word-at-a-time.h>
+ * exists, which describes major Linux platforms like x86 and ARM), then
+ * this computes a different hash function much faster.
+ *
+ * If not set, this falls back to a wrapper around the preceding.
+ */
+extern unsigned int full_name_hash(const unsigned char *, unsigned int);
+
+/*
+ * A hash_len is a u64 with the hash of a string in the low
+ * half and the length in the high half.
+ */
+#define hashlen_hash(hashlen) ((u32)(hashlen))
+#define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
+#define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))
+
+#endif	/* __LINUX_STRINGHASH_H */
-- 
2.8.1

  parent reply	other threads:[~2016-05-25  7:21 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CA+55aFxPSW+84KfQ1N_WmND-wtvgj2zQm8nFPkRcc+gyU=uing@mail.gmail.com>
2016-05-25  7:20 ` [PATCH 00/10] String hash improvements George Spelvin
2016-05-25  8:00   ` Geert Uytterhoeven
2016-05-25  8:11     ` George Spelvin
2016-05-25  8:50       ` Geert Uytterhoeven
2016-05-25  9:07         ` George Spelvin
2016-05-25 16:08   ` Linus Torvalds
2016-05-28 19:57     ` [PATCH v3 " George Spelvin
2016-05-28 19:57       ` [PATCH v3 01/10] Pull out string hash to <linux/stringhash.h> George Spelvin
2016-05-28 19:57       ` [PATCH v3 02/10] fs/namei.c: Add hashlen_string() function George Spelvin
2016-05-28 19:57       ` [PATCH v3 03/10] <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string() George Spelvin
2016-05-28 19:57       ` [PATCH v3 04/10] Change hash_64() return value to 32 bits George Spelvin
2016-05-28 19:57       ` [PATCH v3 05/10] Eliminate bad hash multipliers from hash_32() and hash_64() George Spelvin
2016-05-28 19:57       ` [PATCH v3 06/10] fs/namei.c: Improve dcache hash function George Spelvin
2016-05-30 15:11         ` Peter Zijlstra
2016-05-30 16:06           ` George Spelvin
2016-05-30 16:27             ` Peter Zijlstra
2016-05-30 18:10               ` George Spelvin
2016-06-02  1:18                 ` Linus Torvalds
2016-06-02  2:31                   ` George Spelvin
2016-06-02 16:35                     ` Linus Torvalds
2016-06-02 18:23                       ` George Spelvin
2016-05-28 19:57       ` [PATCH v3 07/10] <linux/hash.h>: Add support for architecture-specific functions George Spelvin
2016-05-29  7:57         ` Geert Uytterhoeven
2016-05-28 19:57       ` [PATCH v3 08/10] m68k: Add <asm/hash.h> George Spelvin
2016-05-28 19:57       ` [PATCH v3 09/10] microblaze: " George Spelvin
2016-05-28 19:57       ` [PATCH v3 10/10] h8300: " George Spelvin
2016-05-28 20:47       ` [PATCH v3 00/10] String hash improvements Linus Torvalds
2016-05-28 20:54         ` George Spelvin
2016-06-02 22:59     ` [PATCH " Fubo Chen
2016-05-26 17:09   ` [PATCH v2 " George Spelvin
2016-05-25  7:21 ` George Spelvin [this message]
2016-05-25  7:22 ` [PATCH 02/10] fs/namei.c: Add hash_string() function George Spelvin
2016-05-25  7:26 ` [PATCH 03/10] <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hash_string() George Spelvin
2016-05-25  7:28 ` [PATCH 04/10] Change hash_64() return value to 32 bits George Spelvin
2016-05-25  7:29 ` [PATCH 05/10] Eliminate bad hash multipliers from hash_32() and hash_64() George Spelvin
2016-05-25  7:31 ` [PATCH 06/10] fs/namei.c: Improve dcache hash function George Spelvin
2016-05-25  7:33 ` [PATCH 07/10] <linux/hash.h>: Add support for architecture-specific functions George Spelvin
2016-05-26 17:16   ` [PATCH v2 " George Spelvin
2016-05-25  7:34 ` [PATCH 08/10] m68k: Add <asm/archhash.h> George Spelvin
2016-05-25  7:34 ` George Spelvin
2016-05-25  8:07   ` Geert Uytterhoeven
2016-05-25  8:19     ` George Spelvin
2016-05-25  8:24     ` [PATCH 08v2/10] " George Spelvin
2016-05-25  8:48       ` Geert Uytterhoeven
2016-05-25  8:56   ` [PATCH 08/10] " Philippe De Muyter
2016-05-25  9:14     ` George Spelvin
2016-05-25  9:31       ` Andreas Schwab
2016-05-25  9:51       ` Philippe De Muyter
2016-05-25 13:24   ` Philippe De Muyter
2016-05-25 13:42     ` George Spelvin
2016-05-26 17:19   ` [PATCH v2 08/10] m68k: Add <asm/hash.h> George Spelvin
2016-05-25  7:37 ` [PATCH 09/10] microblaze: Add <asm/archhash.h> George Spelvin
2016-05-26 17:21   ` [PATCH v2 09/10] microblaze: Add <asm/hash.h> George Spelvin
2016-05-25  7:38 ` [PATCH 10/10] h8300: Add <asm/archhash.h> George Spelvin
2016-05-26 17:23   ` [PATCH v2 10/10] h8300: Add <asm/hash.h> George Spelvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160525072128.5153.qmail@ns.sciencehorizons.net \
    --to=linux@sciencehorizons.net \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).