linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: George Spelvin <linux@sciencehorizons.net>
To: Linus Torvalds <torvalds@linux-foundation.org>,
	lkml <linux-kernel@vger.kernel.org>
Cc: "J . Bruce Fields" <bfields@redhat.com>,
	George Spelvin <linux@sciencehorizons.net>
Subject: [PATCH v3 02/10] fs/namei.c: Add hashlen_string() function
Date: Sat, 28 May 2016 15:57:15 -0400	[thread overview]
Message-ID: <1464465443-25305-3-git-send-email-linux@sciencehorizons.net> (raw)
In-Reply-To: <1464465443-25305-1-git-send-email-linux@sciencehorizons.net>

We'd like to make more use of the highly-optimized dcache hash functions
throughout the kernel, rather than have every subsystem create its own,
and a function that hashes basic null-terminated strings is required
for that.

(The name is to emphasize that it returns both hash and length.)

It's actually useful in the dcache itself, specifically d_alloc_name().
Other uses follow in the next patch.

full_name_hash() is also tweaked to make it more generally useful:
1) Take a "char *" rather than "unsigned char *" argument, to
   be consistent with hash_name().
2) Handle zero-length inputs.  If we want more callers, we don't want
   to make them worry about corner cases.

Signed-off-by: George Spelvin <linux@sciencehorizons.net>
---
 fs/dcache.c                |  3 +--
 fs/namei.c                 | 51 +++++++++++++++++++++++++++++++++++++++++-----
 include/linux/stringhash.h |  8 ++++++--
 3 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/fs/dcache.c b/fs/dcache.c
index d5ecc6e4..19b75180 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1653,8 +1653,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 	struct qstr q;
 
 	q.name = name;
-	q.len = strlen(name);
-	q.hash = full_name_hash(q.name, q.len);
+	q.hash_len = hashlen_string(name);
 	return d_alloc(parent, &q);
 }
 EXPORT_SYMBOL(d_alloc_name);
diff --git a/fs/namei.c b/fs/namei.c
index 42f8ca03..dd98d43a 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1822,19 +1822,20 @@ static inline unsigned long mix_hash(unsigned long hash)
 
 #endif
 
-unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+/* Return the hash of a string of known length */
+unsigned int full_name_hash(const char *name, unsigned int len)
 {
 	unsigned long a, hash = 0;
 
 	for (;;) {
+		if (!len)
+			goto done;
 		a = load_unaligned_zeropad(name);
 		if (len < sizeof(unsigned long))
 			break;
 		hash = mix_hash(hash + a);
 		name += sizeof(unsigned long);
 		len -= sizeof(unsigned long);
-		if (!len)
-			goto done;
 	}
 	hash += a & bytemask_from_count(len);
 done:
@@ -1842,6 +1843,29 @@ done:
 }
 EXPORT_SYMBOL(full_name_hash);
 
+/* Return the "hash_len" (hash and length) of a null-terminated string */
+u64 hashlen_string(const char *name)
+{
+	unsigned long a, adata, mask, hash, len;
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+
+	hash = a = 0;
+	len = -sizeof(unsigned long);	/* first "len +=" below brings it to 0 */
+	do {
+		hash = mix_hash(hash + a);	/* mix in previous word (0 on first pass) */
+		len += sizeof(unsigned long);
+		a = load_unaligned_zeropad(name+len);
+	} while (!has_zero(a, &adata, &constants));
+
+	adata = prep_zero_mask(a, adata, &constants);
+	mask = create_zero_mask(adata);
+	hash += a & zero_bytemask(mask);	/* presumably keeps bytes before the NUL - TODO confirm */
+	len += find_zero(mask);
+
+	return hashlen_create(fold_hash(hash), len);
+}
+EXPORT_SYMBOL(hashlen_string);
+
 /*
  * Calculate the length and hash of the path component, and
  * return the "hash_len" as the result.
@@ -1872,15 +1896,32 @@ static inline u64 hash_name(const char *name)
 
 #else
 
-unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+/* Return the hash of a string of known length */
+unsigned int full_name_hash(const char *name, unsigned int len)
 {
 	unsigned long hash = init_name_hash();
 	while (len--)
-		hash = partial_name_hash(*name++, hash);
+		hash = partial_name_hash((unsigned char)*name++, hash);
 	return end_name_hash(hash);
 }
 EXPORT_SYMBOL(full_name_hash);
 
+/* Return the "hash_len" (hash and length) of a null-terminated string */
+u64 hashlen_string(const char *name)
+{
+	unsigned long hash = init_name_hash();
+	unsigned long len = 0, c;
+
+	c = (unsigned char)*name;
+	while (c) {	/* pre-test: "" yields len 0 and never reads name[1] */
+		len++;
+		hash = partial_name_hash(c, hash);
+		c = (unsigned char)name[len];
+	}
+	return hashlen_create(end_name_hash(hash), len);
+}
+EXPORT_SYMBOL(hashlen_string);
+
 /*
  * We know there's a real path component here of at least
  * one character.
diff --git a/include/linux/stringhash.h b/include/linux/stringhash.h
index 2eaaaf6d..451771d9 100644
--- a/include/linux/stringhash.h
+++ b/include/linux/stringhash.h
@@ -1,7 +1,8 @@
 #ifndef __LINUX_STRINGHASH_H
 #define __LINUX_STRINGHASH_H
 
-#include <linux/types.h>
+#include <linux/compiler.h>	/* For __pure */
+#include <linux/types.h>	/* For u32, u64 */
 
 /*
  * Routines for hashing strings of bytes to a 32-bit hash value.
@@ -59,7 +60,7 @@ static inline unsigned long end_name_hash(unsigned long hash)
  *
  * If not set, this falls back to a wrapper around the preceding.
  */
-extern unsigned int full_name_hash(const unsigned char *, unsigned int);
+extern unsigned int __pure full_name_hash(const char *, unsigned int);
 
 /*
  * A hash_len is a u64 with the hash of a string in the low
@@ -69,4 +70,7 @@ extern unsigned int full_name_hash(const unsigned char *, unsigned int);
 #define hashlen_len(hashlen)  ((u32)((hashlen) >> 32))
 #define hashlen_create(hash, len) ((u64)(len)<<32 | (u32)(hash))
 
+/* Return the "hash_len" (hash and length) of a null-terminated string */
+extern u64 __pure hashlen_string(const char *name);
+
 #endif	/* __LINUX_STRINGHASH_H */
-- 
2.8.1

  parent reply	other threads:[~2016-05-28 20:00 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CA+55aFxPSW+84KfQ1N_WmND-wtvgj2zQm8nFPkRcc+gyU=uing@mail.gmail.com>
2016-05-25  7:20 ` [PATCH 00/10] String hash improvements George Spelvin
2016-05-25  8:00   ` Geert Uytterhoeven
2016-05-25  8:11     ` George Spelvin
2016-05-25  8:50       ` Geert Uytterhoeven
2016-05-25  9:07         ` George Spelvin
2016-05-25 16:08   ` Linus Torvalds
2016-05-28 19:57     ` [PATCH v3 " George Spelvin
2016-05-28 19:57       ` [PATCH v3 01/10] Pull out string hash to <linux/stringhash.h> George Spelvin
2016-05-28 19:57       ` George Spelvin [this message]
2016-05-28 19:57       ` [PATCH v3 03/10] <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hashlen_string() George Spelvin
2016-05-28 19:57       ` [PATCH v3 04/10] Change hash_64() return value to 32 bits George Spelvin
2016-05-28 19:57       ` [PATCH v3 05/10] Eliminate bad hash multipliers from hash_32() and hash_64() George Spelvin
2016-05-28 19:57       ` [PATCH v3 06/10] fs/namei.c: Improve dcache hash function George Spelvin
2016-05-30 15:11         ` Peter Zijlstra
2016-05-30 16:06           ` George Spelvin
2016-05-30 16:27             ` Peter Zijlstra
2016-05-30 18:10               ` George Spelvin
2016-06-02  1:18                 ` Linus Torvalds
2016-06-02  2:31                   ` George Spelvin
2016-06-02 16:35                     ` Linus Torvalds
2016-06-02 18:23                       ` George Spelvin
2016-05-28 19:57       ` [PATCH v3 07/10] <linux/hash.h>: Add support for architecture-specific functions George Spelvin
2016-05-29  7:57         ` Geert Uytterhoeven
2016-05-28 19:57       ` [PATCH v3 08/10] m68k: Add <asm/hash.h> George Spelvin
2016-05-28 19:57       ` [PATCH v3 09/10] microblaze: " George Spelvin
2016-05-28 19:57       ` [PATCH v3 10/10] h8300: " George Spelvin
2016-05-28 20:47       ` [PATCH v3 00/10] String hash improvements Linus Torvalds
2016-05-28 20:54         ` George Spelvin
2016-06-02 22:59     ` [PATCH " Fubo Chen
2016-05-26 17:09   ` [PATCH v2 " George Spelvin
2016-05-25  7:21 ` [PATCH 01/10] Pull out string hash to <linux/stringhash.h> George Spelvin
2016-05-25  7:22 ` [PATCH 02/10] fs/namei.c: Add hash_string() function George Spelvin
2016-05-25  7:26 ` [PATCH 03/10] <linux/sunrpc/svcauth.h>: Define hash_str() in terms of hash_string() George Spelvin
2016-05-25  7:28 ` [PATCH 04/10] Change hash_64() return value to 32 bits George Spelvin
2016-05-25  7:29 ` [PATCH 05/10] Eliminate bad hash multipliers from hash_32() and hash_64() George Spelvin
2016-05-25  7:31 ` [PATCH 06/10] fs/namei.c: Improve dcache hash function George Spelvin
2016-05-25  7:33 ` [PATCH 07/10] <linux/hash.h>: Add support for architecture-specific functions George Spelvin
2016-05-26 17:16   ` [PATCH v2 " George Spelvin
2016-05-25  7:34 ` [PATCH 08/10] m68k: Add <asm/archhash.h> George Spelvin
2016-05-25  7:34 ` George Spelvin
2016-05-25  8:07   ` Geert Uytterhoeven
2016-05-25  8:19     ` George Spelvin
2016-05-25  8:24     ` [PATCH 08v2/10] " George Spelvin
2016-05-25  8:48       ` Geert Uytterhoeven
2016-05-25  8:56   ` [PATCH 08/10] " Philippe De Muyter
2016-05-25  9:14     ` George Spelvin
2016-05-25  9:31       ` Andreas Schwab
2016-05-25  9:51       ` Philippe De Muyter
2016-05-25 13:24   ` Philippe De Muyter
2016-05-25 13:42     ` George Spelvin
2016-05-26 17:19   ` [PATCH v2 08/10] m68k: Add <asm/hash.h> George Spelvin
2016-05-25  7:37 ` [PATCH 09/10] microblaze: Add <asm/archhash.h> George Spelvin
2016-05-26 17:21   ` [PATCH v2 09/10] microblaze: Add <asm/hash.h> George Spelvin
2016-05-25  7:38 ` [PATCH 10/10] h8300: Add <asm/archhash.h> George Spelvin
2016-05-26 17:23   ` [PATCH v2 10/10] h8300: Add <asm/hash.h> George Spelvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1464465443-25305-3-git-send-email-linux@sciencehorizons.net \
    --to=linux@sciencehorizons.net \
    --cc=bfields@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).