git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Brandon Williams <bmwill@google.com>
To: git@vger.kernel.org
Cc: Brandon Williams <bmwill@google.com>,
	sbeller@google.com, gitster@pobox.com, pclouds@gmail.com
Subject: [PATCH v2 21/27] attr: use hashmap for attribute dictionary
Date: Mon, 23 Jan 2017 12:35:19 -0800	[thread overview]
Message-ID: <20170123203525.185058-22-bmwill@google.com> (raw)
In-Reply-To: <20170123203525.185058-1-bmwill@google.com>

The current implementation of the attribute dictionary uses a custom
hashtable.  This modernizes the dictionary by converting it to the builtin
'hashmap' structure.

Also, in order to enable a threaded API in the future add an
accompanying mutex which must be acquired prior to accessing the
dictionary of interned attributes.

Signed-off-by: Brandon Williams <bmwill@google.com>
---
 attr.c        | 173 +++++++++++++++++++++++++++++++++++++++++++---------------
 attr.h        |   2 +
 common-main.c |   3 +
 3 files changed, 133 insertions(+), 45 deletions(-)

diff --git a/attr.c b/attr.c
index 5399e1cb3..d2ece4eba 100644
--- a/attr.c
+++ b/attr.c
@@ -14,6 +14,7 @@
 #include "dir.h"
 #include "utf8.h"
 #include "quote.h"
+#include "thread-utils.h"
 
 const char git_attr__true[] = "(builtin)true";
 const char git_attr__false[] = "\0(builtin)false";
@@ -23,28 +24,17 @@ static const char git_attr__unknown[] = "(builtin)unknown";
 #define ATTR__UNSET NULL
 #define ATTR__UNKNOWN git_attr__unknown
 
-/* This is a randomly chosen prime. */
-#define HASHSIZE 257
-
 #ifndef DEBUG_ATTR
 #define DEBUG_ATTR 0
 #endif
 
-/*
- * NEEDSWORK: the global dictionary of the interned attributes
- * must stay a singleton even after we become thread-ready.
- * Access to these must be surrounded with mutex when it happens.
- */
 struct git_attr {
-	struct git_attr *next;
-	unsigned h;
-	int attr_nr;
+	int attr_nr; /* unique attribute number */
 	int maybe_macro;
 	int maybe_real;
-	char name[FLEX_ARRAY];
+	char name[FLEX_ARRAY]; /* attribute name */
 };
 static int attr_nr;
-static struct git_attr *(git_attr_hash[HASHSIZE]);
 
 /*
  * NEEDSWORK: maybe-real, maybe-macro are not property of
@@ -63,15 +53,94 @@ const char *git_attr_name(const struct git_attr *attr)
 	return attr->name;
 }
 
-static unsigned hash_name(const char *name, int namelen)
+struct attr_hashmap {
+	struct hashmap map;
+#ifndef NO_PTHREADS
+	pthread_mutex_t mutex;
+#endif
+};
+
+static inline void hashmap_lock(struct attr_hashmap *map)
+{
+#ifndef NO_PTHREADS
+	pthread_mutex_lock(&map->mutex);
+#endif
+}
+
+static inline void hashmap_unlock(struct attr_hashmap *map)
 {
-	unsigned val = 0, c;
+#ifndef NO_PTHREADS
+	pthread_mutex_unlock(&map->mutex);
+#endif
+}
 
-	while (namelen--) {
-		c = *name++;
-		val = ((val << 7) | (val >> 22)) ^ c;
-	}
-	return val;
+/*
+ * The global dictionary of all interned attributes.  This
+ * is a singleton object which is shared between threads.
+ * Access to this dictionary must be surrounded with a mutex.
+ */
+static struct attr_hashmap g_attr_hashmap;
+
+/* The container for objects stored in "struct attr_hashmap" */
+struct attr_hash_entry {
+	struct hashmap_entry ent; /* must be the first member! */
+	const char *key; /* the key; memory should be owned by value */
+	size_t keylen; /* length of the key */
+	void *value; /* the stored value */
+};
+
+/* attr_hashmap comparison function */
+static int attr_hash_entry_cmp(const struct attr_hash_entry *a,
+			       const struct attr_hash_entry *b,
+			       void *unused)
+{
+	return (a->keylen != b->keylen) || strncmp(a->key, b->key, a->keylen);
+}
+
+/* Initialize an 'attr_hashmap' object */
+static void attr_hashmap_init(struct attr_hashmap *map)
+{
+	hashmap_init(&map->map, (hashmap_cmp_fn) attr_hash_entry_cmp, 0);
+}
+
+/*
+ * Retrieve the 'value' stored in a hashmap given the provided 'key'.
+ * If there is no matching entry, return NULL.
+ */
+static void *attr_hashmap_get(struct attr_hashmap *map,
+			      const char *key, size_t keylen)
+{
+	struct attr_hash_entry k;
+	struct attr_hash_entry *e;
+
+	if (!map->map.tablesize)
+		attr_hashmap_init(map);
+
+	hashmap_entry_init(&k, memhash(key, keylen));
+	k.key = key;
+	k.keylen = keylen;
+	e = hashmap_get(&map->map, &k, NULL);
+
+	return e ? e->value : NULL;
+}
+
+/* Add 'value' to a hashmap based on the provided 'key'. */
+static void attr_hashmap_add(struct attr_hashmap *map,
+			     const char *key, size_t keylen,
+			     void *value)
+{
+	struct attr_hash_entry *e;
+
+	if (!map->map.tablesize)
+		attr_hashmap_init(map);
+
+	e = xmalloc(sizeof(struct attr_hash_entry));
+	hashmap_entry_init(e, memhash(key, keylen));
+	e->key = key;
+	e->keylen = keylen;
+	e->value = value;
+
+	hashmap_add(&map->map, e);
 }
 
 static int attr_name_valid(const char *name, size_t namelen)
@@ -103,37 +172,44 @@ static void report_invalid_attr(const char *name, size_t len,
 	strbuf_release(&err);
 }
 
-static struct git_attr *git_attr_internal(const char *name, int len)
+/*
+ * Given a 'name', lookup and return the corresponding attribute in the global
+ * dictionary.  If no entry is found, create a new attribute and store it in
+ * the dictionary.
+ */
+static struct git_attr *git_attr_internal(const char *name, int namelen)
 {
-	unsigned hval = hash_name(name, len);
-	unsigned pos = hval % HASHSIZE;
 	struct git_attr *a;
 
-	for (a = git_attr_hash[pos]; a; a = a->next) {
-		if (a->h == hval &&
-		    !memcmp(a->name, name, len) && !a->name[len])
-			return a;
-	}
-
-	if (!attr_name_valid(name, len))
+	if (!attr_name_valid(name, namelen))
 		return NULL;
 
-	FLEX_ALLOC_MEM(a, name, name, len);
-	a->h = hval;
-	a->next = git_attr_hash[pos];
-	a->attr_nr = attr_nr++;
-	a->maybe_macro = 0;
-	a->maybe_real = 0;
-	git_attr_hash[pos] = a;
+	hashmap_lock(&g_attr_hashmap);
+
+	a = attr_hashmap_get(&g_attr_hashmap, name, namelen);
+
+	if (!a) {
+		FLEX_ALLOC_MEM(a, name, name, namelen);
+		a->attr_nr = g_attr_hashmap.map.size;
+		a->maybe_real = 0;
+		a->maybe_macro = 0;
+
+		attr_hashmap_add(&g_attr_hashmap, a->name, namelen, a);
+		assert(a->attr_nr == (g_attr_hashmap.map.size - 1));
+
+		/*
+		 * NEEDSWORK: per git_attr_check check_all_attr
+		 * will be initialized a lot more lazily, not
+		 * like this, and not here.
+		 */
+		REALLOC_ARRAY(check_all_attr, ++attr_nr);
+		check_all_attr[a->attr_nr].attr = a;
+		check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
+		assert(a->attr_nr == (attr_nr - 1));
+	}
+
+	hashmap_unlock(&g_attr_hashmap);
 
-	/*
-	 * NEEDSWORK: per git_attr_check check_all_attr
-	 * will be initialized a lot more lazily, not
-	 * like this, and not here.
-	 */
-	REALLOC_ARRAY(check_all_attr, attr_nr);
-	check_all_attr[a->attr_nr].attr = a;
-	check_all_attr[a->attr_nr].value = ATTR__UNKNOWN;
 	return a;
 }
 
@@ -941,3 +1017,10 @@ void git_attr_set_direction(enum git_attr_direction new, struct index_state *ist
 		drop_attr_stack();
 	use_index = istate;
 }
+
+void attr_start(void)
+{
+#ifndef NO_PTHREADS
+	pthread_mutex_init(&g_attr_hashmap.mutex, NULL);
+#endif
+}
diff --git a/attr.h b/attr.h
index 3db9893ef..8505bca79 100644
--- a/attr.h
+++ b/attr.h
@@ -67,4 +67,6 @@ enum git_attr_direction {
 };
 void git_attr_set_direction(enum git_attr_direction, struct index_state *);
 
+extern void attr_start(void);
+
 #endif /* ATTR_H */
diff --git a/common-main.c b/common-main.c
index c654f9555..6a689007e 100644
--- a/common-main.c
+++ b/common-main.c
@@ -1,5 +1,6 @@
 #include "cache.h"
 #include "exec_cmd.h"
+#include "attr.h"
 
 /*
  * Many parts of Git have subprograms communicate via pipe, expect the
@@ -33,6 +34,8 @@ int main(int argc, const char **argv)
 
 	git_setup_gettext();
 
+	attr_start();
+
 	git_extract_argv0_path(argv[0]);
 
 	restore_sigpipe_to_default();
-- 
2.11.0.483.g087da7b7c-goog


  parent reply	other threads:[~2017-01-23 20:36 UTC|newest]

Thread overview: 111+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-12 23:53 [PATCH 00/27] Revamp the attribute system; another round Brandon Williams
2017-01-12 23:53 ` [PATCH 01/27] commit.c: use strchrnul() to scan for one line Brandon Williams
2017-01-12 23:53 ` [PATCH 02/27] attr.c: " Brandon Williams
2017-01-12 23:53 ` [PATCH 03/27] attr.c: update a stale comment on "struct match_attr" Brandon Williams
2017-01-12 23:53 ` [PATCH 04/27] attr.c: explain the lack of attr-name syntax check in parse_attr() Brandon Williams
2017-01-12 23:53 ` [PATCH 05/27] attr.c: complete a sentence in a comment Brandon Williams
2017-01-12 23:53 ` [PATCH 06/27] attr.c: mark where #if DEBUG ends more clearly Brandon Williams
2017-01-12 23:53 ` [PATCH 07/27] attr.c: simplify macroexpand_one() Brandon Williams
2017-01-12 23:53 ` [PATCH 08/27] attr.c: tighten constness around "git_attr" structure Brandon Williams
2017-01-12 23:53 ` [PATCH 09/27] attr.c: plug small leak in parse_attr_line() Brandon Williams
2017-01-12 23:53 ` [PATCH 10/27] attr: support quoting pathname patterns in C style Brandon Williams
2017-01-12 23:53 ` [PATCH 11/27] attr.c: add push_stack() helper Brandon Williams
2017-01-12 23:53 ` [PATCH 12/27] Documentation: fix a typo Brandon Williams
2017-01-12 23:53 ` [PATCH 13/27] attr.c: outline the future plans by heavily commenting Brandon Williams
2017-01-12 23:53 ` [PATCH 14/27] attr: rename function and struct related to checking attributes Brandon Williams
2017-01-12 23:53 ` [PATCH 15/27] attr: (re)introduce git_check_attr() and struct attr_check Brandon Williams
2017-01-12 23:53 ` [PATCH 16/27] attr: convert git_all_attrs() to use "struct attr_check" Brandon Williams
2017-01-12 23:53 ` [PATCH 17/27] attr: convert git_check_attrs() callers to use the new API Brandon Williams
2017-01-12 23:53 ` [PATCH 18/27] attr: retire git_check_attrs() API Brandon Williams
2017-01-12 23:53 ` [PATCH 19/27] attr: pass struct attr_check to collect_some_attrs Brandon Williams
2017-01-12 23:53 ` [PATCH 20/27] attr: change validity check for attribute names to use positive logic Brandon Williams
2017-01-12 23:53 ` [PATCH 21/27] attr: use hashmap for attribute dictionary Brandon Williams
2017-01-18 20:20   ` Stefan Beller
2017-01-18 20:23     ` Brandon Williams
2017-01-12 23:53 ` [PATCH 22/27] attr: eliminate global check_all_attr array Brandon Williams
2017-01-12 23:53 ` [PATCH 23/27] attr: remove maybe-real, maybe-macro from git_attr Brandon Williams
2017-01-12 23:53 ` [PATCH 24/27] attr: tighten const correctness with git_attr and match_attr Brandon Williams
2017-01-12 23:53 ` [PATCH 25/27] attr: store attribute stacks in hashmap Brandon Williams
2017-01-13 21:20   ` Junio C Hamano
2017-01-18 20:34     ` Brandon Williams
2017-01-23 18:08       ` Brandon Williams
2017-01-18 20:39   ` Stefan Beller
2017-01-18 20:45     ` Stefan Beller
2017-01-18 20:50     ` Brandon Williams
2017-01-12 23:53 ` [PATCH 26/27] attr: push the bare repo check into read_attr() Brandon Williams
2017-01-12 23:53 ` [PATCH 27/27] attr: reformat git_attr_set_direction() function Brandon Williams
2017-01-15 23:47 ` [PATCH 00/27] Revamp the attribute system; another round Junio C Hamano
2017-01-16  8:10   ` Jeff King
2017-01-23 20:34 ` [PATCH v2 " Brandon Williams
2017-01-23 20:34   ` [PATCH v2 01/27] commit.c: use strchrnul() to scan for one line Brandon Williams
2017-01-23 20:35   ` [PATCH v2 02/27] attr.c: " Brandon Williams
2017-01-23 20:35   ` [PATCH v2 03/27] attr.c: update a stale comment on "struct match_attr" Brandon Williams
2017-01-23 20:35   ` [PATCH v2 04/27] attr.c: explain the lack of attr-name syntax check in parse_attr() Brandon Williams
2017-01-23 20:35   ` [PATCH v2 05/27] attr.c: complete a sentence in a comment Brandon Williams
2017-01-23 20:35   ` [PATCH v2 06/27] attr.c: mark where #if DEBUG ends more clearly Brandon Williams
2017-01-23 20:35   ` [PATCH v2 07/27] attr.c: simplify macroexpand_one() Brandon Williams
2017-01-23 20:35   ` [PATCH v2 08/27] attr.c: tighten constness around "git_attr" structure Brandon Williams
2017-01-23 20:35   ` [PATCH v2 09/27] attr.c: plug small leak in parse_attr_line() Brandon Williams
2017-01-23 20:35   ` [PATCH v2 10/27] attr: support quoting pathname patterns in C style Brandon Williams
2017-01-23 20:35   ` [PATCH v2 11/27] attr.c: add push_stack() helper Brandon Williams
2017-01-23 20:35   ` [PATCH v2 12/27] Documentation: fix a typo Brandon Williams
2017-01-23 20:35   ` [PATCH v2 13/27] attr.c: outline the future plans by heavily commenting Brandon Williams
2017-01-23 20:35   ` [PATCH v2 14/27] attr: rename function and struct related to checking attributes Brandon Williams
2017-01-23 20:35   ` [PATCH v2 15/27] attr: (re)introduce git_check_attr() and struct attr_check Brandon Williams
2017-01-23 20:35   ` [PATCH v2 16/27] attr: convert git_all_attrs() to use "struct attr_check" Brandon Williams
2017-01-23 20:35   ` [PATCH v2 17/27] attr: convert git_check_attrs() callers to use the new API Brandon Williams
2017-01-23 20:35   ` [PATCH v2 18/27] attr: retire git_check_attrs() API Brandon Williams
2017-01-23 20:35   ` [PATCH v2 19/27] attr: pass struct attr_check to collect_some_attrs Brandon Williams
2017-01-23 20:35   ` [PATCH v2 20/27] attr: change validity check for attribute names to use positive logic Brandon Williams
2017-01-23 20:35   ` Brandon Williams [this message]
2017-01-23 20:35   ` [PATCH v2 22/27] attr: eliminate global check_all_attr array Brandon Williams
2017-01-23 21:11     ` Junio C Hamano
2017-01-23 20:35   ` [PATCH v2 23/27] attr: remove maybe-real, maybe-macro from git_attr Brandon Williams
2017-01-23 20:35   ` [PATCH v2 24/27] attr: tighten const correctness with git_attr and match_attr Brandon Williams
2017-01-23 20:35   ` [PATCH v2 25/27] attr: store attribute stack in attr_check structure Brandon Williams
2017-01-23 21:42     ` Junio C Hamano
2017-01-23 22:06       ` Brandon Williams
2017-01-24  1:11         ` Brandon Williams
2017-01-24  2:28           ` Junio C Hamano
2017-01-25 19:57             ` Brandon Williams
2017-01-25 20:10               ` Stefan Beller
2017-01-25 20:14               ` Junio C Hamano
2017-01-25 21:54                 ` Brandon Williams
2017-01-25 23:19                   ` Brandon Williams
2017-01-23 20:35   ` [PATCH v2 26/27] attr: push the bare repo check into read_attr() Brandon Williams
2017-01-23 20:35   ` [PATCH v2 27/27] attr: reformat git_attr_set_direction() function Brandon Williams
2017-01-28  2:01   ` [PATCH v3 00/27] Revamp the attribute system; another round Brandon Williams
2017-01-28  2:01     ` [PATCH v3 01/27] commit.c: use strchrnul() to scan for one line Brandon Williams
2017-01-28  2:01     ` [PATCH v3 02/27] attr.c: " Brandon Williams
2017-01-28  2:01     ` [PATCH v3 03/27] attr.c: update a stale comment on "struct match_attr" Brandon Williams
2017-01-28  2:01     ` [PATCH v3 04/27] attr.c: explain the lack of attr-name syntax check in parse_attr() Brandon Williams
2017-01-28  2:01     ` [PATCH v3 05/27] attr.c: complete a sentence in a comment Brandon Williams
2017-01-28  2:01     ` [PATCH v3 06/27] attr.c: mark where #if DEBUG ends more clearly Brandon Williams
2017-01-28  2:01     ` [PATCH v3 07/27] attr.c: simplify macroexpand_one() Brandon Williams
2017-01-28  2:01     ` [PATCH v3 08/27] attr.c: tighten constness around "git_attr" structure Brandon Williams
2017-01-28  2:01     ` [PATCH v3 09/27] attr.c: plug small leak in parse_attr_line() Brandon Williams
2017-01-28  2:01     ` [PATCH v3 10/27] attr: support quoting pathname patterns in C style Brandon Williams
2017-01-28  2:01     ` [PATCH v3 11/27] attr.c: add push_stack() helper Brandon Williams
2017-01-28  2:01     ` [PATCH v3 12/27] Documentation: fix a typo Brandon Williams
2017-01-28  2:01     ` [PATCH v3 13/27] attr.c: outline the future plans by heavily commenting Brandon Williams
2017-01-28  2:01     ` [PATCH v3 14/27] attr: rename function and struct related to checking attributes Brandon Williams
2017-01-28  2:01     ` [PATCH v3 15/27] attr: (re)introduce git_check_attr() and struct attr_check Brandon Williams
2017-01-30 18:05       ` Brandon Williams
2017-01-28  2:01     ` [PATCH v3 16/27] attr: convert git_all_attrs() to use "struct attr_check" Brandon Williams
2017-01-28 23:50       ` Stefan Beller
2017-01-29  2:44         ` Brandon Williams
2017-01-30 18:06       ` Brandon Williams
2017-01-28  2:01     ` [PATCH v3 17/27] attr: convert git_check_attrs() callers to use the new API Brandon Williams
2017-01-28  2:01     ` [PATCH v3 18/27] attr: retire git_check_attrs() API Brandon Williams
2017-01-28  2:01     ` [PATCH v3 19/27] attr: pass struct attr_check to collect_some_attrs Brandon Williams
2017-01-28  2:02     ` [PATCH v3 20/27] attr: change validity check for attribute names to use positive logic Brandon Williams
2017-01-28  2:02     ` [PATCH v3 21/27] attr: use hashmap for attribute dictionary Brandon Williams
2017-01-28  2:02     ` [PATCH v3 22/27] attr: eliminate global check_all_attr array Brandon Williams
2017-01-28  2:02     ` [PATCH v3 23/27] attr: remove maybe-real, maybe-macro from git_attr Brandon Williams
2017-01-28  2:02     ` [PATCH v3 24/27] attr: tighten const correctness with git_attr and match_attr Brandon Williams
2017-01-28  2:02     ` [PATCH v3 25/27] attr: store attribute stack in attr_check structure Brandon Williams
2017-01-28  2:02     ` [PATCH v3 26/27] attr: push the bare repo check into read_attr() Brandon Williams
2017-01-28  2:02     ` [PATCH v3 27/27] attr: reformat git_attr_set_direction() function Brandon Williams
2017-02-02 19:14     ` [PATCH v3 00/27] Revamp the attribute system; another round Junio C Hamano
2017-02-09 17:18       ` Brandon Williams
2017-02-09 19:31         ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170123203525.185058-22-bmwill@google.com \
    --to=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=pclouds@gmail.com \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).