All of lore.kernel.org
 help / color / mirror / Atom feed
From: Karsten Blees <karsten.blees@gmail.com>
To: Git List <git@vger.kernel.org>, Junio C Hamano <gitster@pobox.com>
Cc: Thomas Rast <tr@thomasrast.ch>,
	Jens Lehmann <Jens.Lehmann@web.de>,
	Karsten Blees <karsten.blees@gmail.com>
Subject: [PATCH v4 10/14] name-hash.c: remove cache entries instead of marking them CE_UNHASHED
Date: Thu, 07 Nov 2013 15:39:46 +0100	[thread overview]
Message-ID: <527BA632.4070106@gmail.com> (raw)
In-Reply-To: <527BA483.6040803@gmail.com>

The new hashmap implementation supports remove, so really remove unused
cache entries from the name hashmap instead of just marking them.

The CE_UNHASHED flag and CE_STATE_MASK are no longer needed.

Keep the CE_HASHED flag to prevent adding entries twice.

Signed-off-by: Karsten Blees <blees@dcon.de>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h        |  6 ++----
 name-hash.c    | 46 ++++++++++++++++++++++------------------------
 read-cache.c   |  2 +-
 unpack-trees.c |  2 +-
 4 files changed, 26 insertions(+), 30 deletions(-)

diff --git a/cache.h b/cache.h
index cedc7ae..05b8011 100644
--- a/cache.h
+++ b/cache.h
@@ -160,7 +160,6 @@ struct cache_entry {
 #define CE_ADDED             (1 << 19)
 
 #define CE_HASHED            (1 << 20)
-#define CE_UNHASHED          (1 << 21)
 #define CE_WT_REMOVE         (1 << 22) /* remove in work directory */
 #define CE_CONFLICTED        (1 << 23)
 
@@ -196,11 +195,10 @@ struct pathspec;
  * Copy the sha1 and stat state of a cache entry from one to
  * another. But we never change the name, or the hash state!
  */
-#define CE_STATE_MASK (CE_HASHED | CE_UNHASHED)
 static inline void copy_cache_entry(struct cache_entry *dst,
 				    const struct cache_entry *src)
 {
-	unsigned int state = dst->ce_flags & CE_STATE_MASK;
+	unsigned int state = dst->ce_flags & CE_HASHED;
 
 	/* Don't copy hash chain and name */
 	memcpy(&dst->ce_stat_data, &src->ce_stat_data,
@@ -208,7 +206,7 @@ static inline void copy_cache_entry(struct cache_entry *dst,
 			offsetof(struct cache_entry, ce_stat_data));
 
 	/* Restore the hash state */
-	dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state;
+	dst->ce_flags = (dst->ce_flags & ~CE_HASHED) | state;
 }
 
 static inline unsigned create_ce_flags(unsigned stage)
diff --git a/name-hash.c b/name-hash.c
index 1ec82a3..8871d8e 100644
--- a/name-hash.c
+++ b/name-hash.c
@@ -106,17 +106,29 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce)
 	hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce)));
 	hashmap_add(&istate->name_hash, ce);
 
-	if (ignore_case && !(ce->ce_flags & CE_UNHASHED))
+	if (ignore_case)
 		add_dir_entry(istate, ce);
 }
 
+static int cache_entry_cmp(const struct cache_entry *ce1,
+		const struct cache_entry *ce2, const void *remove)
+{
+	/*
+	 * For remove_name_hash, find the exact entry (pointer equality); for
+	 * index_name_exists, find all entries with matching hash code and
+	 * decide whether the entry matches in same_name.
+	 */
+	return remove ? !(ce1 == ce2) : 0;
+}
+
 static void lazy_init_name_hash(struct index_state *istate)
 {
 	int nr;
 
 	if (istate->name_hash_initialized)
 		return;
-	hashmap_init(&istate->name_hash, NULL, istate->cache_nr);
+	hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp,
+			istate->cache_nr);
 	hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0);
 	for (nr = 0; nr < istate->cache_nr; nr++)
 		hash_index_entry(istate, istate->cache[nr]);
@@ -125,31 +137,19 @@ static void lazy_init_name_hash(struct index_state *istate)
 
 void add_name_hash(struct index_state *istate, struct cache_entry *ce)
 {
-	/* if already hashed, add reference to directory entries */
-	if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK)
-		add_dir_entry(istate, ce);
-
-	ce->ce_flags &= ~CE_UNHASHED;
 	if (istate->name_hash_initialized)
 		hash_index_entry(istate, ce);
 }
 
-/*
- * We don't actually *remove* it, we can just mark it invalid so that
- * we won't find it in lookups.
- *
- * Not only would we have to search the lists (simple enough), but
- * we'd also have to rehash other hash buckets in case this makes the
- * hash bucket empty (common). So it's much better to just mark
- * it.
- */
 void remove_name_hash(struct index_state *istate, struct cache_entry *ce)
 {
-	/* if already hashed, release reference to directory entries */
-	if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED)
-		remove_dir_entry(istate, ce);
+	if (!istate->name_hash_initialized || !(ce->ce_flags & CE_HASHED))
+		return;
+	ce->ce_flags &= ~CE_HASHED;
+	hashmap_remove(&istate->name_hash, ce, ce);
 
-	ce->ce_flags |= CE_UNHASHED;
+	if (ignore_case)
+		remove_dir_entry(istate, ce);
 }
 
 static int slow_same_name(const char *name1, int len1, const char *name2, int len2)
@@ -220,10 +220,8 @@ struct cache_entry *index_file_exists(struct index_state *istate, const char *na
 	hashmap_entry_init(&key, memihash(name, namelen));
 	ce = hashmap_get(&istate->name_hash, &key, NULL);
 	while (ce) {
-		if (!(ce->ce_flags & CE_UNHASHED)) {
-			if (same_name(ce, name, namelen, icase))
-				return ce;
-		}
+		if (same_name(ce, name, namelen, icase))
+			return ce;
 		ce = hashmap_get_next(&istate->name_hash, ce);
 	}
 	return NULL;
diff --git a/read-cache.c b/read-cache.c
index 33dd676..00af9ad 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -58,7 +58,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 
 	new = xmalloc(cache_entry_size(namelen));
 	copy_cache_entry(new, old);
-	new->ce_flags &= ~CE_STATE_MASK;
+	new->ce_flags &= ~CE_HASHED;
 	new->ce_namelen = namelen;
 	memcpy(new->name, new_name, namelen + 1);
 
diff --git a/unpack-trees.c b/unpack-trees.c
index f8985d4..82b652f 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -105,7 +105,7 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts,
 static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce,
 			 unsigned int set, unsigned int clear)
 {
-	clear |= CE_HASHED | CE_UNHASHED;
+	clear |= CE_HASHED;
 
 	if (set & CE_REMOVE)
 		set |= CE_WT_REMOVE;
-- 
1.8.4.msysgit.0.12.g88f5ed0

  parent reply	other threads:[~2013-11-07 14:39 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-07 14:32 [PATCH v4 00/14] New hash table implementation Karsten Blees
2013-11-07 14:33 ` [PATCH v4 01/14] submodule: don't access the .gitmodules cache entry after removing it Karsten Blees
2013-11-07 22:27   ` Heiko Voigt
2013-11-07 14:34 ` [PATCH v4 02/14] add a hashtable implementation that supports O(1) removal Karsten Blees
2013-11-07 21:40   ` Junio C Hamano
2013-11-08 10:27     ` Karsten Blees
2013-11-08 16:45       ` Philip Oakley
2013-11-08 17:08       ` Junio C Hamano
2013-11-13 16:37         ` Karsten Blees
2013-11-07 14:35 ` [PATCH v4 03/14] buitin/describe.c: use new hash map implementation Karsten Blees
2013-11-07 14:36 ` [PATCH v4 04/14] diffcore-rename.c: move code around to prepare for the next patch Karsten Blees
2013-11-07 14:36 ` [PATCH v4 05/14] diffcore-rename.c: simplify finding exact renames Karsten Blees
2013-11-07 14:37 ` [PATCH v4 06/14] diffcore-rename.c: use new hash map implementation Karsten Blees
2013-11-07 14:38 ` [PATCH v4 07/14] name-hash.c: use new hash map implementation for directories Karsten Blees
2013-11-07 14:38 ` [PATCH v4 08/14] name-hash.c: remove unreferenced directory entries Karsten Blees
2013-11-07 14:39 ` [PATCH v4 09/14] name-hash.c: use new hash map implementation for cache entries Karsten Blees
2013-11-07 14:39 ` Karsten Blees [this message]
2013-11-07 14:40 ` [PATCH v4 11/14] remove old hash.[ch] implementation Karsten Blees
2013-11-07 14:43 ` [PATCH v4 12/14] fix 'git update-index --verbose --again' output Karsten Blees
2013-11-07 22:12   ` Junio C Hamano
2013-11-08 10:27     ` Karsten Blees
2013-11-07 14:44 ` [PATCH v4 13/14] builtin/update-index.c: cleanup update_one Karsten Blees
2013-11-07 21:40   ` Junio C Hamano
2013-11-08 10:27     ` Karsten Blees
2013-11-07 14:45 ` [PATCH v4 14/14] read-cache.c: fix memory leaks caused by removed cache entries Karsten Blees
2013-11-07 21:40   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=527BA632.4070106@gmail.com \
    --to=karsten.blees@gmail.com \
    --cc=Jens.Lehmann@web.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=tr@thomasrast.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.