All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/2] do_compare_entry: use already-computed path
@ 2015-12-21 22:34 David Turner
  2015-12-21 22:34 ` [PATCH v2 1/2] traverse_info: make mostly const David Turner
                   ` (2 more replies)
  0 siblings, 3 replies; 9+ messages in thread
From: David Turner @ 2015-12-21 22:34 UTC (permalink / raw)
  To: git; +Cc: David Turner

This version strdups the path to avoid it getting mutated.  Problem
noted by Junio.


David Turner (2):
  traverse_info: make mostly const
  do_compare_entry: use already-computed path

 builtin/merge-tree.c     |  2 +-
 cache-tree.c             |  4 ++--
 cache-tree.h             |  2 +-
 t/t4010-diff-pathspec.sh |  2 +-
 tree-walk.c              |  9 +++++++-
 tree-walk.h              |  5 +++--
 unpack-trees.c           | 55 ++++++++++++++++++++++++++++++++++++++++--------
 7 files changed, 62 insertions(+), 17 deletions(-)

-- 
2.4.2.749.g730654d-twtrsrc

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [PATCH v2 1/2] traverse_info: make mostly const
  2015-12-21 22:34 [PATCH v2 0/2] do_compare_entry: use already-computed path David Turner
@ 2015-12-21 22:34 ` David Turner
  2015-12-21 23:15   ` David Turner
  2015-12-21 22:34 ` [PATCH v2 2/2] do_compare_entry: use already-computed path David Turner
  2015-12-21 23:27 ` [PATCH v2 0/2] " Junio C Hamano
  2 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-12-21 22:34 UTC (permalink / raw)
  To: git; +Cc: David Turner

We don't usually modify traverse_info, so make it const across a wide
range of functions.

Signed-off-by: David Turner <dturner@twopensource.com>
---
 builtin/merge-tree.c     |  2 +-
 cache-tree.c             |  4 ++--
 cache-tree.h             |  2 +-
 t/t4010-diff-pathspec.sh |  2 +-
 tree-walk.c              |  2 +-
 tree-walk.h              |  4 ++--
 unpack-trees.c           | 14 +++++++-------
 7 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/builtin/merge-tree.c b/builtin/merge-tree.c
index d4f0cbd..6de2da9 100644
--- a/builtin/merge-tree.c
+++ b/builtin/merge-tree.c
@@ -304,7 +304,7 @@ static void unresolved(const struct traverse_info *info, struct name_entry n[3])
  * The successful merge rules are the same as for the three-way merge
  * in git-read-tree.
  */
-static int threeway_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *info)
+static int threeway_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, const struct traverse_info *info)
 {
 	/* Same in both? */
 	if (same_entry(entry+1, entry+2) || both_empty(entry+1, entry+2)) {
diff --git a/cache-tree.c b/cache-tree.c
index a59e6f1..0fd2ab5 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -694,7 +694,7 @@ void prime_cache_tree(struct index_state *istate, struct tree *tree)
  *     above us, and find ourselves in there.
  */
 static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root,
-							 struct traverse_info *info)
+							 const struct traverse_info *info)
 {
 	struct cache_tree *our_parent;
 
@@ -706,7 +706,7 @@ static struct cache_tree *find_cache_tree_from_traversal(struct cache_tree *root
 
 int cache_tree_matches_traversal(struct cache_tree *root,
 				 struct name_entry *ent,
-				 struct traverse_info *info)
+				 const struct traverse_info *info)
 {
 	struct cache_tree *it;
 
diff --git a/cache-tree.h b/cache-tree.h
index 41c5746..7f99cc8 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -50,6 +50,6 @@ int write_index_as_tree(unsigned char *sha1, struct index_state *index_state, co
 int write_cache_as_tree(unsigned char *sha1, int flags, const char *prefix);
 void prime_cache_tree(struct index_state *, struct tree *);
 
-extern int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, struct traverse_info *info);
+extern int cache_tree_matches_traversal(struct cache_tree *, struct name_entry *ent, const struct traverse_info *info);
 
 #endif
diff --git a/t/t4010-diff-pathspec.sh b/t/t4010-diff-pathspec.sh
index 43c488b..ad91130 100755
--- a/t/t4010-diff-pathspec.sh
+++ b/t/t4010-diff-pathspec.sh
@@ -52,7 +52,7 @@ cat >expected <<\EOF
 EOF
 test_expect_success \
     '"*file1" should show path1/file1' \
-    'git diff-index --cached $tree -- "*file1" >current &&
+    'echo "$tree" && git diff-index --cached $tree -- "*file1" >current &&
      compare_diff_raw current expected'
 
 cat >expected <<\EOF
diff --git a/tree-walk.c b/tree-walk.c
index 6dccd2d..5eee262 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -301,7 +301,7 @@ static void free_extended_entry(struct tree_desc_x *t)
 }
 
 static inline int prune_traversal(struct name_entry *e,
-				  struct traverse_info *info,
+				  const struct traverse_info *info,
 				  struct strbuf *base,
 				  int still_interesting)
 {
diff --git a/tree-walk.h b/tree-walk.h
index 3b2f7bf..f0a457b 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -37,7 +37,7 @@ int tree_entry(struct tree_desc *, struct name_entry *);
 void *fill_tree_descriptor(struct tree_desc *desc, const unsigned char *sha1);
 
 struct traverse_info;
-typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, struct traverse_info *);
+typedef int (*traverse_callback_t)(int n, unsigned long mask, unsigned long dirmask, struct name_entry *entry, const struct traverse_info *);
 int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info);
 
 enum follow_symlinks_result {
@@ -59,7 +59,7 @@ enum follow_symlinks_result {
 enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
 
 struct traverse_info {
-	struct traverse_info *prev;
+	const struct traverse_info *prev;
 	struct name_entry name;
 	int pathlen;
 	struct pathspec *pathspec;
diff --git a/unpack-trees.c b/unpack-trees.c
index 8e2032f..d4bedac 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -422,7 +422,7 @@ static int unpack_index_entry(struct cache_entry *ce,
 	return ret;
 }
 
-static int find_cache_pos(struct traverse_info *, const struct name_entry *);
+static int find_cache_pos(const struct traverse_info *, const struct name_entry *);
 
 static void restore_cache_bottom(struct traverse_info *info, int bottom)
 {
@@ -453,7 +453,7 @@ static int switch_cache_bottom(struct traverse_info *info)
 static int traverse_trees_recursive(int n, unsigned long dirmask,
 				    unsigned long df_conflicts,
 				    struct name_entry *names,
-				    struct traverse_info *info)
+				    const struct traverse_info *info)
 {
 	int i, ret, bottom;
 	struct tree_desc t[MAX_UNPACK_TREES];
@@ -637,7 +637,7 @@ static int unpack_failed(struct unpack_trees_options *o, const char *message)
  * anything, as we will want to match it when the traversal descends into
  * the directory.
  */
-static int find_cache_pos(struct traverse_info *info,
+static int find_cache_pos(const struct traverse_info *info,
 			  const struct name_entry *p)
 {
 	int pos;
@@ -692,7 +692,7 @@ static int find_cache_pos(struct traverse_info *info,
 	return -1;
 }
 
-static struct cache_entry *find_cache_entry(struct traverse_info *info,
+static struct cache_entry *find_cache_entry(const struct traverse_info *info,
 					    const struct name_entry *p)
 {
 	int pos = find_cache_pos(info, p);
@@ -704,7 +704,7 @@ static struct cache_entry *find_cache_entry(struct traverse_info *info,
 		return NULL;
 }
 
-static void debug_path(struct traverse_info *info)
+static void debug_path(const struct traverse_info *info)
 {
 	if (info->prev) {
 		debug_path(info->prev);
@@ -725,7 +725,7 @@ static void debug_unpack_callback(int n,
 				  unsigned long mask,
 				  unsigned long dirmask,
 				  struct name_entry *names,
-				  struct traverse_info *info)
+				  const struct traverse_info *info)
 {
 	int i;
 	printf("* unpack mask %lu, dirmask %lu, cnt %d ",
@@ -736,7 +736,7 @@ static void debug_unpack_callback(int n,
 		debug_name_entry(i, names + i);
 }
 
-static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, struct traverse_info *info)
+static int unpack_callback(int n, unsigned long mask, unsigned long dirmask, struct name_entry *names, const struct traverse_info *info)
 {
 	struct cache_entry *src[MAX_UNPACK_TREES + 1] = { NULL, };
 	struct unpack_trees_options *o = info->data;
-- 
2.4.2.749.g730654d-twtrsrc

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* [PATCH v2 2/2] do_compare_entry: use already-computed path
  2015-12-21 22:34 [PATCH v2 0/2] do_compare_entry: use already-computed path David Turner
  2015-12-21 22:34 ` [PATCH v2 1/2] traverse_info: make mostly const David Turner
@ 2015-12-21 22:34 ` David Turner
  2016-01-05 19:40   ` David Turner
  2015-12-21 23:27 ` [PATCH v2 0/2] " Junio C Hamano
  2 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-12-21 22:34 UTC (permalink / raw)
  To: git; +Cc: David Turner

In traverse_trees, we generate the complete traverse path for a
traverse_info.  Later, in do_compare_entry, we used to go do a bunch
of work to compare the traverse_info to a cache_entry's name without
computing that path.  But since we already have that path, we don't
need to do all that work.  Instead, we can just put the generated
path into the traverse_info, and do the comparison more directly.

We copy the path because prune_traversal might mutate `base`. This
doesn't happen in any codepaths where do_compare_entry is called,
but it's better to be safe.

This makes git checkout much faster -- about 25% on Twitter's
monorepo.  Deeper directory trees are likely to benefit more than
shallower ones.

Signed-off-by: David Turner <dturner@twopensource.com>
---
 tree-walk.c    |  7 +++++++
 tree-walk.h    |  1 +
 unpack-trees.c | 41 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 47 insertions(+), 2 deletions(-)

diff --git a/tree-walk.c b/tree-walk.c
index 5eee262..7a0419e 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -320,6 +320,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 	struct strbuf base = STRBUF_INIT;
 	int interesting = 1;
+	char *traverse_path;
 
 	for (i = 0; i < n; i++)
 		tx[i].d = t[i];
@@ -329,7 +330,11 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 		make_traverse_path(base.buf, info->prev, &info->name);
 		base.buf[info->pathlen-1] = '/';
 		strbuf_setlen(&base, info->pathlen);
+		traverse_path = xstrndup(base.buf, info->pathlen);
+	} else {
+		traverse_path = xstrndup(info->name.path, info->pathlen);
 	}
+	info->traverse_path = traverse_path;
 	for (;;) {
 		int trees_used;
 		unsigned long mask, dirmask;
@@ -411,6 +416,8 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	for (i = 0; i < n; i++)
 		free_extended_entry(tx + i);
 	free(tx);
+	free(traverse_path);
+	info->traverse_path = NULL;
 	strbuf_release(&base);
 	return error;
 }
diff --git a/tree-walk.h b/tree-walk.h
index f0a457b..a94357a 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -59,6 +59,7 @@ enum follow_symlinks_result {
 enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
 
 struct traverse_info {
+	const char *traverse_path;
 	const struct traverse_info *prev;
 	struct name_entry name;
 	int pathlen;
diff --git a/unpack-trees.c b/unpack-trees.c
index d4bedac..37328de 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -498,13 +498,14 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
  * itself - the caller needs to do the final check for the cache
  * entry having more data at the end!
  */
-static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
+static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
 	int len, pathlen, ce_len;
 	const char *ce_name;
 
 	if (info->prev) {
-		int cmp = do_compare_entry(ce, info->prev, &info->name);
+		int cmp = do_compare_entry_piecewise(ce, info->prev,
+						     &info->name);
 		if (cmp)
 			return cmp;
 	}
@@ -522,6 +523,42 @@ static int do_compare_entry(const struct cache_entry *ce, const struct traverse_
 	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
 }
 
+static int do_compare_entry(const struct cache_entry *ce,
+			    const struct traverse_info *info,
+			    const struct name_entry *n)
+{
+	int len, pathlen, ce_len;
+	const char *ce_name;
+	int cmp;
+
+	/*
+	 * If we have not precomputed the traverse path, it is quicker
+	 * to avoid doing so.  But if we have precomputed it,
+	 * it is quicker to use the precomputed version.
+	 */
+	if (!info->traverse_path)
+		return do_compare_entry_piecewise(ce, info, n);
+
+	cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
+	if (cmp)
+		return cmp;
+
+	pathlen = info->pathlen;
+	ce_len = ce_namelen(ce);
+
+	if (ce_len < pathlen) {
+		if (do_compare_entry_piecewise(ce, info, n) >= 0)
+			die("pathlen");
+		return -1;
+	}
+
+	ce_len -= pathlen;
+	ce_name = ce->name + pathlen;
+
+	len = tree_entry_len(n);
+	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
+}
+
 static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
 	int cmp = do_compare_entry(ce, info, n);
-- 
2.4.2.749.g730654d-twtrsrc

^ permalink raw reply related	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 1/2] traverse_info: make mostly const
  2015-12-21 22:34 ` [PATCH v2 1/2] traverse_info: make mostly const David Turner
@ 2015-12-21 23:15   ` David Turner
  0 siblings, 0 replies; 9+ messages in thread
From: David Turner @ 2015-12-21 23:15 UTC (permalink / raw)
  To: git

On Mon, 2015-12-21 at 17:34 -0500, David Turner wrote:
> We don't usually modify traverse_info, so make it const across a wide
> range of functions.
> 
> Signed-off-by: David Turner <dturner@twopensource.com>
> ---
>  builtin/merge-tree.c     |  2 +-
>  cache-tree.c             |  4 ++--
>  cache-tree.h             |  2 +-
>  t/t4010-diff-pathspec.sh |  2 +-

no, that change was not necessary at all.  

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] do_compare_entry: use already-computed path
  2015-12-21 22:34 [PATCH v2 0/2] do_compare_entry: use already-computed path David Turner
  2015-12-21 22:34 ` [PATCH v2 1/2] traverse_info: make mostly const David Turner
  2015-12-21 22:34 ` [PATCH v2 2/2] do_compare_entry: use already-computed path David Turner
@ 2015-12-21 23:27 ` Junio C Hamano
  2015-12-21 23:33   ` David Turner
  2 siblings, 1 reply; 9+ messages in thread
From: Junio C Hamano @ 2015-12-21 23:27 UTC (permalink / raw)
  To: David Turner; +Cc: git

Thanks.  Does the number still stay at 25% improvement?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] do_compare_entry: use already-computed path
  2015-12-21 23:27 ` [PATCH v2 0/2] " Junio C Hamano
@ 2015-12-21 23:33   ` David Turner
       [not found]     ` <CAPc5daW4ru0j4Zd3ynnRcG8df7sZ9ZuVHu8mz2rxVonZpyE4Gw@mail.gmail.com>
  0 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-12-21 23:33 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Mon, 2015-12-21 at 15:27 -0800, Junio C Hamano wrote:
> Thanks.  Does the number still stay at 25% improvement?

Yes.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] do_compare_entry: use already-computed path
       [not found]     ` <CAPc5daW4ru0j4Zd3ynnRcG8df7sZ9ZuVHu8mz2rxVonZpyE4Gw@mail.gmail.com>
@ 2015-12-22  4:26       ` David Turner
  2015-12-22 18:51         ` Junio C Hamano
  0 siblings, 1 reply; 9+ messages in thread
From: David Turner @ 2015-12-22  4:26 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git mailing list

On Mon, 2015-12-21 at 15:34 -0800, Junio C Hamano wrote:
> Great. Thanks, will queue w/o 1/2 (though I do not think it would
> hurt).
> 
> On Mon, Dec 21, 2015 at 3:33 PM, David Turner <
> dturner@twopensource.com> wrote:
> > On Mon, 2015-12-21 at 15:27 -0800, Junio C Hamano wrote:
> > > Thanks.  Does the number still stay at 25% improvement?
> > 
> > Yes.

BTW, that function, via ce_in_traverse_path, gets called about 40
million times when switching (checking out) between master and a branch
that's a few months old (and that contains relatively small changes
from master-as-of-then. Our repo only has approximately a quarter
-million files.  This seems somewhat unreasonable to me, but I haven't
really looked into what's going on. Do you happen to know why this is
and whether it is likely to be a bug?

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 0/2] do_compare_entry: use already-computed path
  2015-12-22  4:26       ` David Turner
@ 2015-12-22 18:51         ` Junio C Hamano
  0 siblings, 0 replies; 9+ messages in thread
From: Junio C Hamano @ 2015-12-22 18:51 UTC (permalink / raw)
  To: David Turner; +Cc: git mailing list

David Turner <dturner@twopensource.com> writes:

> On Mon, 2015-12-21 at 15:34 -0800, Junio C Hamano wrote:
>> Great. Thanks, will queue w/o 1/2 (though I do not think it would
>> hurt).
>> 
>> On Mon, Dec 21, 2015 at 3:33 PM, David Turner <
>> dturner@twopensource.com> wrote:
>> > On Mon, 2015-12-21 at 15:27 -0800, Junio C Hamano wrote:
>> > > Thanks.  Does the number still stay at 25% improvement?
>> > 
>> > Yes.
>
> BTW, that function, via ce_in_traverse_path, gets called about 40
> million times when switching (checking out) between master and a
> branch that's a few months old (and that contains relatively small
> changes from master-as-of-then. Our repo only has approximately a
> quarter -million files.  This seems somewhat unreasonable to me,
> but I haven't really looked into what's going on.  Do you happen
> to know why this is and whether it is likely to be a bug?

That does sound excessive; smells like somebody is being overly
cautious (i.e. a performance bug).

We might be seeing something similar to what e53e6b44 observed and
attempted to correct.  I dunno.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [PATCH v2 2/2] do_compare_entry: use already-computed path
  2015-12-21 22:34 ` [PATCH v2 2/2] do_compare_entry: use already-computed path David Turner
@ 2016-01-05 19:40   ` David Turner
  0 siblings, 0 replies; 9+ messages in thread
From: David Turner @ 2016-01-05 19:40 UTC (permalink / raw)
  To: git

[-- Attachment #1: Type: text/plain, Size: 202 bytes --]

On Mon, 2015-12-21 at 17:34 -0500, David Turner wrote:
> In traverse_trees, we generate the complete traverse path for a

Please replace with the attached version, which eliminates an
unnecessary check.

[-- Attachment #2: 0001-do_compare_entry-use-already-computed-path.patch --]
[-- Type: text/x-patch, Size: 4769 bytes --]

From 520cfbff15faa6de50d37b4a476b21dbe1598433 Mon Sep 17 00:00:00 2001
From: David Turner <dturner@twopensource.com>
Date: Mon, 21 Dec 2015 17:34:20 -0500
Subject: [PATCH] do_compare_entry: use already-computed path

In traverse_trees, we generate the complete traverse path for a
traverse_info.  Later, in do_compare_entry, we used to go do a bunch
of work to compare the traverse_info to a cache_entry's name without
computing that path.  But since we already have that path, we don't
need to do all that work.  Instead, we can just put the generated
path into the traverse_info, and do the comparison more directly.

We copy the path because prune_traversal might mutate `base`. This
doesn't happen in any codepaths where do_compare_entry is called,
but it's better to be safe.

This makes git checkout much faster -- about 25% on Twitter's
monorepo.  Deeper directory trees are likely to benefit more than
shallower ones.

Signed-off-by: David Turner <dturner@twopensource.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 tree-walk.c    |  7 +++++++
 tree-walk.h    |  1 +
 unpack-trees.c | 38 ++++++++++++++++++++++++++++++++++++--
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/tree-walk.c b/tree-walk.c
index 6dccd2d..cd4bb2c 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -320,6 +320,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	struct tree_desc_x *tx = xcalloc(n, sizeof(*tx));
 	struct strbuf base = STRBUF_INIT;
 	int interesting = 1;
+	char *traverse_path;
 
 	for (i = 0; i < n; i++)
 		tx[i].d = t[i];
@@ -329,7 +330,11 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 		make_traverse_path(base.buf, info->prev, &info->name);
 		base.buf[info->pathlen-1] = '/';
 		strbuf_setlen(&base, info->pathlen);
+		traverse_path = xstrndup(base.buf, info->pathlen);
+	} else {
+		traverse_path = xstrndup(info->name.path, info->pathlen);
 	}
+	info->traverse_path = traverse_path;
 	for (;;) {
 		int trees_used;
 		unsigned long mask, dirmask;
@@ -411,6 +416,8 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	for (i = 0; i < n; i++)
 		free_extended_entry(tx + i);
 	free(tx);
+	free(traverse_path);
+	info->traverse_path = NULL;
 	strbuf_release(&base);
 	return error;
 }
diff --git a/tree-walk.h b/tree-walk.h
index 3b2f7bf..174eb61 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -59,6 +59,7 @@ enum follow_symlinks_result {
 enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
 
 struct traverse_info {
+	const char *traverse_path;
 	struct traverse_info *prev;
 	struct name_entry name;
 	int pathlen;
diff --git a/unpack-trees.c b/unpack-trees.c
index 8e2032f..5f541c2 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -498,13 +498,14 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
  * itself - the caller needs to do the final check for the cache
  * entry having more data at the end!
  */
-static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
+static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
 	int len, pathlen, ce_len;
 	const char *ce_name;
 
 	if (info->prev) {
-		int cmp = do_compare_entry(ce, info->prev, &info->name);
+		int cmp = do_compare_entry_piecewise(ce, info->prev,
+						     &info->name);
 		if (cmp)
 			return cmp;
 	}
@@ -522,6 +523,39 @@ static int do_compare_entry(const struct cache_entry *ce, const struct traverse_
 	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
 }
 
+static int do_compare_entry(const struct cache_entry *ce,
+			    const struct traverse_info *info,
+			    const struct name_entry *n)
+{
+	int len, pathlen, ce_len;
+	const char *ce_name;
+	int cmp;
+
+	/*
+	 * If we have not precomputed the traverse path, it is quicker
+	 * to avoid doing so.  But if we have precomputed it,
+	 * it is quicker to use the precomputed version.
+	 */
+	if (!info->traverse_path)
+		return do_compare_entry_piecewise(ce, info, n);
+
+	cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
+	if (cmp)
+		return cmp;
+
+	pathlen = info->pathlen;
+	ce_len = ce_namelen(ce);
+
+	if (ce_len < pathlen)
+		return -1;
+
+	ce_len -= pathlen;
+	ce_name = ce->name + pathlen;
+
+	len = tree_entry_len(n);
+	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
+}
+
 static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
 	int cmp = do_compare_entry(ce, info, n);
-- 
2.4.2.749.g730654d-twtrsrc


^ permalink raw reply related	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2016-01-05 19:41 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-21 22:34 [PATCH v2 0/2] do_compare_entry: use already-computed path David Turner
2015-12-21 22:34 ` [PATCH v2 1/2] traverse_info: make mostly const David Turner
2015-12-21 23:15   ` David Turner
2015-12-21 22:34 ` [PATCH v2 2/2] do_compare_entry: use already-computed path David Turner
2016-01-05 19:40   ` David Turner
2015-12-21 23:27 ` [PATCH v2 0/2] " Junio C Hamano
2015-12-21 23:33   ` David Turner
     [not found]     ` <CAPc5daW4ru0j4Zd3ynnRcG8df7sZ9ZuVHu8mz2rxVonZpyE4Gw@mail.gmail.com>
2015-12-22  4:26       ` David Turner
2015-12-22 18:51         ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.