All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] do_compare_entry: use already-computed path
@ 2015-12-19  1:40 David Turner
  2015-12-21 17:17 ` Junio C Hamano
  0 siblings, 1 reply; 2+ messages in thread
From: David Turner @ 2015-12-19  1:40 UTC (permalink / raw)
  To: git; +Cc: David Turner

In traverse_trees, we generate the complete traverse path for a
traverse_info.  Later, in do_compare_entry, we used to go do a bunch
of work to compare the traverse_info to a cache_entry's name without
computing that path.  But since we already have that path, we don't
need to do all that work.  Instead, we can just stuff the generated
path into the traverse_info, and do the comparison more directly.
This makes git checkout much faster -- about 25% on Twitter's
monorepo.  Deeper directory trees are likely to benefit more than
shallower ones.

Signed-off-by: David Turner <dturner@twopensource.com>
---
 tree-walk.c    |  4 ++++
 tree-walk.h    |  1 +
 unpack-trees.c | 41 +++++++++++++++++++++++++++++++++++++++--
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/tree-walk.c b/tree-walk.c
index 6dccd2d..4cab3e1 100644
--- a/tree-walk.c
+++ b/tree-walk.c
@@ -329,6 +329,9 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 		make_traverse_path(base.buf, info->prev, &info->name);
 		base.buf[info->pathlen-1] = '/';
 		strbuf_setlen(&base, info->pathlen);
+		info->traverse_path = base.buf;
+	} else {
+		info->traverse_path = info->name.path;
 	}
 	for (;;) {
 		int trees_used;
@@ -411,6 +414,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
 	for (i = 0; i < n; i++)
 		free_extended_entry(tx + i);
 	free(tx);
+	info->traverse_path = NULL;
 	strbuf_release(&base);
 	return error;
 }
diff --git a/tree-walk.h b/tree-walk.h
index 3b2f7bf..174eb61 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -59,6 +59,7 @@ enum follow_symlinks_result {
 enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
 
 struct traverse_info {
+	const char *traverse_path;
 	struct traverse_info *prev;
 	struct name_entry name;
 	int pathlen;
diff --git a/unpack-trees.c b/unpack-trees.c
index 8e2032f..127dd4d 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -498,13 +498,14 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
  * itself - the caller needs to do the final check for the cache
  * entry having more data at the end!
  */
-static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
+static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
 	int len, pathlen, ce_len;
 	const char *ce_name;
 
 	if (info->prev) {
-		int cmp = do_compare_entry(ce, info->prev, &info->name);
+		int cmp = do_compare_entry_piecewise(ce, info->prev,
+						     &info->name);
 		if (cmp)
 			return cmp;
 	}
@@ -522,6 +523,42 @@ static int do_compare_entry(const struct cache_entry *ce, const struct traverse_
 	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
 }
 
+static int do_compare_entry(const struct cache_entry *ce,
+			    const struct traverse_info *info,
+			    const struct name_entry *n)
+{
+	int len, pathlen, ce_len;
+	const char *ce_name;
+	int cmp;
+
+	/*
+	 * If we have not precomputed the traverse path, it is quicker
+	 * to avoid doing so.  But if we have precomputed it,
+	 * it is quicker to use the precomputed version.
+	 */
+	if (!info->traverse_path)
+		return do_compare_entry_piecewise(ce, info, n);
+
+	cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
+	if (cmp)
+		return cmp;
+
+	pathlen = info->pathlen;
+	ce_len = ce_namelen(ce);
+
+	if (ce_len < pathlen) {
+		if (do_compare_entry_piecewise(ce, info, n) >= 0)
+			die("pathlen");
+		return -1;
+	}
+
+	ce_len -= pathlen;
+	ce_name = ce->name + pathlen;
+
+	len = tree_entry_len(n);
+	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
+}
+
 static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
 {
 	int cmp = do_compare_entry(ce, info, n);
-- 
2.4.2.749.g730654d-twtrsrc

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH] do_compare_entry: use already-computed path
  2015-12-19  1:40 [PATCH] do_compare_entry: use already-computed path David Turner
@ 2015-12-21 17:17 ` Junio C Hamano
  0 siblings, 0 replies; 2+ messages in thread
From: Junio C Hamano @ 2015-12-21 17:17 UTC (permalink / raw)
  To: David Turner; +Cc: git

David Turner <dturner@twopensource.com> writes:

> In traverse_trees, we generate the complete traverse path for a
> traverse_info.  Later, in do_compare_entry, we used to go do a bunch
> of work to compare the traverse_info to a cache_entry's name without
> computing that path.  But since we already have that path, we don't
> need to do all that work.  Instead, we can just stuff the generated
> path into the traverse_info, and do the comparison more directly.
> This makes git checkout much faster -- about 25% on Twitter's
> monorepo.  Deeper directory trees are likely to benefit more than
> shallower ones.

Great work.

IIRC, very early incarnation of the code avoided hard to build the
full path and that was why the path-component-wise comparison was
used heavily in this codepath; at some point we just gave up, I
think.  If we can pass this flattened representation around to
callees that can make good use of it, that makes tons of sense.  I
like the basic idea of this change.

I am bit worried that &base is passed to some function from here,
and they do not take "const struct strbuf *", but a non-const one,
allowing them to potentially modify its contents while this new
field in info struct wants to have the original base.buf, but I
trust you traced the callchain to make sure nothing wrong happens?
Even if that is the case, I feel that this change is setting up a
trap somebody else would easily trigger unknowingly--I wonder how
we can avoid future breakages.


> Signed-off-by: David Turner <dturner@twopensource.com>
> ---
>  tree-walk.c    |  4 ++++
>  tree-walk.h    |  1 +
>  unpack-trees.c | 41 +++++++++++++++++++++++++++++++++++++++--
>  3 files changed, 44 insertions(+), 2 deletions(-)
>
> diff --git a/tree-walk.c b/tree-walk.c
> index 6dccd2d..4cab3e1 100644
> --- a/tree-walk.c
> +++ b/tree-walk.c
> @@ -329,6 +329,9 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
>  		make_traverse_path(base.buf, info->prev, &info->name);
>  		base.buf[info->pathlen-1] = '/';
>  		strbuf_setlen(&base, info->pathlen);
> +		info->traverse_path = base.buf;
> +	} else {
> +		info->traverse_path = info->name.path;
>  	}
>  	for (;;) {
>  		int trees_used;
> @@ -411,6 +414,7 @@ int traverse_trees(int n, struct tree_desc *t, struct traverse_info *info)
>  	for (i = 0; i < n; i++)
>  		free_extended_entry(tx + i);
>  	free(tx);
> +	info->traverse_path = NULL;
>  	strbuf_release(&base);
>  	return error;
>  }
> diff --git a/tree-walk.h b/tree-walk.h
> index 3b2f7bf..174eb61 100644
> --- a/tree-walk.h
> +++ b/tree-walk.h
> @@ -59,6 +59,7 @@ enum follow_symlinks_result {
>  enum follow_symlinks_result get_tree_entry_follow_symlinks(unsigned char *tree_sha1, const char *name, unsigned char *result, struct strbuf *result_path, unsigned *mode);
>  
>  struct traverse_info {
> +	const char *traverse_path;
>  	struct traverse_info *prev;
>  	struct name_entry name;
>  	int pathlen;
> diff --git a/unpack-trees.c b/unpack-trees.c
> index 8e2032f..127dd4d 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -498,13 +498,14 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
>   * itself - the caller needs to do the final check for the cache
>   * entry having more data at the end!
>   */
> -static int do_compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
> +static int do_compare_entry_piecewise(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
>  {
>  	int len, pathlen, ce_len;
>  	const char *ce_name;
>  
>  	if (info->prev) {
> -		int cmp = do_compare_entry(ce, info->prev, &info->name);
> +		int cmp = do_compare_entry_piecewise(ce, info->prev,
> +						     &info->name);
>  		if (cmp)
>  			return cmp;
>  	}
> @@ -522,6 +523,42 @@ static int do_compare_entry(const struct cache_entry *ce, const struct traverse_
>  	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
>  }
>  
> +static int do_compare_entry(const struct cache_entry *ce,
> +			    const struct traverse_info *info,
> +			    const struct name_entry *n)
> +{
> +	int len, pathlen, ce_len;
> +	const char *ce_name;
> +	int cmp;
> +
> +	/*
> +	 * If we have not precomputed the traverse path, it is quicker
> +	 * to avoid doing so.  But if we have precomputed it,
> +	 * it is quicker to use the precomputed version.
> +	 */
> +	if (!info->traverse_path)
> +		return do_compare_entry_piecewise(ce, info, n);
> +
> +	cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
> +	if (cmp)
> +		return cmp;
> +
> +	pathlen = info->pathlen;
> +	ce_len = ce_namelen(ce);
> +
> +	if (ce_len < pathlen) {
> +		if (do_compare_entry_piecewise(ce, info, n) >= 0)
> +			die("pathlen");
> +		return -1;
> +	}
> +
> +	ce_len -= pathlen;
> +	ce_name = ce->name + pathlen;
> +
> +	len = tree_entry_len(n);
> +	return df_name_compare(ce_name, ce_len, S_IFREG, n->path, len, n->mode);
> +}
> +
>  static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
>  {
>  	int cmp = do_compare_entry(ce, info, n);

^ permalink raw reply	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-12-21 17:17 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-12-19  1:40 [PATCH] do_compare_entry: use already-computed path David Turner
2015-12-21 17:17 ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.