All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] checkout: avoid unncessary match_pathspec calls
@ 2013-03-23 10:55 Nguyễn Thái Ngọc Duy
  2013-03-24  2:45 ` Eric Sunshine
  2013-03-24  6:47 ` Junio C Hamano
  0 siblings, 2 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-03-23 10:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

In checkout_paths() we do this

 - for i = 0..active_nr, if not updated, call match_pathspec
 - for ..., call match_pathspec (inside unmerge_cache)
 - for ..., call match_pathspec (for showing "path .. is unmerged")
 - for ..., if not updated, call match_pathspec and update paths

That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per index entry, save the
result in ce_flags and reuse the results in the following loops.

This command is used on webkit, 215k entries. The pattern is chosen
mainly to make match_pathspec sweat:

git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"

        before      after
real    0m3.493s    0m2.737s
user    0m2.239s    0m1.586s
sys     0m1.252s    0m1.151s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Junio, this patch clearly conflicts with nd/magic-pathspecs. Do you
 want me to:

  - hold it off until nd/magic-pathspecs graduates
  - rebase on top of nd/magic-pathspecs and repost
  - leave it to you to handle conflicts

 ?

 builtin/checkout.c | 23 +++++++++++++++++++----
 cache.h            |  1 +
 resolve-undo.c     | 19 ++++++++++++++++++-
 resolve-undo.h     |  1 +
 4 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index a9c1b5a..fadc11b 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -273,22 +273,37 @@ static int checkout_paths(const struct checkout_opts *opts,
 
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
+		ce->ce_flags &= ~CE_MATCHED;
 		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
 			continue;
-		match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
+		if (match_pathspec(opts->pathspec, ce->name,
+				   ce_namelen(ce), 0, ps_matched))
+			ce->ce_flags |= CE_MATCHED;
 	}
 
 	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
 		return 1;
 
+	/*
+	 * call match_pathspec on the remaining entries that have not
+	 * been done in the previous loop
+	 */
+	for (pos = 0; pos < active_nr; pos++) {
+		struct cache_entry *ce = active_cache[pos];
+		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE) &&
+		    match_pathspec(opts->pathspec, ce->name,
+				   ce_namelen(ce), 0, ps_matched))
+			ce->ce_flags |= CE_MATCHED;
+	}
+
 	/* "checkout -m path" to recreate conflicted state */
 	if (opts->merge)
-		unmerge_cache(opts->pathspec);
+		unmerge_marked_index(&the_index);
 
 	/* Any unmerged paths? */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce))
 				continue;
 			if (opts->force) {
@@ -315,7 +330,7 @@ static int checkout_paths(const struct checkout_opts *opts,
 		struct cache_entry *ce = active_cache[pos];
 		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
 			continue;
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce)) {
 				errs |= checkout_entry(ce, &state, NULL);
 				continue;
diff --git a/cache.h b/cache.h
index c56315c..04e6090 100644
--- a/cache.h
+++ b/cache.h
@@ -161,6 +161,7 @@ struct cache_entry {
 
 #define CE_UNPACKED          (1 << 24)
 #define CE_NEW_SKIP_WORKTREE (1 << 25)
+#define CE_MATCHED           (1 << 26)
 
 /*
  * Extended on-disk flags
diff --git a/resolve-undo.c b/resolve-undo.c
index 72b4612..639eb9c 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -118,7 +118,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	struct cache_entry *ce;
 	struct string_list_item *item;
 	struct resolve_undo_info *ru;
-	int i, err = 0;
+	int i, err = 0, matched;
 
 	if (!istate->resolve_undo)
 		return pos;
@@ -137,6 +137,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	ru = item->util;
 	if (!ru)
 		return pos;
+	matched = ce->ce_flags & CE_MATCHED;
 	remove_index_entry_at(istate, pos);
 	for (i = 0; i < 3; i++) {
 		struct cache_entry *nce;
@@ -144,6 +145,8 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 			continue;
 		nce = make_cache_entry(ru->mode[i], ru->sha1[i],
 				       ce->name, i + 1, 0);
+		if (matched)
+			nce->ce_flags |= CE_MATCHED;
 		if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
 			err = 1;
 			error("cannot unmerge '%s'", ce->name);
@@ -156,6 +159,20 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	return unmerge_index_entry_at(istate, pos);
 }
 
+void unmerge_marked_index(struct index_state *istate)
+{
+	int i;
+
+	if (!istate->resolve_undo)
+		return;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		if (ce->ce_flags & CE_MATCHED)
+			i = unmerge_index_entry_at(istate, i);
+	}
+}
+
 void unmerge_index(struct index_state *istate, const char **pathspec)
 {
 	int i;
diff --git a/resolve-undo.h b/resolve-undo.h
index 8458769..7a30206 100644
--- a/resolve-undo.h
+++ b/resolve-undo.h
@@ -12,5 +12,6 @@ extern struct string_list *resolve_undo_read(const char *, unsigned long);
 extern void resolve_undo_clear_index(struct index_state *);
 extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
+extern void unmerge_marked_index(struct index_state *);
 
 #endif
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] checkout: avoid unncessary match_pathspec calls
  2013-03-23 10:55 [PATCH] checkout: avoid unncessary match_pathspec calls Nguyễn Thái Ngọc Duy
@ 2013-03-24  2:45 ` Eric Sunshine
  2013-03-24  6:47 ` Junio C Hamano
  1 sibling, 0 replies; 7+ messages in thread
From: Eric Sunshine @ 2013-03-24  2:45 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: Git List, Junio C Hamano

On Sat, Mar 23, 2013 at 6:55 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
> checkout: avoid unncessary match_pathspec calls

s/unncessary/unnecessary/

> In checkout_paths() we do this
> ...

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] checkout: avoid unncessary match_pathspec calls
  2013-03-23 10:55 [PATCH] checkout: avoid unncessary match_pathspec calls Nguyễn Thái Ngọc Duy
  2013-03-24  2:45 ` Eric Sunshine
@ 2013-03-24  6:47 ` Junio C Hamano
  2013-03-24 12:55   ` [PATCH v2] checkout: avoid unnecessary " Nguyễn Thái Ngọc Duy
  1 sibling, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2013-03-24  6:47 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> ---
>  Junio, this patch clearly conflicts wih nd/magic-pathspecs. Do you
>  want me to:
>
>   - hold it off until nd/magic-pathspecs graduates
>   - rebase on top of nd/magic-pathspecs and repost
>   - leave it to you to handle conflicts
>  ?

I'd prefer to take small, independent and clear improvements first
and worry about larger ones later, so if there were another choice,
i.e.

 - eject nd/magic-pathspecs for now, cook this (and other small
   independent and clear improvements you may come up with, some of
   which might come out of nd/magic-pathspecs itself) and let it
   graduate first, and later revisit rerolled nd/magic-pathspecs

that would be the ideal among the given choices ;-).

>  	for (pos = 0; pos < active_nr; pos++) {
>  		struct cache_entry *ce = active_cache[pos];
> +		ce->ce_flags &= ~CE_MATCHED;
>  		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
>  			continue;
> -		match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
> +		if (match_pathspec(opts->pathspec, ce->name,
> +				   ce_namelen(ce), 0, ps_matched))
> +			ce->ce_flags |= CE_MATCHED;
>  	}
>  
>  	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
>  		return 1;
>  
> +	/*
> +	 * call match_pathspec on the remaining entries that have not
> +	 * been done in the previous loop
> +	 */
> +	for (pos = 0; pos < active_nr; pos++) {
> +		struct cache_entry *ce = active_cache[pos];
> +		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE) &&
> +		    match_pathspec(opts->pathspec, ce->name,
> +				   ce_namelen(ce), 0, ps_matched))
> +			ce->ce_flags |= CE_MATCHED;
> +	}
> +

The above is a faithful rewrite, but I have to wonder why you need
two separate loops.

Do you understand what the original loop is doing with ps_matched,
and why the code excludes certain paths while doing so?  I didn't
when I read your patch for the first time, as I forgot, until I
checked with 0a1283bc3955 (checkout $tree $path: do not clobber
local changes in $path not in $tree, 2011-09-30)

You don't use ps_matched after report_path_error(); the new loop
shouldn't have to record which pathspec matched.

Also I notice that I forgot to free ps_matched.  Perhaps doing it
this way is easier to maintain?

	/*
         * Make sure all pathspecs participated in locating the
	 * paths to be checked out.
         */
	for (pos = 0; pos < active_nr; pos++) {
		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
			/*
                         * "git checkout tree-ish -- path", but this entry
                         * is in the original index; it will not be checked
                         * out to the working tree and it does not matter
			 * if pathspec matched this entry.  We will not do
			 * anything to this entry at all.
                	 */
			verify_psmatch = NULL;
		else
			/*
                         * Either this entry came from the tree-ish
                         * we are checking the paths out of, or we
			 * are checking out of the index.
                	 */
                        verify_psmatch = ps_matched;
		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce),
				   0, verify_psmatch))
			ce->ce_flags |= CE_MATCHED;
	}

	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
		return 1;
	free(ps_matched);

After commenting on the above, it makes me wonder if we even need to
bother marking entries that were in the index that did not come from
the tree-ish we are checking paths out of, though.  What breaks if
you did not do the rewrite above and dropped the second loop in your
patch?

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v2] checkout: avoid unnecessary match_pathspec calls
  2013-03-24  6:47 ` Junio C Hamano
@ 2013-03-24 12:55   ` Nguyễn Thái Ngọc Duy
  2013-03-25 16:26     ` Junio C Hamano
  0 siblings, 1 reply; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-03-24 12:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

In checkout_paths() we do this

 - for all updated items, call match_pathspec
 - for all items, call match_pathspec (inside unmerge_cache)
 - for all items, call match_pathspec (for showing "path .. is unmerged")
 - for updated items, call match_pathspec and update paths

That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per updated index entry,
save the result in ce_flags and reuse the results in the following
loops.

The changes in 0a1283b (checkout $tree $path: do not clobber local
changes in $path not in $tree - 2011-09-30) limit the affected paths
to ones we read from $tree. We do not do anything to other modified
entries in this case, so the "for all items" above could be modified
to "for all updated items". But..

The command's behavior now is modified slightly: unmerged entries that
match $path, but not updated by $tree, are now NOT touched.  This
should be considered a bug fix, not a regression.

And while at it, free ps_matched after use.

The following command is tested on webkit, 215k entries. The pattern
is chosen mainly to make match_pathspec sweat:

git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"

        before      after
real    0m3.493s    0m2.737s
user    0m2.239s    0m1.586s
sys     0m1.252s    0m1.151s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 > and worry about larger ones later, so if there were another choice,
 > i.e.
 >
 >  - eject nd/magic-pathspecs for now, cook this (and other small
 >    independent and clear improvements you may come up with, some of
 >    which might come out of nd/magic-pathspecs itself) and let it
 >    graduate first, and later revisit rerolld nd/magic-pathspecs
 >
 > that would be the ideal among the given choices ;-).

 Whichever is easier for you.

 > The above is a faithful rewrite, but I have to wonder why you need
 > two separate loops.
 >
 > Do you understand what the original loop is doing with ps_matched,
 > and why the code excludes certain paths while doing so?

 Nope, I did not dig that deep. I expected you to do it ;-) j/k

 > After commenting on the above, it makes me wonder if we even need to
 > bother marking entries that were in the index that did not come from
 > the tree-ish we are checking paths out of, though.  What breaks if
 > you did not do the rewrite above and dropped the second loop in your
 > patch?

 The test suite says none. There is a behavior change regarding
 unmerged entries as mentioned in the commit message. But I think it's
 a good change.

 builtin/checkout.c | 34 +++++++++++++++++++++++++++-------
 cache.h            |  1 +
 resolve-undo.c     | 19 ++++++++++++++++++-
 resolve-undo.h     |  1 +
 4 files changed, 47 insertions(+), 8 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index a9c1b5a..359b983 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -271,24 +271,46 @@ static int checkout_paths(const struct checkout_opts *opts,
 		;
 	ps_matched = xcalloc(1, pos);
 
+	/*
+	 * Make sure all pathspecs participated in locating the paths
+	 * to be checked out.
+	 */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
+		ce->ce_flags &= ~CE_MATCHED;
 		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
+			/*
+			 * "git checkout tree-ish -- path", but this entry
+			 * is in the original index; it will not be checked
+			 * out to the working tree and it does not matter
+			 * if pathspec matched this entry.  We will not do
+			 * anything to this entry at all.
+			 */
 			continue;
-		match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
+		/*
+		 * Either this entry came from the tree-ish we are
+		 * checking the paths out of, or we are checking out
+		 * of the index.
+		 */
+		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce),
+				   0, ps_matched))
+			ce->ce_flags |= CE_MATCHED;
 	}
 
-	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
+	if (report_path_error(ps_matched, opts->pathspec, opts->prefix)) {
+		free(ps_matched);
 		return 1;
+	}
+	free(ps_matched);
 
 	/* "checkout -m path" to recreate conflicted state */
 	if (opts->merge)
-		unmerge_cache(opts->pathspec);
+		unmerge_marked_index(&the_index);
 
 	/* Any unmerged paths? */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce))
 				continue;
 			if (opts->force) {
@@ -313,9 +335,7 @@ static int checkout_paths(const struct checkout_opts *opts,
 	state.refresh_cache = 1;
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
-			continue;
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce)) {
 				errs |= checkout_entry(ce, &state, NULL);
 				continue;
diff --git a/cache.h b/cache.h
index c56315c..04e6090 100644
--- a/cache.h
+++ b/cache.h
@@ -161,6 +161,7 @@ struct cache_entry {
 
 #define CE_UNPACKED          (1 << 24)
 #define CE_NEW_SKIP_WORKTREE (1 << 25)
+#define CE_MATCHED           (1 << 26)
 
 /*
  * Extended on-disk flags
diff --git a/resolve-undo.c b/resolve-undo.c
index 72b4612..639eb9c 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -118,7 +118,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	struct cache_entry *ce;
 	struct string_list_item *item;
 	struct resolve_undo_info *ru;
-	int i, err = 0;
+	int i, err = 0, matched;
 
 	if (!istate->resolve_undo)
 		return pos;
@@ -137,6 +137,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	ru = item->util;
 	if (!ru)
 		return pos;
+	matched = ce->ce_flags & CE_MATCHED;
 	remove_index_entry_at(istate, pos);
 	for (i = 0; i < 3; i++) {
 		struct cache_entry *nce;
@@ -144,6 +145,8 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 			continue;
 		nce = make_cache_entry(ru->mode[i], ru->sha1[i],
 				       ce->name, i + 1, 0);
+		if (matched)
+			nce->ce_flags |= CE_MATCHED;
 		if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
 			err = 1;
 			error("cannot unmerge '%s'", ce->name);
@@ -156,6 +159,20 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	return unmerge_index_entry_at(istate, pos);
 }
 
+void unmerge_marked_index(struct index_state *istate)
+{
+	int i;
+
+	if (!istate->resolve_undo)
+		return;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		if (ce->ce_flags & CE_MATCHED)
+			i = unmerge_index_entry_at(istate, i);
+	}
+}
+
 void unmerge_index(struct index_state *istate, const char **pathspec)
 {
 	int i;
diff --git a/resolve-undo.h b/resolve-undo.h
index 8458769..7a30206 100644
--- a/resolve-undo.h
+++ b/resolve-undo.h
@@ -12,5 +12,6 @@ extern struct string_list *resolve_undo_read(const char *, unsigned long);
 extern void resolve_undo_clear_index(struct index_state *);
 extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
+extern void unmerge_marked_index(struct index_state *);
 
 #endif
-- 
1.8.2.83.gc99314b

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v2] checkout: avoid unnecessary match_pathspec calls
  2013-03-24 12:55   ` [PATCH v2] checkout: avoid unnecessary " Nguyễn Thái Ngọc Duy
@ 2013-03-25 16:26     ` Junio C Hamano
  2013-03-27  5:58       ` [PATCH v3] " Nguyễn Thái Ngọc Duy
  0 siblings, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2013-03-25 16:26 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> In checkout_paths() we do this
>
>  - for all updated items, call match_pathspec
>  - for all items, call match_pathspec (inside unmerge_cache)
>  - for all items, call match_pathspec (for showing "path .. is unmerged)
>  - for updated items, call match_pathspec and update paths
>
> That's a lot of duplicate match_pathspec(s) and the function is not
> exactly cheap to be called so many times, especially on large indexes.
> This patch makes it call match_pathspec once per updated index entry,
> save the result in ce_flags and reuse the results in the following
> loops.
>
> The changes in 0a1283b (checkout $tree $path: do not clobber local
> changes in $path not in $tree - 2011-09-30) limit the affected paths
> to ones we read from $tree. We do not do anything to other modified
> entries in this case, so the "for all items" above could be modified
> to "for all updated items". But..
>
> The command's behavior now is modified slightly: unmerged entries that
> match $path, but not updated by $tree, are now NOT touched.  Although
> this should be considered a bug fix, not a regression.

Could we have a test to show the difference please, especially if we
are going to sell this as a fix?

The change itself looks quite sane to me (I didn't apply or test it,
though---just eyeballing).

Thanks.

>
> And while at there, free ps_matched after use.
>
> The following command is tested on webkit, 215k entries. The pattern
> is chosen mainly to make match_pathspec sweat:
>
> git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"
>
>         before      after
> real    0m3.493s    0m2.737s
> user    0m2.239s    0m1.586s
> sys     0m1.252s    0m1.151s
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  > and worry about larger ones later, so if there were another choice,
>  > i.e.
>  >
>  >  - eject nd/magic-pathspecs for now, cook this (and other small
>  >    independent and clear improvements you may come up with, some of
>  >    which might come out of nd/magic-pathspecs itself) and let it
>  >    graduate first, and later revisit rerolld nd/magic-pathspecs
>  >
>  > that would be the ideal among the given choices ;-).
>
>  Whichever is easier for you.
>
>  > The above is a faithful rewrite, but I have to wonder why you need
>  > two separate loops.
>  >
>  > Do you understand what the original loop is doing with ps_matched,
>  > and why the code excludes certain paths while doing so?
>
>  Nope, I did not dig that deep. I expected you to do it ;-) j/k
>
>  > After commenting on the above, it makes me wonder if we even need to
>  > bother marking entries that were in the index that did not come from
>  > the tree-ish we are checking paths out of, though.  What breaks if
>  > you did not do the rewrite above and dropped the second loop in your
>  > patch?
>
>  The test suite says none. There is a behavior change regarding
>  unmerged entries as mentioned in the commit message. But I think it's
>  a good change.
>
>  builtin/checkout.c | 34 +++++++++++++++++++++++++++-------
>  cache.h            |  1 +
>  resolve-undo.c     | 19 ++++++++++++++++++-
>  resolve-undo.h     |  1 +
>  4 files changed, 47 insertions(+), 8 deletions(-)
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c
> index a9c1b5a..359b983 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -271,24 +271,46 @@ static int checkout_paths(const struct checkout_opts *opts,
>  		;
>  	ps_matched = xcalloc(1, pos);
>  
> +	/*
> +	 * Make sure all pathspecs participated in locating the paths
> +	 * to be checked out.
> +	 */
>  	for (pos = 0; pos < active_nr; pos++) {
>  		struct cache_entry *ce = active_cache[pos];
> +		ce->ce_flags &= ~CE_MATCHED;
>  		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
> +			/*
> +			 * "git checkout tree-ish -- path", but this entry
> +			 * is in the original index; it will not be checked
> +			 * out to the working tree and it does not matter
> +			 * if pathspec matched this entry.  We will not do
> +			 * anything to this entry at all.
> +			 */
>  			continue;
> -		match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
> +		/*
> +		 * Either this entry came from the tree-ish we are
> +		 * checking the paths out of, or we are checking out
> +		 * of the index.
> +		 */
> +		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce),
> +				   0, ps_matched))
> +			ce->ce_flags |= CE_MATCHED;
>  	}
>  
> -	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
> +	if (report_path_error(ps_matched, opts->pathspec, opts->prefix)) {
> +		free(ps_matched);
>  		return 1;
> +	}
> +	free(ps_matched);
>  
>  	/* "checkout -m path" to recreate conflicted state */
>  	if (opts->merge)
> -		unmerge_cache(opts->pathspec);
> +		unmerge_marked_index(&the_index);
>  
>  	/* Any unmerged paths? */
>  	for (pos = 0; pos < active_nr; pos++) {
>  		struct cache_entry *ce = active_cache[pos];
> -		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
> +		if (ce->ce_flags & CE_MATCHED) {
>  			if (!ce_stage(ce))
>  				continue;
>  			if (opts->force) {
> @@ -313,9 +335,7 @@ static int checkout_paths(const struct checkout_opts *opts,
>  	state.refresh_cache = 1;
>  	for (pos = 0; pos < active_nr; pos++) {
>  		struct cache_entry *ce = active_cache[pos];
> -		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
> -			continue;
> -		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
> +		if (ce->ce_flags & CE_MATCHED) {
>  			if (!ce_stage(ce)) {
>  				errs |= checkout_entry(ce, &state, NULL);
>  				continue;
> diff --git a/cache.h b/cache.h
> index c56315c..04e6090 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -161,6 +161,7 @@ struct cache_entry {
>  
>  #define CE_UNPACKED          (1 << 24)
>  #define CE_NEW_SKIP_WORKTREE (1 << 25)
> +#define CE_MATCHED           (1 << 26)
>  
>  /*
>   * Extended on-disk flags
> diff --git a/resolve-undo.c b/resolve-undo.c
> index 72b4612..639eb9c 100644
> --- a/resolve-undo.c
> +++ b/resolve-undo.c
> @@ -118,7 +118,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
>  	struct cache_entry *ce;
>  	struct string_list_item *item;
>  	struct resolve_undo_info *ru;
> -	int i, err = 0;
> +	int i, err = 0, matched;
>  
>  	if (!istate->resolve_undo)
>  		return pos;
> @@ -137,6 +137,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
>  	ru = item->util;
>  	if (!ru)
>  		return pos;
> +	matched = ce->ce_flags & CE_MATCHED;
>  	remove_index_entry_at(istate, pos);
>  	for (i = 0; i < 3; i++) {
>  		struct cache_entry *nce;
> @@ -144,6 +145,8 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
>  			continue;
>  		nce = make_cache_entry(ru->mode[i], ru->sha1[i],
>  				       ce->name, i + 1, 0);
> +		if (matched)
> +			nce->ce_flags |= CE_MATCHED;
>  		if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
>  			err = 1;
>  			error("cannot unmerge '%s'", ce->name);
> @@ -156,6 +159,20 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
>  	return unmerge_index_entry_at(istate, pos);
>  }
>  
> +void unmerge_marked_index(struct index_state *istate)
> +{
> +	int i;
> +
> +	if (!istate->resolve_undo)
> +		return;
> +
> +	for (i = 0; i < istate->cache_nr; i++) {
> +		struct cache_entry *ce = istate->cache[i];
> +		if (ce->ce_flags & CE_MATCHED)
> +			i = unmerge_index_entry_at(istate, i);
> +	}
> +}
> +
>  void unmerge_index(struct index_state *istate, const char **pathspec)
>  {
>  	int i;
> diff --git a/resolve-undo.h b/resolve-undo.h
> index 8458769..7a30206 100644
> --- a/resolve-undo.h
> +++ b/resolve-undo.h
> @@ -12,5 +12,6 @@ extern struct string_list *resolve_undo_read(const char *, unsigned long);
>  extern void resolve_undo_clear_index(struct index_state *);
>  extern int unmerge_index_entry_at(struct index_state *, int);
>  extern void unmerge_index(struct index_state *, const char **);
> +extern void unmerge_marked_index(struct index_state *);
>  
>  #endif

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH v3] checkout: avoid unnecessary match_pathspec calls
  2013-03-25 16:26     ` Junio C Hamano
@ 2013-03-27  5:58       ` Nguyễn Thái Ngọc Duy
  2013-03-28 22:32         ` Junio C Hamano
  0 siblings, 1 reply; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2013-03-27  5:58 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

In checkout_paths() we do this

 - for all updated items, call match_pathspec
 - for all items, call match_pathspec (inside unmerge_cache)
 - for all items, call match_pathspec (for showing "path .. is unmerged")
 - for updated items, call match_pathspec and update paths

That's a lot of duplicate match_pathspec(s) and the function is not
exactly cheap to be called so many times, especially on large indexes.
This patch makes it call match_pathspec once per updated index entry,
save the result in ce_flags and reuse the results in the following
loops.

The changes in 0a1283b (checkout $tree $path: do not clobber local
changes in $path not in $tree - 2011-09-30) limit the affected paths
to ones we read from $tree. We do not do anything to other modified
entries in this case, so the "for all items" above could be modified
to "for all updated items". But..

The command's behavior now is modified slightly: unmerged entries that
match $path, but not updated by $tree, are now NOT touched.  This
should be considered a bug fix, not a regression. A new test is
added for this change.

And while at it, free ps_matched after use.

The following command is tested on webkit, 215k entries. The pattern
is chosen mainly to make match_pathspec sweat:

git checkout -- "*[a-zA-Z]*[a-zA-Z]*[a-zA-Z]*"

        before      after
real    0m3.493s    0m2.737s
user    0m2.239s    0m1.586s
sys     0m1.252s    0m1.151s

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Changes from v2: a new test and some note about matching twice in
 "checkout $tree $path", once in read_tree_some and once in checkout_paths.
 We may be able to avoid match_pathspec entirely in this case when
 tree_entry_interesting learns to fill ps_matched.

 builtin/checkout.c        | 43 ++++++++++++++++++++++++++++++++++++-------
 cache.h                   |  1 +
 resolve-undo.c            | 19 ++++++++++++++++++-
 resolve-undo.h            |  1 +
 t/t2022-checkout-paths.sh | 21 +++++++++++++++++++++
 5 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index a9c1b5a..f8033f4 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -271,24 +271,55 @@ static int checkout_paths(const struct checkout_opts *opts,
 		;
 	ps_matched = xcalloc(1, pos);
 
+	/*
+	 * Make sure all pathspecs participated in locating the paths
+	 * to be checked out.
+	 */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
+		ce->ce_flags &= ~CE_MATCHED;
 		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
+			/*
+			 * "git checkout tree-ish -- path", but this entry
+			 * is in the original index; it will not be checked
+			 * out to the working tree and it does not matter
+			 * if pathspec matched this entry.  We will not do
+			 * anything to this entry at all.
+			 */
 			continue;
-		match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, ps_matched);
+		/*
+		 * Either this entry came from the tree-ish we are
+		 * checking the paths out of, or we are checking out
+		 * of the index.
+		 *
+		 * If it comes from the tree-ish, we already know it
+		 * matches the pathspec and could just stamp
+		 * CE_MATCHED to it from update_some(). But we still
+		 * need ps_matched and read_tree_recursive (and
+		 * eventually tree_entry_interesting) cannot fill
+		 * ps_matched yet. Once it can, we can avoid calling
+		 * match_pathspec() for _all_ entries when
+		 * opts->source_tree != NULL.
+		 */
+		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce),
+				   0, ps_matched))
+			ce->ce_flags |= CE_MATCHED;
 	}
 
-	if (report_path_error(ps_matched, opts->pathspec, opts->prefix))
+	if (report_path_error(ps_matched, opts->pathspec, opts->prefix)) {
+		free(ps_matched);
 		return 1;
+	}
+	free(ps_matched);
 
 	/* "checkout -m path" to recreate conflicted state */
 	if (opts->merge)
-		unmerge_cache(opts->pathspec);
+		unmerge_marked_index(&the_index);
 
 	/* Any unmerged paths? */
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce))
 				continue;
 			if (opts->force) {
@@ -313,9 +344,7 @@ static int checkout_paths(const struct checkout_opts *opts,
 	state.refresh_cache = 1;
 	for (pos = 0; pos < active_nr; pos++) {
 		struct cache_entry *ce = active_cache[pos];
-		if (opts->source_tree && !(ce->ce_flags & CE_UPDATE))
-			continue;
-		if (match_pathspec(opts->pathspec, ce->name, ce_namelen(ce), 0, NULL)) {
+		if (ce->ce_flags & CE_MATCHED) {
 			if (!ce_stage(ce)) {
 				errs |= checkout_entry(ce, &state, NULL);
 				continue;
diff --git a/cache.h b/cache.h
index c56315c..04e6090 100644
--- a/cache.h
+++ b/cache.h
@@ -161,6 +161,7 @@ struct cache_entry {
 
 #define CE_UNPACKED          (1 << 24)
 #define CE_NEW_SKIP_WORKTREE (1 << 25)
+#define CE_MATCHED           (1 << 26)
 
 /*
  * Extended on-disk flags
diff --git a/resolve-undo.c b/resolve-undo.c
index 72b4612..639eb9c 100644
--- a/resolve-undo.c
+++ b/resolve-undo.c
@@ -118,7 +118,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	struct cache_entry *ce;
 	struct string_list_item *item;
 	struct resolve_undo_info *ru;
-	int i, err = 0;
+	int i, err = 0, matched;
 
 	if (!istate->resolve_undo)
 		return pos;
@@ -137,6 +137,7 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	ru = item->util;
 	if (!ru)
 		return pos;
+	matched = ce->ce_flags & CE_MATCHED;
 	remove_index_entry_at(istate, pos);
 	for (i = 0; i < 3; i++) {
 		struct cache_entry *nce;
@@ -144,6 +145,8 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 			continue;
 		nce = make_cache_entry(ru->mode[i], ru->sha1[i],
 				       ce->name, i + 1, 0);
+		if (matched)
+			nce->ce_flags |= CE_MATCHED;
 		if (add_index_entry(istate, nce, ADD_CACHE_OK_TO_ADD)) {
 			err = 1;
 			error("cannot unmerge '%s'", ce->name);
@@ -156,6 +159,20 @@ int unmerge_index_entry_at(struct index_state *istate, int pos)
 	return unmerge_index_entry_at(istate, pos);
 }
 
+void unmerge_marked_index(struct index_state *istate)
+{
+	int i;
+
+	if (!istate->resolve_undo)
+		return;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+		if (ce->ce_flags & CE_MATCHED)
+			i = unmerge_index_entry_at(istate, i);
+	}
+}
+
 void unmerge_index(struct index_state *istate, const char **pathspec)
 {
 	int i;
diff --git a/resolve-undo.h b/resolve-undo.h
index 8458769..7a30206 100644
--- a/resolve-undo.h
+++ b/resolve-undo.h
@@ -12,5 +12,6 @@ extern struct string_list *resolve_undo_read(const char *, unsigned long);
 extern void resolve_undo_clear_index(struct index_state *);
 extern int unmerge_index_entry_at(struct index_state *, int);
 extern void unmerge_index(struct index_state *, const char **);
+extern void unmerge_marked_index(struct index_state *);
 
 #endif
diff --git a/t/t2022-checkout-paths.sh b/t/t2022-checkout-paths.sh
index 56090d2..5e01d58 100755
--- a/t/t2022-checkout-paths.sh
+++ b/t/t2022-checkout-paths.sh
@@ -39,4 +39,25 @@ test_expect_success 'checking out paths out of a tree does not clobber unrelated
 	test_cmp expect.next2 dir/next2
 '
 
+test_expect_success 'do not touch unmerged entries matching $path but not in $tree' '
+	git checkout next &&
+	git reset --hard &&
+
+	cat dir/common >expect.common &&
+	EMPTY_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 &&
+	git rm dir/next0 &&
+	cat >expect.next0<<EOF &&
+100644 $EMPTY_SHA1 1	dir/next0
+100644 $EMPTY_SHA1 2	dir/next0
+EOF
+	git update-index --index-info < expect.next0 &&
+
+	git checkout master dir &&
+
+	test_cmp expect.common dir/common &&
+	test_path_is_file dir/master &&
+	git diff --exit-code master dir/master &&
+	git ls-files -s dir/next0 >actual.next0
+'
+
 test_done
-- 
1.8.2.82.gc24b958

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH v3] checkout: avoid unnecessary match_pathspec calls
  2013-03-27  5:58       ` [PATCH v3] " Nguyễn Thái Ngọc Duy
@ 2013-03-28 22:32         ` Junio C Hamano
  0 siblings, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2013-03-28 22:32 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> diff --git a/t/t2022-checkout-paths.sh b/t/t2022-checkout-paths.sh
> index 56090d2..5e01d58 100755
> --- a/t/t2022-checkout-paths.sh
> +++ b/t/t2022-checkout-paths.sh
> @@ -39,4 +39,25 @@ test_expect_success 'checking out paths out of a tree does not clobber unrelated
>  	test_cmp expect.next2 dir/next2
>  '
>  
> +test_expect_success 'do not touch unmerged entries matching $path but not in $tree' '
> +	git checkout next &&
> +	git reset --hard &&
> +
> +	cat dir/common >expect.common &&
> +	EMPTY_SHA1=e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 &&

	EMPTY_SHA1=$(git hash-object -w --stdin </dev/null)

> +	git rm dir/next0 &&
> +	cat >expect.next0<<EOF &&
> +100644 $EMPTY_SHA1 1	dir/next0
> +100644 $EMPTY_SHA1 2	dir/next0
> +EOF
> +	git update-index --index-info < expect.next0 &&

	cat >expect.next0 <<-EOF &&
	100644 $EMPTY_SHA1 1	dir/next0
	100644 $EMPTY_SHA1 2	dir/next0
	EOF
	git update-index --index-info <expect.next0 &&

> +
> +	git checkout master dir &&
> +
> +	test_cmp expect.common dir/common &&
> +	test_path_is_file dir/master &&
> +	git diff --exit-code master dir/master &&
> +	git ls-files -s dir/next0 >actual.next0
> +'

... and actual.next0 is checked against what?

Ending this test with

	git ls-files -s dir/next0 >actual.next0 &&
	test_cmp expect.next0 actual.next0

would be sufficient, methinks.

Will replace v2 with the above fixups.  Thanks.

> +
>  test_done

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-03-28 22:32 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-23 10:55 [PATCH] checkout: avoid unncessary match_pathspec calls Nguyễn Thái Ngọc Duy
2013-03-24  2:45 ` Eric Sunshine
2013-03-24  6:47 ` Junio C Hamano
2013-03-24 12:55   ` [PATCH v2] checkout: avoid unnecessary " Nguyễn Thái Ngọc Duy
2013-03-25 16:26     ` Junio C Hamano
2013-03-27  5:58       ` [PATCH v3] " Nguyễn Thái Ngọc Duy
2013-03-28 22:32         ` Junio C Hamano

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.