git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH v2] untracked-cache: fix subdirectory handling
@ 2015-08-07 17:17 David Turner
  2015-08-15  7:32 ` Duy Nguyen
  0 siblings, 1 reply; 2+ messages in thread
From: David Turner @ 2015-08-07 17:17 UTC (permalink / raw)
  To: git, pclouds; +Cc: David Turner

Previously, some calls lookup_untracked would pass a full path.  But
lookup_untracked assumes that the portion of the path up to and
including to the untracked_cache_dir has been removed.  So
lookup_untracked would be looking in the untracked_cache for 'foo' for
'foo/bar' (instead of just looking for 'bar').  This would cause
untracked cache corruption.

To fix this, untracked_cache_dir learns to track its depth.  Callers
to lookup_untracked which have a full path now first trim off a
sufficient number of path prefixes.  A new helper function,
lookup_untracked_recursive, helps untracked_cache_invalidate_path to
perform this operation.

Signed-off-by: David Turner <dturner@twopensource.com>
---
This version removes debugging cruft.  Oops!

---
 dir.c                             | 50 ++++++++++++++++++++-------
 dir.h                             |  1 +
 t/t7063-status-untracked-cache.sh | 72 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/dir.c b/dir.c
index e7b89fe..314080b 100644
--- a/dir.c
+++ b/dir.c
@@ -631,6 +631,7 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
 	memset(d, 0, sizeof(*d));
 	memcpy(d->name, name, len);
 	d->name[len] = '\0';
+	d->depth = dir->depth + 1;
 
 	ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
 	memmove(dir->dirs + first + 1, dir->dirs + first,
@@ -1324,7 +1325,19 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return exclude ? path_excluded : path_untracked;
 
-	untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+	if (untracked) {
+		const char *cur = dirname;
+		int i;
+
+		for (i = 0; i < untracked->depth; i++) {
+			cur = strchr(cur, '/');
+			assert(cur);
+			cur++;
+		}
+		untracked = lookup_untracked(dir->untracked, untracked,
+					     cur,
+					     len - (cur - dirname));
+	}
 	return read_directory_recursive(dir, dirname, len,
 					untracked, 1, simplify);
 }
@@ -2431,7 +2444,7 @@ static void stat_data_from_disk(struct stat_data *to, const struct stat_data *fr
 }
 
 static int read_one_dir(struct untracked_cache_dir **untracked_,
-			struct read_data *rd)
+			struct read_data *rd, int depth)
 {
 	struct untracked_cache_dir ud, *untracked;
 	const unsigned char *next, *data = rd->data, *end = rd->end;
@@ -2444,6 +2457,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	value = decode_varint(&next);
 	if (next > end)
 		return -1;
+	ud.depth = depth;
 	ud.recurse	   = 1;
 	ud.untracked_alloc = value;
 	ud.untracked_nr	   = value;
@@ -2480,7 +2494,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	rd->data = data;
 
 	for (i = 0; i < untracked->dirs_nr; i++) {
-		len = read_one_dir(untracked->dirs + i, rd);
+		len = read_one_dir(untracked->dirs + i, rd, depth + 1);
 		if (len < 0)
 			return -1;
 	}
@@ -2577,7 +2591,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	rd.index      = 0;
 	rd.ucd        = xmalloc(sizeof(*rd.ucd) * len);
 
-	if (read_one_dir(&uc->root, &rd) || rd.index != len)
+	if (read_one_dir(&uc->root, &rd, 0) || rd.index != len)
 		goto done;
 
 	next = rd.data;
@@ -2614,20 +2628,32 @@ done2:
 	return uc;
 }
 
+static struct untracked_cache_dir *lookup_untracked_recursive(
+	struct untracked_cache *uc, struct untracked_cache_dir *dir,
+	const char *path, int len)
+{
+	const char *rest = strchr(path, '/');
+
+	if (rest) {
+		int component_len = rest - path;
+		struct untracked_cache_dir *d =
+			lookup_untracked(uc, dir, path, component_len);
+		return lookup_untracked_recursive(uc, d, rest + 1,
+						  len - (component_len + 1));
+	} else {
+		return dir;
+	}
+}
+
 void untracked_cache_invalidate_path(struct index_state *istate,
 				     const char *path)
 {
-	const char *sep;
 	struct untracked_cache_dir *d;
 	if (!istate->untracked || !istate->untracked->root)
 		return;
-	sep = strrchr(path, '/');
-	if (sep)
-		d = lookup_untracked(istate->untracked,
-				     istate->untracked->root,
-				     path, sep - path);
-	else
-		d = istate->untracked->root;
+	d = lookup_untracked_recursive(istate->untracked,
+				       istate->untracked->root,
+				       path, strlen(path));
 	istate->untracked->dir_invalidated++;
 	d->valid = 0;
 	d->untracked_nr = 0;
diff --git a/dir.h b/dir.h
index 7b5855d..885ed43 100644
--- a/dir.h
+++ b/dir.h
@@ -109,6 +109,7 @@ struct sha1_stat {
  *  excludes_file_sha1[])
  */
 struct untracked_cache_dir {
+	unsigned int depth;
 	struct untracked_cache_dir **dirs;
 	char **untracked;
 	struct stat_data stat_data;
diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh
index ff23f4e..ca8dc3a 100755
--- a/t/t7063-status-untracked-cache.sh
+++ b/t/t7063-status-untracked-cache.sh
@@ -408,7 +408,8 @@ test_expect_success 'set up sparse checkout' '
 	test_path_is_file done/one
 '
 
-test_expect_success 'create files, some of which are gitignored' '
+test_expect_success 'create/modify files, some of which are gitignored' '
+	echo two bis >done/two &&
 	echo three >done/three && # three is gitignored
 	echo four >done/four && # four is gitignored at a higher level
 	echo five >done/five # five is not gitignored
@@ -420,6 +421,7 @@ test_expect_success 'test sparse status with untracked cache' '
 	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
 	git status --porcelain >../status.actual &&
 	cat >../status.expect <<EOF &&
+ M done/two
 ?? .gitignore
 ?? done/five
 ?? dtwo/
@@ -459,6 +461,7 @@ test_expect_success 'test sparse status again with untracked cache' '
 	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
 	git status --porcelain >../status.actual &&
 	cat >../status.expect <<EOF &&
+ M done/two
 ?? .gitignore
 ?? done/five
 ?? dtwo/
@@ -473,4 +476,71 @@ EOF
 	test_cmp ../trace.expect ../trace
 '
 
+test_expect_success 'set up for test of subdir and sparse checkouts' '
+	mkdir done/sub &&
+	mkdir done/sub/sub &&
+	echo "sub" > done/sub/sub/file
+'
+
+test_expect_success 'test sparse status with untracked cache and subdir' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../status.actual &&
+	cat >../status.expect <<EOF &&
+ M done/two
+?? .gitignore
+?? done/five
+?? done/sub/
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../status.actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 2
+gitignore invalidation: 0
+directory invalidation: 1
+opendir: 3
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump (sparse/subdirs)' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 1946f0437f90c5005533cbe1736a6451ca301714 recurse valid
+five
+sub/
+/done/sub/ 0000000000000000000000000000000000000000 recurse check_only valid
+sub/
+/done/sub/sub/ 0000000000000000000000000000000000000000 recurse check_only valid
+file
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'test sparse status again with untracked cache and subdir' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../status.actual &&
+	test_cmp ../status.expect ../status.actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 0
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
 test_done
-- 
2.0.4.315.gad8727a-twtrsrc

^ permalink raw reply related	[flat|nested] 2+ messages in thread

* Re: [PATCH v2] untracked-cache: fix subdirectory handling
  2015-08-07 17:17 [PATCH v2] untracked-cache: fix subdirectory handling David Turner
@ 2015-08-15  7:32 ` Duy Nguyen
  0 siblings, 0 replies; 2+ messages in thread
From: Duy Nguyen @ 2015-08-15  7:32 UTC (permalink / raw)
  To: David Turner; +Cc: git

First of all, sorry for my long silence. I'm going through my git
inbox now.

> diff --git a/dir.c b/dir.c
> index e7b89fe..314080b 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -631,6 +631,7 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
>  	memset(d, 0, sizeof(*d));
>  	memcpy(d->name, name, len);
>  	d->name[len] = '\0';
> +	d->depth = dir->depth + 1;
>  
>  	ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
>  	memmove(dir->dirs + first + 1, dir->dirs + first,
> @@ -1324,7 +1325,19 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
>  	if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
>  		return exclude ? path_excluded : path_untracked;
>  
> -	untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
> +	if (untracked) {
> +		const char *cur = dirname;
> +		int i;
> +
> +		for (i = 0; i < untracked->depth; i++) {
> +			cur = strchr(cur, '/');
> +			assert(cur);
> +			cur++;
> +		}
> +		untracked = lookup_untracked(dir->untracked, untracked,
> +					     cur,
> +					     len - (cur - dirname));
> +	}

If I read it correctly, the call chain is, treat_path() reconstructs
full path, then it calls treat_one_path -> treat_directory ->
lookup_untracked.

In the same treat_path(), we may also call treat_fast_path ->
read_directory_recursive -> lookup_untracked. In this call chain, we
retain the "baselen" and we can exclude the base path before passing
it to lookup_untracked().

So instead of adding "depth" (and spending some more cycles cutting
path components), perhaps we can do the same for the first call chain,
passing baselen to treat_one_path and treat_directory?  Something like
this passes your new tests

-- 8< --
diff --git a/dir.c b/dir.c
index 07a6642..a4c52bf 100644
--- a/dir.c
+++ b/dir.c
@@ -1297,7 +1297,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
  */
 static enum path_treatment treat_directory(struct dir_struct *dir,
 	struct untracked_cache_dir *untracked,
-	const char *dirname, int len, int exclude,
+	const char *dirname, int len, int baselen, int exclude,
 	const struct path_simplify *simplify)
 {
 	/* The "len-1" is to strip the final '/' */
@@ -1324,7 +1324,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return exclude ? path_excluded : path_untracked;
 
-	untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+	untracked = lookup_untracked(dir->untracked, untracked,
+				     dirname + baselen, len - baselen);
 	return read_directory_recursive(dir, dirname, len,
 					untracked, 1, simplify);
 }
@@ -1444,6 +1445,7 @@ static int get_dtype(struct dirent *de, const char *path, int len)
 static enum path_treatment treat_one_path(struct dir_struct *dir,
 					  struct untracked_cache_dir *untracked,
 					  struct strbuf *path,
+					  int baselen,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de)
 {
@@ -1495,8 +1497,8 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 		return path_none;
 	case DT_DIR:
 		strbuf_addch(path, '/');
-		return treat_directory(dir, untracked, path->buf, path->len, exclude,
-			simplify);
+		return treat_directory(dir, untracked, path->buf, path->len,
+				       baselen, exclude, simplify);
 	case DT_REG:
 	case DT_LNK:
 		return exclude ? path_excluded : path_untracked;
@@ -1557,7 +1559,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 		return path_none;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, untracked, path, simplify, dtype, de);
+	return treat_one_path(dir, untracked, path, baselen, simplify, dtype, de);
 }
 
 static void add_untracked(struct untracked_cache_dir *dir, const char *name)
@@ -1827,7 +1829,7 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
-		if (treat_one_path(dir, NULL, &sb, simplify,
+		if (treat_one_path(dir, NULL, &sb, baselen, simplify,
 				   DT_DIR, NULL) == path_none)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
-- 8< --

> +static struct untracked_cache_dir *lookup_untracked_recursive(
> +	struct untracked_cache *uc, struct untracked_cache_dir *dir,
> +	const char *path, int len)
> +{
> +	const char *rest = strchr(path, '/');
> +
> +	if (rest) {
> +		int component_len = rest - path;
> +		struct untracked_cache_dir *d =
> +			lookup_untracked(uc, dir, path, component_len);
> +		return lookup_untracked_recursive(uc, d, rest + 1,
> +						  len - (component_len + 1));
> +	} else {
> +		return dir;
> +	}
> +}
> +
>  void untracked_cache_invalidate_path(struct index_state *istate,
>  				     const char *path)
>  {
> -	const char *sep;
>  	struct untracked_cache_dir *d;
>  	if (!istate->untracked || !istate->untracked->root)
>  		return;
> -	sep = strrchr(path, '/');
> -	if (sep)
> -		d = lookup_untracked(istate->untracked,
> -				     istate->untracked->root,
> -				     path, sep - path);
> -	else
> -		d = istate->untracked->root;
> +	d = lookup_untracked_recursive(istate->untracked,
> +				       istate->untracked->root,
> +				       path, strlen(path));

This is totally my fault. It's so obvious, how could I miss
it.. Thanks for fixing.

By the way, while running the tests, I noticed that the test "set up
sparse checkout" in t7063 performs the time-consuming "Testing
...... OK" again. At this point in the test suite, you are already
certain the underlying filesystem supports untracked-cached, maybe do
this to reduce test time for people who do not run tests in parallel?

s/update-index --untracked-cache/update-index --force-untracked-cache/

--
Duy

^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2015-08-15  7:30 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-07 17:17 [PATCH v2] untracked-cache: fix subdirectory handling David Turner
2015-08-15  7:32 ` Duy Nguyen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).