* [PATCH v2] untracked-cache: fix subdirectory handling @ 2015-08-07 17:17 David Turner 2015-08-15 7:32 ` Duy Nguyen 0 siblings, 1 reply; 2+ messages in thread From: David Turner @ 2015-08-07 17:17 UTC (permalink / raw) To: git, pclouds; +Cc: David Turner Previously, some calls lookup_untracked would pass a full path. But lookup_untracked assumes that the portion of the path up to and including to the untracked_cache_dir has been removed. So lookup_untracked would be looking in the untracked_cache for 'foo' for 'foo/bar' (instead of just looking for 'bar'). This would cause untracked cache corruption. To fix this, untracked_cache_dir learns to track its depth. Callers to lookup_untracked which have a full path now first trim off a sufficient number of path prefixes. A new helper function, lookup_untracked_recursive, helps untracked_cache_invalidate_path to perform this operation. Signed-off-by: David Turner <dturner@twopensource.com> --- This version removes debugging cruft. Oops! --- dir.c | 50 ++++++++++++++++++++------- dir.h | 1 + t/t7063-status-untracked-cache.sh | 72 ++++++++++++++++++++++++++++++++++++++- 3 files changed, 110 insertions(+), 13 deletions(-) diff --git a/dir.c b/dir.c index e7b89fe..314080b 100644 --- a/dir.c +++ b/dir.c @@ -631,6 +631,7 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, memset(d, 0, sizeof(*d)); memcpy(d->name, name, len); d->name[len] = '\0'; + d->depth = dir->depth + 1; ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc); memmove(dir->dirs + first + 1, dir->dirs + first, @@ -1324,7 +1325,19 @@ static enum path_treatment treat_directory(struct dir_struct *dir, if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return exclude ? path_excluded : path_untracked; - untracked = lookup_untracked(dir->untracked, untracked, dirname, len); + if (untracked) { + const char *cur = dirname; + int i; + + for (i = 0; i < untracked->depth; i++) { + cur = strchr(cur, '/'); + assert(cur); + cur++; + } + untracked = lookup_untracked(dir->untracked, untracked, + cur, + len - (cur - dirname)); + } return read_directory_recursive(dir, dirname, len, untracked, 1, simplify); } @@ -2431,7 +2444,7 @@ static void stat_data_from_disk(struct stat_data *to, const struct stat_data *fr } static int read_one_dir(struct untracked_cache_dir **untracked_, - struct read_data *rd) + struct read_data *rd, int depth) { struct untracked_cache_dir ud, *untracked; const unsigned char *next, *data = rd->data, *end = rd->end; @@ -2444,6 +2457,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_, value = decode_varint(&next); if (next > end) return -1; + ud.depth = depth; ud.recurse = 1; ud.untracked_alloc = value; ud.untracked_nr = value; @@ -2480,7 +2494,7 @@ static int read_one_dir(struct untracked_cache_dir **untracked_, rd->data = data; for (i = 0; i < untracked->dirs_nr; i++) { - len = read_one_dir(untracked->dirs + i, rd); + len = read_one_dir(untracked->dirs + i, rd, depth + 1); if (len < 0) return -1; } @@ -2577,7 +2591,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long rd.index = 0; rd.ucd = xmalloc(sizeof(*rd.ucd) * len); - if (read_one_dir(&uc->root, &rd) || rd.index != len) + if (read_one_dir(&uc->root, &rd, 0) || rd.index != len) goto done; next = rd.data; @@ -2614,20 +2628,32 @@ done2: return uc; } +static struct untracked_cache_dir *lookup_untracked_recursive( + struct untracked_cache *uc, struct untracked_cache_dir *dir, + const char *path, int len) +{ + const char *rest = strchr(path, '/'); + + if (rest) { + int component_len = rest - path; + struct untracked_cache_dir *d = + lookup_untracked(uc, dir, path, component_len); + return lookup_untracked_recursive(uc, d, rest + 1, + len - (component_len + 1)); + } else { + return dir; + } +} + void untracked_cache_invalidate_path(struct index_state *istate, const char *path) { - const char *sep; struct untracked_cache_dir *d; if (!istate->untracked || !istate->untracked->root) return; - sep = strrchr(path, '/'); - if (sep) - d = lookup_untracked(istate->untracked, - istate->untracked->root, - path, sep - path); - else - d = istate->untracked->root; + d = lookup_untracked_recursive(istate->untracked, + istate->untracked->root, + path, strlen(path)); istate->untracked->dir_invalidated++; d->valid = 0; d->untracked_nr = 0; diff --git a/dir.h b/dir.h index 7b5855d..885ed43 100644 --- a/dir.h +++ b/dir.h @@ -109,6 +109,7 @@ struct sha1_stat { * excludes_file_sha1[]) */ struct untracked_cache_dir { + unsigned int depth; struct untracked_cache_dir **dirs; char **untracked; struct stat_data stat_data; diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh index ff23f4e..ca8dc3a 100755 --- a/t/t7063-status-untracked-cache.sh +++ b/t/t7063-status-untracked-cache.sh @@ -408,7 +408,8 @@ test_expect_success 'set up sparse checkout' ' test_path_is_file done/one ' -test_expect_success 'create files, some of which are gitignored' ' +test_expect_success 'create/modify files, some of which are gitignored' ' + echo two bis >done/two && echo three >done/three && # three is gitignored echo four >done/four && # four is gitignored at a higher level echo five >done/five # five is not gitignored @@ -420,6 +421,7 @@ test_expect_success 'test sparse status with untracked cache' ' GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ git status --porcelain >../status.actual && cat >../status.expect <<EOF && + M done/two ?? .gitignore ?? done/five ?? dtwo/ @@ -459,6 +461,7 @@ test_expect_success 'test sparse status again with untracked cache' ' GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ git status --porcelain >../status.actual && cat >../status.expect <<EOF && + M done/two ?? .gitignore ?? done/five ?? dtwo/ @@ -473,4 +476,71 @@ EOF test_cmp ../trace.expect ../trace ' +test_expect_success 'set up for test of subdir and sparse checkouts' ' + mkdir done/sub && + mkdir done/sub/sub && + echo "sub" > done/sub/sub/file +' + +test_expect_success 'test sparse status with untracked cache and subdir' ' + avoid_racy && + : >../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../status.actual && + cat >../status.expect <<EOF && + M done/two +?? .gitignore +?? done/five +?? done/sub/ +?? dtwo/ +EOF + test_cmp ../status.expect ../status.actual && + cat >../trace.expect <<EOF && +node creation: 2 +gitignore invalidation: 0 +directory invalidation: 1 +opendir: 3 +EOF + test_cmp ../trace.expect ../trace +' + +test_expect_success 'verify untracked cache dump (sparse/subdirs)' ' + test-dump-untracked-cache >../actual && + cat >../expect <<EOF && +info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0 +core.excludesfile 0000000000000000000000000000000000000000 +exclude_per_dir .gitignore +flags 00000006 +/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid +.gitignore +dtwo/ +/done/ 1946f0437f90c5005533cbe1736a6451ca301714 recurse valid +five +sub/ +/done/sub/ 0000000000000000000000000000000000000000 recurse check_only valid +sub/ +/done/sub/sub/ 0000000000000000000000000000000000000000 recurse check_only valid +file +/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid +/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid +two +EOF + test_cmp ../expect ../actual +' + +test_expect_success 'test sparse status again with untracked cache and subdir' ' + avoid_racy && + : >../trace && + GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \ + git status --porcelain >../status.actual && + test_cmp ../status.expect ../status.actual && + cat >../trace.expect <<EOF && +node creation: 0 +gitignore invalidation: 0 +directory invalidation: 0 +opendir: 0 +EOF + test_cmp ../trace.expect ../trace +' + test_done -- 2.0.4.315.gad8727a-twtrsrc ^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [PATCH v2] untracked-cache: fix subdirectory handling 2015-08-07 17:17 [PATCH v2] untracked-cache: fix subdirectory handling David Turner @ 2015-08-15 7:32 ` Duy Nguyen 0 siblings, 0 replies; 2+ messages in thread From: Duy Nguyen @ 2015-08-15 7:32 UTC (permalink / raw) To: David Turner; +Cc: git First of all, sorry for my long silence. I'm going through my git inbox now. > diff --git a/dir.c b/dir.c > index e7b89fe..314080b 100644 > --- a/dir.c > +++ b/dir.c > @@ -631,6 +631,7 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc, > memset(d, 0, sizeof(*d)); > memcpy(d->name, name, len); > d->name[len] = '\0'; > + d->depth = dir->depth + 1; > > ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc); > memmove(dir->dirs + first + 1, dir->dirs + first, > @@ -1324,7 +1325,19 @@ static enum path_treatment treat_directory(struct dir_struct *dir, > if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) > return exclude ? path_excluded : path_untracked; > > - untracked = lookup_untracked(dir->untracked, untracked, dirname, len); > + if (untracked) { > + const char *cur = dirname; > + int i; > + > + for (i = 0; i < untracked->depth; i++) { > + cur = strchr(cur, '/'); > + assert(cur); > + cur++; > + } > + untracked = lookup_untracked(dir->untracked, untracked, > + cur, > + len - (cur - dirname)); > + } If I read it correctly, the call chain is, treat_path() reconstructs full path, then it calls treat_one_path -> treat_directory -> lookup_untracked. In the same treat_path(), we may also call treat_fast_path -> read_directory_recursive -> lookup_untracked. In this call chain, we retain the "baselen" and we can exclude the base path before passing it to lookup_untracked(). So instead of adding "depth" (and spending some more cycles cutting path components), perhaps we can do the same for the first call chain, passing baselen to treat_one_path and treat_directory? Something like this passes your new tests -- 8< -- diff --git a/dir.c b/dir.c index 07a6642..a4c52bf 100644 --- a/dir.c +++ b/dir.c @@ -1297,7 +1297,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len) */ static enum path_treatment treat_directory(struct dir_struct *dir, struct untracked_cache_dir *untracked, - const char *dirname, int len, int exclude, + const char *dirname, int len, int baselen, int exclude, const struct path_simplify *simplify) { /* The "len-1" is to strip the final '/' */ @@ -1324,7 +1324,8 @@ static enum path_treatment treat_directory(struct dir_struct *dir, if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES)) return exclude ? path_excluded : path_untracked; - untracked = lookup_untracked(dir->untracked, untracked, dirname, len); + untracked = lookup_untracked(dir->untracked, untracked, + dirname + baselen, len - baselen); return read_directory_recursive(dir, dirname, len, untracked, 1, simplify); } @@ -1444,6 +1445,7 @@ static int get_dtype(struct dirent *de, const char *path, int len) static enum path_treatment treat_one_path(struct dir_struct *dir, struct untracked_cache_dir *untracked, struct strbuf *path, + int baselen, const struct path_simplify *simplify, int dtype, struct dirent *de) { @@ -1495,8 +1497,8 @@ static enum path_treatment treat_one_path(struct dir_struct *dir, return path_none; case DT_DIR: strbuf_addch(path, '/'); - return treat_directory(dir, untracked, path->buf, path->len, exclude, - simplify); + return treat_directory(dir, untracked, path->buf, path->len, + baselen, exclude, simplify); case DT_REG: case DT_LNK: return exclude ? path_excluded : path_untracked; @@ -1557,7 +1559,7 @@ static enum path_treatment treat_path(struct dir_struct *dir, return path_none; dtype = DTYPE(de); - return treat_one_path(dir, untracked, path, simplify, dtype, de); + return treat_one_path(dir, untracked, path, baselen, simplify, dtype, de); } static void add_untracked(struct untracked_cache_dir *dir, const char *name) @@ -1827,7 +1829,7 @@ static int treat_leading_path(struct dir_struct *dir, break; if (simplify_away(sb.buf, sb.len, simplify)) break; - if (treat_one_path(dir, NULL, &sb, simplify, + if (treat_one_path(dir, NULL, &sb, baselen, simplify, DT_DIR, NULL) == path_none) break; /* do not recurse into it */ if (len <= baselen) { -- 8< -- > +static struct untracked_cache_dir *lookup_untracked_recursive( > + struct untracked_cache *uc, struct untracked_cache_dir *dir, > + const char *path, int len) > +{ > + const char *rest = strchr(path, '/'); > + > + if (rest) { > + int component_len = rest - path; > + struct untracked_cache_dir *d = > + lookup_untracked(uc, dir, path, component_len); > + return lookup_untracked_recursive(uc, d, rest + 1, > + len - (component_len + 1)); > + } else { > + return dir; > + } > +} > + > void untracked_cache_invalidate_path(struct index_state *istate, > const char *path) > { > - const char *sep; > struct untracked_cache_dir *d; > if (!istate->untracked || !istate->untracked->root) > return; > - sep = strrchr(path, '/'); > - if (sep) > - d = lookup_untracked(istate->untracked, > - istate->untracked->root, > - path, sep - path); > - else > - d = istate->untracked->root; > + d = lookup_untracked_recursive(istate->untracked, > + istate->untracked->root, > + path, strlen(path)); This is totally my fault. It's so obvious, how could I miss it.. Thanks for fixing. By the way, while running the tests, I noticed that the test "set up sparse checkout" in t7063 performs the time-consuming "Testing ...... OK" again. At this point in the test suite, you are already certain the underlying filesystem supports untracked-cached, maybe do this to reduce test time for people who do not run tests in parallel? s/update-index --untracked-cache/update-index --force-untracked-cache/ -- Duy ^ permalink raw reply related [flat|nested] 2+ messages in thread
end of thread, other threads:[~2015-08-15 7:30 UTC | newest] Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2015-08-07 17:17 [PATCH v2] untracked-cache: fix subdirectory handling David Turner 2015-08-15 7:32 ` Duy Nguyen
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).