All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/24] nd/untracked-cache updates
@ 2015-02-08  8:55 Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
                   ` (23 more replies)
  0 siblings, 24 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Code changes are all in 20/24, to avoid hard coding the test path. The
rest is documentation changes.

-- 8<--
diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index 7850f53..4dcad4e 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -59,7 +59,7 @@ shown (i.e. the same as specifying `normal`), to help you avoid
 forgetting to add newly created files.  Because it takes extra work
 to find untracked files in the filesystem, this mode may take some
 time in a large working tree.
-Consider to enable untracked cache and split index if supported (see
+Consider enabling untracked cache and split index if supported (see
 `git update-index --untracked-cache` and `git update-index
 --split-index`), Otherwise you can use `no` to have `git status`
 return more quickly without showing untracked files.
diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index 0045b89..e24b4bc 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -269,9 +269,9 @@ Git index format
   - A number of directory blocks in depth-first-search order, each
     consists of
 
-    - The number of untracked entries, variable witdh encoding.
+    - The number of untracked entries, variable width encoding.
 
-    - The number of sub-directory blocks, variable with encoding.
+    - The number of sub-directory blocks, variable width encoding.
 
     - The directory name terminated by NUL.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index e76740d..fc5e108 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -32,6 +32,7 @@ static int mark_valid_only;
 static int mark_skip_worktree_only;
 #define MARK_FLAG 1
 #define UNMARK_FLAG 2
+static struct strbuf mtime_dir = STRBUF_INIT;
 
 __attribute__((format (printf, 1, 2)))
 static void report(const char *fmt, ...)
@@ -49,28 +50,37 @@ static void report(const char *fmt, ...)
 
 static void remove_test_directory(void)
 {
-	struct strbuf sb = STRBUF_INIT;
-	strbuf_addstr(&sb, "dir-mtime-test");
-	remove_dir_recursively(&sb, 0);
-	strbuf_release(&sb);
+	if (mtime_dir.len)
+		remove_dir_recursively(&mtime_dir, 0);
+}
+
+static const char *get_mtime_path(const char *path)
+{
+	static struct strbuf sb = STRBUF_INIT;
+	strbuf_reset(&sb);
+	strbuf_addf(&sb, "%s/%s", mtime_dir.buf, path);
+	return sb.buf;
 }
 
 static void xmkdir(const char *path)
 {
+	path = get_mtime_path(path);
 	if (mkdir(path, 0700))
 		die_errno(_("failed to create directory %s"), path);
 }
 
-static int xstat(const char *path, struct stat *st)
+static int xstat_mtime_dir(struct stat *st)
 {
-	if (stat(path, st))
-		die_errno(_("failed to stat %s"), path);
+	if (stat(mtime_dir.buf, st))
+		die_errno(_("failed to stat %s"), mtime_dir.buf);
 	return 0;
 }
 
 static int create_file(const char *path)
 {
-	int fd = open(path, O_CREAT | O_RDWR, 0644);
+	int fd;
+	path = get_mtime_path(path);
+	fd = open(path, O_CREAT | O_RDWR, 0644);
 	if (fd < 0)
 		die_errno(_("failed to create file %s"), path);
 	return fd;
@@ -78,12 +88,14 @@ static int create_file(const char *path)
 
 static void xunlink(const char *path)
 {
+	path = get_mtime_path(path);
 	if (unlink(path))
 		die_errno(_("failed to delete file %s"), path);
 }
 
 static void xrmdir(const char *path)
 {
+	path = get_mtime_path(path);
 	if (rmdir(path))
 		die_errno(_("failed to delete directory %s"), path);
 }
@@ -102,37 +114,40 @@ static int test_if_untracked_cache_is_supported(void)
 {
 	struct stat st;
 	struct stat_data base;
-	int fd;
+	int fd, ret = 0;
+
+	strbuf_addstr(&mtime_dir, "mtime-test-XXXXXX");
+	if (!mkdtemp(mtime_dir.buf))
+		die_errno("Could not make temporary directory");
 
 	fprintf(stderr, _("Testing "));
-	xmkdir("dir-mtime-test");
 	atexit(remove_test_directory);
-	xstat("dir-mtime-test", &st);
+	xstat_mtime_dir(&st);
 	fill_stat_data(&base, &st);
 	fputc('.', stderr);
 
 	avoid_racy();
-	fd = create_file("dir-mtime-test/newfile");
-	xstat("dir-mtime-test", &st);
+	fd = create_file("newfile");
+	xstat_mtime_dir(&st);
 	if (!match_stat_data(&base, &st)) {
 		close(fd);
 		fputc('\n', stderr);
 		fprintf_ln(stderr,_("directory stat info does not "
 				    "change after adding a new file"));
-		return 0;
+		goto done;
 	}
 	fill_stat_data(&base, &st);
 	fputc('.', stderr);
 
 	avoid_racy();
-	xmkdir("dir-mtime-test/new-dir");
-	xstat("dir-mtime-test", &st);
+	xmkdir("new-dir");
+	xstat_mtime_dir(&st);
 	if (!match_stat_data(&base, &st)) {
 		close(fd);
 		fputc('\n', stderr);
 		fprintf_ln(stderr, _("directory stat info does not change "
 				     "after adding a new directory"));
-		return 0;
+		goto done;
 	}
 	fill_stat_data(&base, &st);
 	fputc('.', stderr);
@@ -140,52 +155,57 @@ static int test_if_untracked_cache_is_supported(void)
 	avoid_racy();
 	write_or_die(fd, "data", 4);
 	close(fd);
-	xstat("dir-mtime-test", &st);
+	xstat_mtime_dir(&st);
 	if (match_stat_data(&base, &st)) {
 		fputc('\n', stderr);
 		fprintf_ln(stderr, _("directory stat info changes "
 				     "after updating a file"));
-		return 0;
+		goto done;
 	}
 	fputc('.', stderr);
 
 	avoid_racy();
-	close(create_file("dir-mtime-test/new-dir/new"));
-	xstat("dir-mtime-test", &st);
+	close(create_file("new-dir/new"));
+	xstat_mtime_dir(&st);
 	if (match_stat_data(&base, &st)) {
 		fputc('\n', stderr);
 		fprintf_ln(stderr, _("directory stat info changes after "
 				     "adding a file inside subdirectory"));
-		return 0;
+		goto done;
 	}
 	fputc('.', stderr);
 
 	avoid_racy();
-	xunlink("dir-mtime-test/newfile");
-	xstat("dir-mtime-test", &st);
+	xunlink("newfile");
+	xstat_mtime_dir(&st);
 	if (!match_stat_data(&base, &st)) {
 		fputc('\n', stderr);
 		fprintf_ln(stderr, _("directory stat info does not "
 				     "change after deleting a file"));
-		return 0;
+		goto done;
 	}
 	fill_stat_data(&base, &st);
 	fputc('.', stderr);
 
 	avoid_racy();
-	xunlink("dir-mtime-test/new-dir/new");
-	xrmdir("dir-mtime-test/new-dir");
-	xstat("dir-mtime-test", &st);
+	xunlink("new-dir/new");
+	xrmdir("new-dir");
+	xstat_mtime_dir(&st);
 	if (!match_stat_data(&base, &st)) {
 		fputc('\n', stderr);
 		fprintf_ln(stderr, _("directory stat info does not "
 				     "change after deleting a directory"));
-		return 0;
+		goto done;
 	}
 
-	xrmdir("dir-mtime-test");
+	if (rmdir(mtime_dir.buf))
+		die_errno(_("failed to delete directory %s"), mtime_dir.buf);
 	fprintf_ln(stderr, _(" OK"));
-	return 1;
+	ret = 1;
+
+done:
+	strbuf_release(&mtime_dir);
+	return ret;
 }
 
 static int mark_ce_flags(const char *path, int flag, int mark)
-- 8<--
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-11 21:23   ` Junio C Hamano
  2015-02-08  8:55 ` [PATCH 02/24] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
                   ` (22 subsequent siblings)
  23 siblings, 1 reply; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

This is not used anywhere yet. But the goal is to compare quickly if a
.gitignore file has changed when we have the SHA-1 of both old (cached
somewhere) and new (from index or a tree) versions.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
 dir.h |  6 ++++++
 2 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/dir.c b/dir.c
index fcb6872..4cc936b 100644
--- a/dir.c
+++ b/dir.c
@@ -466,7 +466,8 @@ void add_exclude(const char *string, const char *base,
 	x->el = el;
 }
 
-static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
+static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
+						struct sha1_stat *sha1_stat)
 {
 	int pos, len;
 	unsigned long sz;
@@ -485,6 +486,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 		return NULL;
 	}
 	*size = xsize_t(sz);
+	if (sha1_stat) {
+		memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
+		hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
+	}
 	return data;
 }
 
@@ -529,11 +534,18 @@ static void trim_trailing_spaces(char *buf)
 		*last_space = '\0';
 }
 
-int add_excludes_from_file_to_list(const char *fname,
-				   const char *base,
-				   int baselen,
-				   struct exclude_list *el,
-				   int check_index)
+/*
+ * Given a file with name "fname", read it (either from disk, or from
+ * the index if "check_index" is non-zero), parse it and store the
+ * exclude rules in "el".
+ *
+ * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
+ * stat data from disk (only valid if add_excludes returns zero). If
+ * ss_valid is non-zero, "ss" must contain good value as input.
+ */
+static int add_excludes(const char *fname, const char *base, int baselen,
+			struct exclude_list *el, int check_index,
+			struct sha1_stat *sha1_stat)
 {
 	struct stat st;
 	int fd, i, lineno = 1;
@@ -547,7 +559,7 @@ int add_excludes_from_file_to_list(const char *fname,
 		if (0 <= fd)
 			close(fd);
 		if (!check_index ||
-		    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
+		    (buf = read_skip_worktree_file_from_index(fname, &size, sha1_stat)) == NULL)
 			return -1;
 		if (size == 0) {
 			free(buf);
@@ -560,6 +572,11 @@ int add_excludes_from_file_to_list(const char *fname,
 	} else {
 		size = xsize_t(st.st_size);
 		if (size == 0) {
+			if (sha1_stat) {
+				fill_stat_data(&sha1_stat->stat, &st);
+				hashcpy(sha1_stat->sha1, EMPTY_BLOB_SHA1_BIN);
+				sha1_stat->valid = 1;
+			}
 			close(fd);
 			return 0;
 		}
@@ -571,6 +588,21 @@ int add_excludes_from_file_to_list(const char *fname,
 		}
 		buf[size++] = '\n';
 		close(fd);
+		if (sha1_stat) {
+			int pos;
+			if (sha1_stat->valid &&
+			    !match_stat_data(&sha1_stat->stat, &st))
+				; /* no content change, ss->sha1 still good */
+			else if (check_index &&
+				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
+				 !ce_stage(active_cache[pos]) &&
+				 ce_uptodate(active_cache[pos]))
+				hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
+			else
+				hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
+			fill_stat_data(&sha1_stat->stat, &st);
+			sha1_stat->valid = 1;
+		}
 	}
 
 	el->filebuf = buf;
@@ -589,6 +621,13 @@ int add_excludes_from_file_to_list(const char *fname,
 	return 0;
 }
 
+int add_excludes_from_file_to_list(const char *fname, const char *base,
+				   int baselen, struct exclude_list *el,
+				   int check_index)
+{
+	return add_excludes(fname, base, baselen, el, check_index, NULL);
+}
+
 struct exclude_list *add_exclude_list(struct dir_struct *dir,
 				      int group_type, const char *src)
 {
diff --git a/dir.h b/dir.h
index 6c45e9d..cdca71b 100644
--- a/dir.h
+++ b/dir.h
@@ -73,6 +73,12 @@ struct exclude_list_group {
 	struct exclude_list *el;
 };
 
+struct sha1_stat {
+	struct stat_data stat;
+	unsigned char sha1[20];
+	int valid;
+};
+
 struct dir_struct {
 	int nr, alloc;
 	int ignored_nr, ignored_alloc;
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 02/24] untracked cache: record .gitignore information and dir hierarchy
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 03/24] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
                   ` (21 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

The idea is if we can capture all input and (non-rescursive) output of
read_directory_recursive(), and can verify later that all the input is
the same, then the second r_d_r() should produce the same output as in
the first run.

The requirement for this to work is stat info of a directory MUST
change if an entry is added to or removed from that directory (and
should not change often otherwise). If your OS and filesystem do not
meet this requirement, untracked cache is not for you. Most file
systems on *nix should be fine. On Windows, NTFS is fine while FAT may
not be [1] even though FAT on Linux seems to be fine.

The list of input of r_d_r() is in the big comment block in dir.h. In
short, the output of a directory (not counting subdirs) mainly depends
on stat info of the directory in question, all .gitignore leading to
it and the check_only flag when r_d_r() is called recursively. This
patch records all this info (and the output) as r_d_r() runs.

Two hash_sha1_file() are required for $GIT_DIR/info/exclude and
core.excludesfile unless their stat data matches. hash_sha1_file() is
only needed when .gitignore files in the worktree are modified,
otherwise their SHA-1 in index is used (see the previous patch).

We could store stat data for .gitignore files so we don't have to
rehash them if their content is different from index, but I think
.gitignore files are rarely modified, so not worth extra cache data
(and hashing penalty read-cache.c:verify_hdr(), as we will be storing
this as an index extension).

The implication is, if you change .gitignore, you better add it to the
index soon or you lose all the benefit of untracked cache because a
modified .gitignore invalidates all subdirs recursively. This is
especially bad for .gitignore at root.

This cached output is about untracked files only, not ignored files
because the number of tracked files is usually small, so small cache
overhead, while the number of ignored files could go really high
(e.g. *.o files mixing with source code).

[1] "Description of NTFS date and time stamps for files and folders"
    http://support.microsoft.com/kb/299648

Helped-by: Torsten Bögershausen <tboegi@web.de>
Helped-by: David Turner <dturner@twopensource.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 dir.h |  60 ++++++++++++++++++++++++++++
 2 files changed, 183 insertions(+), 19 deletions(-)

diff --git a/dir.c b/dir.c
index 4cc936b..27734f0 100644
--- a/dir.c
+++ b/dir.c
@@ -32,7 +32,7 @@ enum path_treatment {
 };
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
-	const char *path, int len,
+	const char *path, int len, struct untracked_cache_dir *untracked,
 	int check_only, const struct path_simplify *simplify);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
@@ -535,6 +535,54 @@ static void trim_trailing_spaces(char *buf)
 }
 
 /*
+ * Given a subdirectory name and "dir" of the current directory,
+ * search the subdir in "dir" and return it, or create a new one if it
+ * does not exist in "dir".
+ *
+ * If "name" has the trailing slash, it'll be excluded in the search.
+ */
+static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+						    struct untracked_cache_dir *dir,
+						    const char *name, int len)
+{
+	int first, last;
+	struct untracked_cache_dir *d;
+	if (!dir)
+		return NULL;
+	if (len && name[len - 1] == '/')
+		len--;
+	first = 0;
+	last = dir->dirs_nr;
+	while (last > first) {
+		int cmp, next = (last + first) >> 1;
+		d = dir->dirs[next];
+		cmp = strncmp(name, d->name, len);
+		if (!cmp && strlen(d->name) > len)
+			cmp = -1;
+		if (!cmp)
+			return d;
+		if (cmp < 0) {
+			last = next;
+			continue;
+		}
+		first = next+1;
+	}
+
+	uc->dir_created++;
+	d = xmalloc(sizeof(*d) + len + 1);
+	memset(d, 0, sizeof(*d));
+	memcpy(d->name, name, len);
+	d->name[len] = '\0';
+
+	ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
+	memmove(dir->dirs + first + 1, dir->dirs + first,
+		(dir->dirs_nr - first) * sizeof(*dir->dirs));
+	dir->dirs_nr++;
+	dir->dirs[first] = d;
+	return d;
+}
+
+/*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
  * exclude rules in "el".
@@ -645,14 +693,20 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
 /*
  * Used to set up core.excludesfile and .git/info/exclude lists.
  */
-void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
+				     struct sha1_stat *sha1_stat)
 {
 	struct exclude_list *el;
 	el = add_exclude_list(dir, EXC_FILE, fname);
-	if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0)
+	if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
 		die("cannot use %s as an exclude file", fname);
 }
 
+void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+{
+	add_excludes_from_file_1(dir, fname, NULL);
+}
+
 int match_basename(const char *basename, int basenamelen,
 		   const char *pattern, int prefix, int patternlen,
 		   int flags)
@@ -827,6 +881,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	struct exclude_list_group *group;
 	struct exclude_list *el;
 	struct exclude_stack *stk = NULL;
+	struct untracked_cache_dir *untracked;
 	int current;
 
 	group = &dir->exclude_list_group[EXC_DIRS];
@@ -864,9 +919,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	/* Read from the parent directories and push them down. */
 	current = stk ? stk->baselen : -1;
 	strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current);
+	if (dir->untracked)
+		untracked = stk ? stk->ucd : dir->untracked->root;
+	else
+		untracked = NULL;
+
 	while (current < baselen) {
 		struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 		const char *cp;
+		struct sha1_stat sha1_stat;
 
 		if (current < 0) {
 			cp = base;
@@ -876,10 +937,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			if (!cp)
 				die("oops in prep_exclude");
 			cp++;
+			untracked =
+				lookup_untracked(dir->untracked, untracked,
+						 base + current,
+						 cp - base - current);
 		}
 		stk->prev = dir->exclude_stack;
 		stk->baselen = cp - base;
 		stk->exclude_ix = group->nr;
+		stk->ucd = untracked;
 		el = add_exclude_list(dir, EXC_DIRS, NULL);
 		strbuf_add(&dir->basebuf, base + current, stk->baselen - current);
 		assert(stk->baselen == dir->basebuf.len);
@@ -902,6 +968,8 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		}
 
 		/* Try to read per-directory file */
+		hashclr(sha1_stat.sha1);
+		sha1_stat.valid = 0;
 		if (dir->exclude_per_dir) {
 			/*
 			 * dir->basebuf gets reused by the traversal, but we
@@ -915,8 +983,11 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			strbuf_addbuf(&sb, &dir->basebuf);
 			strbuf_addstr(&sb, dir->exclude_per_dir);
 			el->src = strbuf_detach(&sb, NULL);
-			add_excludes_from_file_to_list(el->src, el->src,
-						       stk->baselen, el, 1);
+			add_excludes(el->src, el->src, stk->baselen, el, 1,
+				     untracked ? &sha1_stat : NULL);
+		}
+		if (untracked) {
+			hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
 		}
 		dir->exclude_stack = stk;
 		current = stk->baselen;
@@ -1097,6 +1168,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
  *  (c) otherwise, we recurse into it.
  */
 static enum path_treatment treat_directory(struct dir_struct *dir,
+	struct untracked_cache_dir *untracked,
 	const char *dirname, int len, int exclude,
 	const struct path_simplify *simplify)
 {
@@ -1124,7 +1196,9 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return exclude ? path_excluded : path_untracked;
 
-	return read_directory_recursive(dir, dirname, len, 1, simplify);
+	untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+	return read_directory_recursive(dir, dirname, len,
+					untracked, 1, simplify);
 }
 
 /*
@@ -1240,6 +1314,7 @@ static int get_dtype(struct dirent *de, const char *path, int len)
 }
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
+					  struct untracked_cache_dir *untracked,
 					  struct strbuf *path,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de)
@@ -1292,7 +1367,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 		return path_none;
 	case DT_DIR:
 		strbuf_addch(path, '/');
-		return treat_directory(dir, path->buf, path->len, exclude,
+		return treat_directory(dir, untracked, path->buf, path->len, exclude,
 			simplify);
 	case DT_REG:
 	case DT_LNK:
@@ -1301,6 +1376,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 }
 
 static enum path_treatment treat_path(struct dir_struct *dir,
+				      struct untracked_cache_dir *untracked,
 				      struct dirent *de,
 				      struct strbuf *path,
 				      int baselen,
@@ -1316,7 +1392,16 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 		return path_none;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, path, simplify, dtype, de);
+	return treat_one_path(dir, untracked, path, simplify, dtype, de);
+}
+
+static void add_untracked(struct untracked_cache_dir *dir, const char *name)
+{
+	if (!dir)
+		return;
+	ALLOC_GROW(dir->untracked, dir->untracked_nr + 1,
+		   dir->untracked_alloc);
+	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
 /*
@@ -1332,7 +1417,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
  */
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    const char *base, int baselen,
-				    int check_only,
+				    struct untracked_cache_dir *untracked, int check_only,
 				    const struct path_simplify *simplify)
 {
 	DIR *fdir;
@@ -1346,24 +1431,36 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	if (!fdir)
 		goto out;
 
+	if (untracked)
+		untracked->check_only = !!check_only;
+
 	while ((de = readdir(fdir)) != NULL) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, de, &path, baselen, simplify);
+		state = treat_path(dir, untracked, de, &path, baselen, simplify);
+
 		if (state > dir_state)
 			dir_state = state;
 
 		/* recurse into subdir if instructed by treat_path */
 		if (state == path_recurse) {
-			subdir_state = read_directory_recursive(dir, path.buf,
-				path.len, check_only, simplify);
+			struct untracked_cache_dir *ud;
+			ud = lookup_untracked(dir->untracked, untracked,
+					      path.buf + baselen,
+					      path.len - baselen);
+			subdir_state =
+				read_directory_recursive(dir, path.buf, path.len,
+							 ud, check_only, simplify);
 			if (subdir_state > dir_state)
 				dir_state = subdir_state;
 		}
 
 		if (check_only) {
 			/* abort early if maximum state has been reached */
-			if (dir_state == path_untracked)
+			if (dir_state == path_untracked) {
+				if (untracked)
+					add_untracked(untracked, path.buf + baselen);
 				break;
+			}
 			/* skip the dir_add_* part */
 			continue;
 		}
@@ -1381,8 +1478,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			break;
 
 		case path_untracked:
-			if (!(dir->flags & DIR_SHOW_IGNORED))
-				dir_add_name(dir, path.buf, path.len);
+			if (dir->flags & DIR_SHOW_IGNORED)
+				break;
+			dir_add_name(dir, path.buf, path.len);
+			if (untracked)
+				add_untracked(untracked, path.buf + baselen);
 			break;
 
 		default:
@@ -1459,7 +1559,7 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, NULL, &sb, simplify,
 				   DT_DIR, NULL) == path_none)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1499,7 +1599,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	 */
 	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
 	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len, 0, simplify);
+		read_directory_recursive(dir, path, len,
+					 dir->untracked ? dir->untracked->root : NULL,
+					 0, simplify);
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
@@ -1666,9 +1768,11 @@ void setup_standard_excludes(struct dir_struct *dir)
 		excludes_file = xdg_path;
 	}
 	if (!access_or_warn(path, R_OK, 0))
-		add_excludes_from_file(dir, path);
+		add_excludes_from_file_1(dir, path,
+					 dir->untracked ? &dir->ss_info_exclude : NULL);
 	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
-		add_excludes_from_file(dir, excludes_file);
+		add_excludes_from_file_1(dir, excludes_file,
+					 dir->untracked ? &dir->ss_excludes_file : NULL);
 }
 
 int remove_path(const char *name)
diff --git a/dir.h b/dir.h
index cdca71b..9ab74b4 100644
--- a/dir.h
+++ b/dir.h
@@ -66,6 +66,7 @@ struct exclude_stack {
 	struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
 	int baselen;
 	int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
+	struct untracked_cache_dir *ucd;
 };
 
 struct exclude_list_group {
@@ -79,6 +80,60 @@ struct sha1_stat {
 	int valid;
 };
 
+/*
+ *  Untracked cache
+ *
+ *  The following inputs are sufficient to determine what files in a
+ *  directory are excluded:
+ *
+ *   - The list of files and directories of the directory in question
+ *   - The $GIT_DIR/index
+ *   - dir_struct flags
+ *   - The content of $GIT_DIR/info/exclude
+ *   - The content of core.excludesfile
+ *   - The content (or the lack) of .gitignore of all parent directories
+ *     from $GIT_WORK_TREE
+ *   - The check_only flag in read_directory_recursive (for
+ *     DIR_HIDE_EMPTY_DIRECTORIES)
+ *
+ *  The first input can be checked using directory mtime. In many
+ *  filesystems, directory mtime (stat_data field) is updated when its
+ *  files or direct subdirs are added or removed.
+ *
+ *  The second one can be hooked from cache_tree_invalidate_path().
+ *  Whenever a file (or a submodule) is added or removed from a
+ *  directory, we invalidate that directory.
+ *
+ *  The remaining inputs are easy, their SHA-1 could be used to verify
+ *  their contents (exclude_sha1[], info_exclude_sha1[] and
+ *  excludes_file_sha1[])
+ */
+struct untracked_cache_dir {
+	struct untracked_cache_dir **dirs;
+	char **untracked;
+	struct stat_data stat_data;
+	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
+	unsigned int untracked_nr;
+	unsigned int check_only : 1;
+	/* null SHA-1 means this directory does not have .gitignore */
+	unsigned char exclude_sha1[20];
+	char name[FLEX_ARRAY];
+};
+
+struct untracked_cache {
+	struct sha1_stat ss_info_exclude;
+	struct sha1_stat ss_excludes_file;
+	const char *exclude_per_dir;
+	/*
+	 * dir_struct#flags must match dir_flags or the untracked
+	 * cache is ignored.
+	 */
+	unsigned dir_flags;
+	struct untracked_cache_dir *root;
+	/* Statistics */
+	int dir_created;
+};
+
 struct dir_struct {
 	int nr, alloc;
 	int ignored_nr, ignored_alloc;
@@ -126,6 +181,11 @@ struct dir_struct {
 	struct exclude_stack *exclude_stack;
 	struct exclude *exclude;
 	struct strbuf basebuf;
+
+	/* Enable untracked file cache if set */
+	struct untracked_cache *untracked;
+	struct sha1_stat ss_info_exclude;
+	struct sha1_stat ss_excludes_file;
 };
 
 /*
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 03/24] untracked cache: initial untracked cache validation
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 02/24] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 04/24] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
                   ` (20 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Make sure the starting conditions and all global exclude files are
good to go. If not, either disable untracked cache completely, or wipe
out the cache and start fresh.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h |   4 +++
 2 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 27734f0..44ed9f2 100644
--- a/dir.c
+++ b/dir.c
@@ -582,6 +582,22 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
 	return d;
 }
 
+static void do_invalidate_gitignore(struct untracked_cache_dir *dir)
+{
+	int i;
+	dir->valid = 0;
+	dir->untracked_nr = 0;
+	for (i = 0; i < dir->dirs_nr; i++)
+		do_invalidate_gitignore(dir->dirs[i]);
+}
+
+static void invalidate_gitignore(struct untracked_cache *uc,
+				 struct untracked_cache_dir *dir)
+{
+	uc->gitignore_invalidated++;
+	do_invalidate_gitignore(dir);
+}
+
 /*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
@@ -697,6 +713,13 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 				     struct sha1_stat *sha1_stat)
 {
 	struct exclude_list *el;
+	/*
+	 * catch setup_standard_excludes() that's called before
+	 * dir->untracked is assigned. That function behaves
+	 * differently when dir->untracked is non-NULL.
+	 */
+	if (!dir->untracked)
+		dir->unmanaged_exclude_files++;
 	el = add_exclude_list(dir, EXC_FILE, fname);
 	if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
 		die("cannot use %s as an exclude file", fname);
@@ -704,6 +727,7 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 
 void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 {
+	dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
 	add_excludes_from_file_1(dir, fname, NULL);
 }
 
@@ -1572,9 +1596,87 @@ static int treat_leading_path(struct dir_struct *dir,
 	return rc;
 }
 
+static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
+						      int base_len,
+						      const struct pathspec *pathspec)
+{
+	struct untracked_cache_dir *root;
+
+	if (!dir->untracked)
+		return NULL;
+
+	/*
+	 * We only support $GIT_DIR/info/exclude and core.excludesfile
+	 * as the global ignore rule files. Any other additions
+	 * (e.g. from command line) invalidate the cache. This
+	 * condition also catches running setup_standard_excludes()
+	 * before setting dir->untracked!
+	 */
+	if (dir->unmanaged_exclude_files)
+		return NULL;
+
+	/*
+	 * Optimize for the main use case only: whole-tree git
+	 * status. More work involved in treat_leading_path() if we
+	 * use cache on just a subset of the worktree. pathspec
+	 * support could make the matter even worse.
+	 */
+	if (base_len || (pathspec && pathspec->nr))
+		return NULL;
+
+	/* Different set of flags may produce different results */
+	if (dir->flags != dir->untracked->dir_flags ||
+	    /*
+	     * See treat_directory(), case index_nonexistent. Without
+	     * this flag, we may need to also cache .git file content
+	     * for the resolve_gitlink_ref() call, which we don't.
+	     */
+	    !(dir->flags & DIR_SHOW_OTHER_DIRECTORIES) ||
+	    /* We don't support collecting ignore files */
+	    (dir->flags & (DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO |
+			   DIR_COLLECT_IGNORED)))
+		return NULL;
+
+	/*
+	 * If we use .gitignore in the cache and now you change it to
+	 * .gitexclude, everything will go wrong.
+	 */
+	if (dir->exclude_per_dir != dir->untracked->exclude_per_dir &&
+	    strcmp(dir->exclude_per_dir, dir->untracked->exclude_per_dir))
+		return NULL;
+
+	/*
+	 * EXC_CMDL is not considered in the cache. If people set it,
+	 * skip the cache.
+	 */
+	if (dir->exclude_list_group[EXC_CMDL].nr)
+		return NULL;
+
+	if (!dir->untracked->root) {
+		const int len = sizeof(*dir->untracked->root);
+		dir->untracked->root = xmalloc(len);
+		memset(dir->untracked->root, 0, len);
+	}
+
+	/* Validate $GIT_DIR/info/exclude and core.excludesfile */
+	root = dir->untracked->root;
+	if (hashcmp(dir->ss_info_exclude.sha1,
+		    dir->untracked->ss_info_exclude.sha1)) {
+		invalidate_gitignore(dir->untracked, root);
+		dir->untracked->ss_info_exclude = dir->ss_info_exclude;
+	}
+	if (hashcmp(dir->ss_excludes_file.sha1,
+		    dir->untracked->ss_excludes_file.sha1)) {
+		invalidate_gitignore(dir->untracked, root);
+		dir->untracked->ss_excludes_file = dir->ss_excludes_file;
+	}
+	return root;
+}
+
 int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
 {
 	struct path_simplify *simplify;
+	struct untracked_cache_dir *untracked;
 
 	/*
 	 * Check out create_simplify()
@@ -1598,10 +1700,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	 * create_simplify().
 	 */
 	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
+	untracked = validate_untracked_cache(dir, len, pathspec);
+	if (!untracked)
+		/*
+		 * make sure untracked cache code path is disabled,
+		 * e.g. prep_exclude()
+		 */
+		dir->untracked = NULL;
 	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len,
-					 dir->untracked ? dir->untracked->root : NULL,
-					 0, simplify);
+		read_directory_recursive(dir, path, len, untracked, 0, simplify);
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
diff --git a/dir.h b/dir.h
index 9ab74b4..1d7a958 100644
--- a/dir.h
+++ b/dir.h
@@ -115,6 +115,8 @@ struct untracked_cache_dir {
 	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
 	unsigned int untracked_nr;
 	unsigned int check_only : 1;
+	/* all data in this struct are good */
+	unsigned int valid : 1;
 	/* null SHA-1 means this directory does not have .gitignore */
 	unsigned char exclude_sha1[20];
 	char name[FLEX_ARRAY];
@@ -132,6 +134,7 @@ struct untracked_cache {
 	struct untracked_cache_dir *root;
 	/* Statistics */
 	int dir_created;
+	int gitignore_invalidated;
 };
 
 struct dir_struct {
@@ -186,6 +189,7 @@ struct dir_struct {
 	struct untracked_cache *untracked;
 	struct sha1_stat ss_info_exclude;
 	struct sha1_stat ss_excludes_file;
+	unsigned unmanaged_exclude_files;
 };
 
 /*
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 04/24] untracked cache: invalidate dirs recursively if .gitignore changes
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (2 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 03/24] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 05/24] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
                   ` (19 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

It's easy to see that if an existing .gitignore changes, its SHA-1
would be different and invalidate_gitignore() is called.

If .gitignore is removed, add_excludes() will treat it like an empty
.gitignore, which again should invalidate the cached directory data.

if .gitignore is added, lookup_untracked() already fills initial
.gitignore SHA-1 as "empty file", so again invalidate_gitignore() is
called.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 44ed9f2..6e91315 100644
--- a/dir.c
+++ b/dir.c
@@ -1010,7 +1010,23 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			add_excludes(el->src, el->src, stk->baselen, el, 1,
 				     untracked ? &sha1_stat : NULL);
 		}
-		if (untracked) {
+		/*
+		 * NEEDSWORK: when untracked cache is enabled, prep_exclude()
+		 * will first be called in valid_cached_dir() then maybe many
+		 * times more in last_exclude_matching(). When the cache is
+		 * used, last_exclude_matching() will not be called and
+		 * reading .gitignore content will be a waste.
+		 *
+		 * So when it's called by valid_cached_dir() and we can get
+		 * .gitignore SHA-1 from the index (i.e. .gitignore is not
+		 * modified on work tree), we could delay reading the
+		 * .gitignore content until we absolutely need it in
+		 * last_exclude_matching(). Be careful about ignore rule
+		 * order, though, if you do that.
+		 */
+		if (untracked &&
+		    hashcmp(sha1_stat.sha1, untracked->exclude_sha1)) {
+			invalidate_gitignore(dir->untracked, untracked);
 			hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
 		}
 		dir->exclude_stack = stk;
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 05/24] untracked cache: make a wrapper around {open,read,close}dir()
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (3 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 04/24] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 06/24] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
                   ` (18 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

This allows us to feed different info to read_directory_recursive()
based on untracked cache in the next patch.

Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/dir.c b/dir.c
index 6e91315..fb6ed86 100644
--- a/dir.c
+++ b/dir.c
@@ -31,6 +31,15 @@ enum path_treatment {
 	path_untracked
 };
 
+/*
+ * Support data structure for our opendir/readdir/closedir wrappers
+ */
+struct cached_dir {
+	DIR *fdir;
+	struct untracked_cache_dir *untracked;
+	struct dirent *de;
+};
+
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	const char *path, int len, struct untracked_cache_dir *untracked,
 	int check_only, const struct path_simplify *simplify);
@@ -1417,12 +1426,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct untracked_cache_dir *untracked,
-				      struct dirent *de,
+				      struct cached_dir *cdir,
 				      struct strbuf *path,
 				      int baselen,
 				      const struct path_simplify *simplify)
 {
 	int dtype;
+	struct dirent *de = cdir->de;
 
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
@@ -1444,6 +1454,37 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
 	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
+static int open_cached_dir(struct cached_dir *cdir,
+			   struct dir_struct *dir,
+			   struct untracked_cache_dir *untracked,
+			   struct strbuf *path,
+			   int check_only)
+{
+	memset(cdir, 0, sizeof(*cdir));
+	cdir->untracked = untracked;
+	cdir->fdir = opendir(path->len ? path->buf : ".");
+	if (!cdir->fdir)
+		return -1;
+	return 0;
+}
+
+static int read_cached_dir(struct cached_dir *cdir)
+{
+	if (cdir->fdir) {
+		cdir->de = readdir(cdir->fdir);
+		if (!cdir->de)
+			return -1;
+		return 0;
+	}
+	return -1;
+}
+
+static void close_cached_dir(struct cached_dir *cdir)
+{
+	if (cdir->fdir)
+		closedir(cdir->fdir);
+}
+
 /*
  * Read a directory tree. We currently ignore anything but
  * directories, regular files and symlinks. That's because git
@@ -1460,23 +1501,21 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    struct untracked_cache_dir *untracked, int check_only,
 				    const struct path_simplify *simplify)
 {
-	DIR *fdir;
+	struct cached_dir cdir;
 	enum path_treatment state, subdir_state, dir_state = path_none;
-	struct dirent *de;
 	struct strbuf path = STRBUF_INIT;
 
 	strbuf_add(&path, base, baselen);
 
-	fdir = opendir(path.len ? path.buf : ".");
-	if (!fdir)
+	if (open_cached_dir(&cdir, dir, untracked, &path, check_only))
 		goto out;
 
 	if (untracked)
 		untracked->check_only = !!check_only;
 
-	while ((de = readdir(fdir)) != NULL) {
+	while (!read_cached_dir(&cdir)) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, untracked, de, &path, baselen, simplify);
+		state = treat_path(dir, untracked, &cdir, &path, baselen, simplify);
 
 		if (state > dir_state)
 			dir_state = state;
@@ -1529,7 +1568,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			break;
 		}
 	}
-	closedir(fdir);
+	close_cached_dir(&cdir);
  out:
 	strbuf_release(&path);
 
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 06/24] untracked cache: record/validate dir mtime and reuse cached output
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (4 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 05/24] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 07/24] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
                   ` (17 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

The main readdir loop in read_directory_recursive() is replaced with a
new one that checks if cached results of a directory is still valid.

If a file is added or removed from the index, the containing directory
is invalidated (but not its subdirs). If directory's mtime is changed,
the same happens. If a .gitignore is updated, the containing directory
and all subdirs are invalidated recursively. If dir_struct#flags or
other conditions change, the cache is ignored.

If a directory is invalidated, we opendir/readdir/closedir and run the
exclude machinery on that directory listing as usual. If untracked
cache is also enabled, we'll update the cache along the way. If a
directory is validated, we simply pull the untracked listing out from
the cache. The cache also records the list of direct subdirs that we
have to recurse in. Fully excluded directories are seen as "untracked
files".

In the best case when no dirs are invalidated, read_directory()
becomes a series of

  stat(dir), open(.gitignore), fstat(), read(), close() and optionally
  hash_sha1_file()

For comparison, standard read_directory() is a sequence of

  opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the
  expensive last_exclude_matching() and closedir().

We already try not to open(.gitignore) if we know it does not exist,
so open/fstat/read/close sequence does not apply to every
directory. The sequence could be reduced further, as noted in
prep_exclude() in another patch. So in theory, the entire best-case
read_directory sequence could be reduced to a series of stat() and
nothing else.

This is not a silver bullet approach. When you compile a C file, for
example, the old .o file is removed and a new one with the same name
created, effectively invalidating the containing directory's cache
(but not its subdirectories). If your build process touches every
directory, this cache adds extra overhead for nothing, so it's a good
idea to separate generated files from tracked files.. Editors may use
the same strategy for saving files. And of course you're out of luck
running your repo on an unsupported filesystem and/or operating system.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h |   2 ++
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index fb6ed86..8c989e3 100644
--- a/dir.c
+++ b/dir.c
@@ -37,7 +37,12 @@ enum path_treatment {
 struct cached_dir {
 	DIR *fdir;
 	struct untracked_cache_dir *untracked;
+	int nr_files;
+	int nr_dirs;
+
 	struct dirent *de;
+	const char *file;
+	struct untracked_cache_dir *ucd;
 };
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
@@ -607,6 +612,14 @@ static void invalidate_gitignore(struct untracked_cache *uc,
 	do_invalidate_gitignore(dir);
 }
 
+static void invalidate_directory(struct untracked_cache *uc,
+				 struct untracked_cache_dir *dir)
+{
+	uc->dir_invalidated++;
+	dir->valid = 0;
+	dir->untracked_nr = 0;
+}
+
 /*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
@@ -1424,6 +1437,39 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	}
 }
 
+static enum path_treatment treat_path_fast(struct dir_struct *dir,
+					   struct untracked_cache_dir *untracked,
+					   struct cached_dir *cdir,
+					   struct strbuf *path,
+					   int baselen,
+					   const struct path_simplify *simplify)
+{
+	strbuf_setlen(path, baselen);
+	if (!cdir->ucd) {
+		strbuf_addstr(path, cdir->file);
+		return path_untracked;
+	}
+	strbuf_addstr(path, cdir->ucd->name);
+	/* treat_one_path() does this before it calls treat_directory() */
+	if (path->buf[path->len - 1] != '/')
+		strbuf_addch(path, '/');
+	if (cdir->ucd->check_only)
+		/*
+		 * check_only is set as a result of treat_directory() getting
+		 * to its bottom. Verify again the same set of directories
+		 * with check_only set.
+		 */
+		return read_directory_recursive(dir, path->buf, path->len,
+						cdir->ucd, 1, simplify);
+	/*
+	 * We get path_recurse in the first run when
+	 * directory_exists_in_index() returns index_nonexistent. We
+	 * are sure that new changes in the index does not impact the
+	 * outcome. Return now.
+	 */
+	return path_recurse;
+}
+
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct untracked_cache_dir *untracked,
 				      struct cached_dir *cdir,
@@ -1434,6 +1480,9 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 	int dtype;
 	struct dirent *de = cdir->de;
 
+	if (!de)
+		return treat_path_fast(dir, untracked, cdir, path,
+				       baselen, simplify);
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
@@ -1454,6 +1503,52 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
 	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
+static int valid_cached_dir(struct dir_struct *dir,
+			    struct untracked_cache_dir *untracked,
+			    struct strbuf *path,
+			    int check_only)
+{
+	struct stat st;
+
+	if (!untracked)
+		return 0;
+
+	if (stat(path->len ? path->buf : ".", &st)) {
+		invalidate_directory(dir->untracked, untracked);
+		memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
+		return 0;
+	}
+	if (!untracked->valid ||
+	    match_stat_data(&untracked->stat_data, &st)) {
+		if (untracked->valid)
+			invalidate_directory(dir->untracked, untracked);
+		fill_stat_data(&untracked->stat_data, &st);
+		return 0;
+	}
+
+	if (untracked->check_only != !!check_only) {
+		invalidate_directory(dir->untracked, untracked);
+		return 0;
+	}
+
+	/*
+	 * prep_exclude will be called eventually on this directory,
+	 * but it's called much later in last_exclude_matching(). We
+	 * need it now to determine the validity of the cache for this
+	 * path. The next calls will be nearly no-op, the way
+	 * prep_exclude() is designed.
+	 */
+	if (path->len && path->buf[path->len - 1] != '/') {
+		strbuf_addch(path, '/');
+		prep_exclude(dir, path->buf, path->len);
+		strbuf_setlen(path, path->len - 1);
+	} else
+		prep_exclude(dir, path->buf, path->len);
+
+	/* hopefully prep_exclude() haven't invalidated this entry... */
+	return untracked->valid;
+}
+
 static int open_cached_dir(struct cached_dir *cdir,
 			   struct dir_struct *dir,
 			   struct untracked_cache_dir *untracked,
@@ -1462,7 +1557,11 @@ static int open_cached_dir(struct cached_dir *cdir,
 {
 	memset(cdir, 0, sizeof(*cdir));
 	cdir->untracked = untracked;
+	if (valid_cached_dir(dir, untracked, path, check_only))
+		return 0;
 	cdir->fdir = opendir(path->len ? path->buf : ".");
+	if (dir->untracked)
+		dir->untracked->dir_opened++;
 	if (!cdir->fdir)
 		return -1;
 	return 0;
@@ -1476,6 +1575,18 @@ static int read_cached_dir(struct cached_dir *cdir)
 			return -1;
 		return 0;
 	}
+	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
+		struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+		cdir->ucd = d;
+		cdir->nr_dirs++;
+		return 0;
+	}
+	cdir->ucd = NULL;
+	if (cdir->nr_files < cdir->untracked->untracked_nr) {
+		struct untracked_cache_dir *d = cdir->untracked;
+		cdir->file = d->untracked[cdir->nr_files++];
+		return 0;
+	}
 	return -1;
 }
 
@@ -1483,6 +1594,12 @@ static void close_cached_dir(struct cached_dir *cdir)
 {
 	if (cdir->fdir)
 		closedir(cdir->fdir);
+	/*
+	 * We have gone through this directory and found no untracked
+	 * entries. Mark it valid.
+	 */
+	if (cdir->untracked)
+		cdir->untracked->valid = 1;
 }
 
 /*
@@ -1536,7 +1653,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		if (check_only) {
 			/* abort early if maximum state has been reached */
 			if (dir_state == path_untracked) {
-				if (untracked)
+				if (cdir.fdir)
 					add_untracked(untracked, path.buf + baselen);
 				break;
 			}
@@ -1560,7 +1677,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			if (dir->flags & DIR_SHOW_IGNORED)
 				break;
 			dir_add_name(dir, path.buf, path.len);
-			if (untracked)
+			if (cdir.fdir)
 				add_untracked(untracked, path.buf + baselen);
 			break;
 
diff --git a/dir.h b/dir.h
index 1d7a958..ff3d99b 100644
--- a/dir.h
+++ b/dir.h
@@ -135,6 +135,8 @@ struct untracked_cache {
 	/* Statistics */
 	int dir_created;
 	int gitignore_invalidated;
+	int dir_invalidated;
+	int dir_opened;
 };
 
 struct dir_struct {
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 07/24] untracked cache: mark what dirs should be recursed/saved
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (5 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 06/24] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 08/24] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
                   ` (16 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

If we redo this thing in a functional style, we would have one struct
untracked_dir as input tree and another as output. The input is used
for verification. The output is a brand new tree, reflecting current
worktree.

But that means recreate a lot of dir nodes even if a lot could be
shared between input and output trees in good cases. So we go with the
messy but efficient way, combining both input and output trees into
one. We need a way to know which node in this combined tree belongs to
the output. This is the purpose of this "recurse" flag.

"valid" bit can't be used for this because it's about data of the node
except the subdirs. When we invalidate a directory, we want to keep
cached data of the subdirs intact even though we don't really know
what subdir still exists (yet). Then we check worktree to see what
actual subdir remains on disk. Those will have 'recurse' bit set
again. If cached data for those are still valid, we may be able to
avoid computing exclude files for them. Those subdirs that are deleted
will have 'recurse' remained clear and their 'valid' bits do not
matter.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 14 +++++++++++++-
 dir.h |  3 ++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index 8c989e3..02cdc26 100644
--- a/dir.c
+++ b/dir.c
@@ -615,9 +615,12 @@ static void invalidate_gitignore(struct untracked_cache *uc,
 static void invalidate_directory(struct untracked_cache *uc,
 				 struct untracked_cache_dir *dir)
 {
+	int i;
 	uc->dir_invalidated++;
 	dir->valid = 0;
 	dir->untracked_nr = 0;
+	for (i = 0; i < dir->dirs_nr; i++)
+		dir->dirs[i]->recurse = 0;
 }
 
 /*
@@ -1577,6 +1580,10 @@ static int read_cached_dir(struct cached_dir *cdir)
 	}
 	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
 		struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+		if (!d->recurse) {
+			cdir->nr_dirs++;
+			continue;
+		}
 		cdir->ucd = d;
 		cdir->nr_dirs++;
 		return 0;
@@ -1598,8 +1605,10 @@ static void close_cached_dir(struct cached_dir *cdir)
 	 * We have gone through this directory and found no untracked
 	 * entries. Mark it valid.
 	 */
-	if (cdir->untracked)
+	if (cdir->untracked) {
 		cdir->untracked->valid = 1;
+		cdir->untracked->recurse = 1;
+	}
 }
 
 /*
@@ -1842,6 +1851,9 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 		invalidate_gitignore(dir->untracked, root);
 		dir->untracked->ss_excludes_file = dir->ss_excludes_file;
 	}
+
+	/* Make sure this directory is not dropped out at saving phase */
+	root->recurse = 1;
 	return root;
 }
 
diff --git a/dir.h b/dir.h
index ff3d99b..95baf01 100644
--- a/dir.h
+++ b/dir.h
@@ -115,8 +115,9 @@ struct untracked_cache_dir {
 	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
 	unsigned int untracked_nr;
 	unsigned int check_only : 1;
-	/* all data in this struct are good */
+	/* all data except 'dirs' in this struct are good */
 	unsigned int valid : 1;
+	unsigned int recurse : 1;
 	/* null SHA-1 means this directory does not have .gitignore */
 	unsigned char exclude_sha1[20];
 	char name[FLEX_ARRAY];
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 08/24] untracked cache: don't open non-existent .gitignore
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (6 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 07/24] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 09/24] ewah: add convenient wrapper ewah_serialize_strbuf() Nguyễn Thái Ngọc Duy
                   ` (15 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

This cuts down a signficant number of open(.gitignore) because most
directories usually don't have .gitignore files.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 02cdc26..95a0f0a 100644
--- a/dir.c
+++ b/dir.c
@@ -1019,7 +1019,21 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		/* Try to read per-directory file */
 		hashclr(sha1_stat.sha1);
 		sha1_stat.valid = 0;
-		if (dir->exclude_per_dir) {
+		if (dir->exclude_per_dir &&
+		    /*
+		     * If we know that no files have been added in
+		     * this directory (i.e. valid_cached_dir() has
+		     * been executed and set untracked->valid) ..
+		     */
+		    (!untracked || !untracked->valid ||
+		     /*
+		      * .. and .gitignore does not exist before
+		      * (i.e. null exclude_sha1 and skip_worktree is
+		      * not set). Then we can skip loading .gitignore,
+		      * which would result in ENOENT anyway.
+		      * skip_worktree is taken care in read_directory()
+		      */
+		     !is_null_sha1(untracked->exclude_sha1))) {
 			/*
 			 * dir->basebuf gets reused by the traversal, but we
 			 * need fname to remain unchanged to ensure the src
@@ -1782,6 +1796,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 						      const struct pathspec *pathspec)
 {
 	struct untracked_cache_dir *root;
+	int i;
 
 	if (!dir->untracked)
 		return NULL;
@@ -1833,6 +1848,15 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	if (dir->exclude_list_group[EXC_CMDL].nr)
 		return NULL;
 
+	/*
+	 * An optimization in prep_exclude() does not play well with
+	 * CE_SKIP_WORKTREE. It's a rare case anyway, if a single
+	 * entry has that bit set, disable the whole untracked cache.
+	 */
+	for (i = 0; i < active_nr; i++)
+		if (ce_skip_worktree(active_cache[i]))
+			return NULL;
+
 	if (!dir->untracked->root) {
 		const int len = sizeof(*dir->untracked->root);
 		dir->untracked->root = xmalloc(len);
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 09/24] ewah: add convenient wrapper ewah_serialize_strbuf()
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (7 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 08/24] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 10/24] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
                   ` (14 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 ewah/ewah_io.c | 13 +++++++++++++
 ewah/ewok.h    |  2 ++
 split-index.c  | 11 ++---------
 3 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/ewah/ewah_io.c b/ewah/ewah_io.c
index 1c2d7af..43481b9 100644
--- a/ewah/ewah_io.c
+++ b/ewah/ewah_io.c
@@ -19,6 +19,7 @@
  */
 #include "git-compat-util.h"
 #include "ewok.h"
+#include "strbuf.h"
 
 int ewah_serialize_native(struct ewah_bitmap *self, int fd)
 {
@@ -110,6 +111,18 @@ int ewah_serialize(struct ewah_bitmap *self, int fd)
 	return ewah_serialize_to(self, write_helper, (void *)(intptr_t)fd);
 }
 
+static int write_strbuf(void *user_data, const void *data, size_t len)
+{
+	struct strbuf *sb = user_data;
+	strbuf_add(sb, data, len);
+	return len;
+}
+
+int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *sb)
+{
+	return ewah_serialize_to(self, write_strbuf, sb);
+}
+
 int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len)
 {
 	const uint8_t *ptr = map;
diff --git a/ewah/ewok.h b/ewah/ewok.h
index f6ad190..4d7f5e9 100644
--- a/ewah/ewok.h
+++ b/ewah/ewok.h
@@ -30,6 +30,7 @@
 #	define ewah_calloc xcalloc
 #endif
 
+struct strbuf;
 typedef uint64_t eword_t;
 #define BITS_IN_WORD (sizeof(eword_t) * 8)
 
@@ -97,6 +98,7 @@ int ewah_serialize_to(struct ewah_bitmap *self,
 		      void *out);
 int ewah_serialize(struct ewah_bitmap *self, int fd);
 int ewah_serialize_native(struct ewah_bitmap *self, int fd);
+int ewah_serialize_strbuf(struct ewah_bitmap *self, struct strbuf *);
 
 int ewah_deserialize(struct ewah_bitmap *self, int fd);
 int ewah_read_mmap(struct ewah_bitmap *self, const void *map, size_t len);
diff --git a/split-index.c b/split-index.c
index 21485e2..968b780 100644
--- a/split-index.c
+++ b/split-index.c
@@ -41,13 +41,6 @@ int read_link_extension(struct index_state *istate,
 	return 0;
 }
 
-static int write_strbuf(void *user_data, const void *data, size_t len)
-{
-	struct strbuf *sb = user_data;
-	strbuf_add(sb, data, len);
-	return len;
-}
-
 int write_link_extension(struct strbuf *sb,
 			 struct index_state *istate)
 {
@@ -55,8 +48,8 @@ int write_link_extension(struct strbuf *sb,
 	strbuf_add(sb, si->base_sha1, 20);
 	if (!si->delete_bitmap && !si->replace_bitmap)
 		return 0;
-	ewah_serialize_to(si->delete_bitmap, write_strbuf, sb);
-	ewah_serialize_to(si->replace_bitmap, write_strbuf, sb);
+	ewah_serialize_strbuf(si->delete_bitmap, sb);
+	ewah_serialize_strbuf(si->replace_bitmap, sb);
 	return 0;
 }
 
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 10/24] untracked cache: save to an index extension
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (8 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 09/24] ewah: add convenient wrapper ewah_serialize_strbuf() Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-03-07 19:08   ` Stefan Beller
  2015-02-08  8:55 ` [PATCH 11/24] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
                   ` (13 subsequent siblings)
  23 siblings, 1 reply; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/technical/index-format.txt |  58 +++++++++++++
 cache.h                                  |   3 +
 dir.c                                    | 136 +++++++++++++++++++++++++++++++
 dir.h                                    |   1 +
 read-cache.c                             |  12 +++
 5 files changed, 210 insertions(+)

diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index fe6f316..899dd3d 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -233,3 +233,61 @@ Git index format
   The remaining index entries after replaced ones will be added to the
   final index. These added entries are also sorted by entry namme then
   stage.
+
+== Untracked cache
+
+  Untracked cache saves the untracked file list and necessary data to
+  verify the cache. The signature for this extension is { 'U', 'N',
+  'T', 'R' }.
+
+  The extension starts with
+
+  - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from
+    ctime field until "file size".
+
+  - Stat data of core.excludesfile
+
+  - 32-bit dir_flags (see struct dir_struct)
+
+  - 160-bit SHA-1 of $GIT_DIR/info/exclude. Null SHA-1 means the file
+    does not exist.
+
+  - 160-bit SHA-1 of core.excludesfile. Null SHA-1 means the file does
+    not exist.
+
+  - NUL-terminated string of per-dir exclude file name. This usually
+    is ".gitignore".
+
+  - The number of following directory blocks, variable width
+    encoding. If this number is zero, the extension ends here with a
+    following NUL.
+
+  - A number of directory blocks in depth-first-search order, each
+    consists of
+
+    - The number of untracked entries, variable width encoding.
+
+    - The number of sub-directory blocks, variable width encoding.
+
+    - The directory name terminated by NUL.
+
+    - A number of untrached file/dir names terminated by NUL.
+
+The remaining data of each directory block is grouped by type:
+
+  - An ewah bitmap, the n-th bit marks whether the n-th directory has
+    valid untracked cache entries.
+
+  - An ewah bitmap, the n-th bit records "check-only" bit of
+    read_directory_recursive() for the n-th directory.
+
+  - An ewah bitmap, the n-th bit indicates whether SHA-1 and stat data
+    is valid for the n-th directory and exists in the next data.
+
+  - An array of stat data. The n-th data corresponds with the n-th
+    "one" bit in the previous ewah bitmap.
+
+  - An array of SHA-1. The n-th SHA-1 corresponds with the n-th "one" bit
+    in the previous ewah bitmap.
+
+  - One NUL.
diff --git a/cache.h b/cache.h
index dcf3a2a..b14d6e2 100644
--- a/cache.h
+++ b/cache.h
@@ -297,6 +297,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
 
 struct split_index;
+struct untracked_cache;
+
 struct index_state {
 	struct cache_entry **cache;
 	unsigned int version;
@@ -310,6 +312,7 @@ struct index_state {
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
 	unsigned char sha1[20];
+	struct untracked_cache *untracked;
 };
 
 extern struct index_state the_index;
diff --git a/dir.c b/dir.c
index 95a0f0a..1f2d701 100644
--- a/dir.c
+++ b/dir.c
@@ -12,6 +12,8 @@
 #include "refs.h"
 #include "wildmatch.h"
 #include "pathspec.h"
+#include "varint.h"
+#include "ewah/ewok.h"
 
 struct path_simplify {
 	int len;
@@ -2139,3 +2141,137 @@ void clear_directory(struct dir_struct *dir)
 	}
 	strbuf_release(&dir->basebuf);
 }
+
+struct ondisk_untracked_cache {
+	struct stat_data info_exclude_stat;
+	struct stat_data excludes_file_stat;
+	uint32_t dir_flags;
+	unsigned char info_exclude_sha1[20];
+	unsigned char excludes_file_sha1[20];
+	char exclude_per_dir[FLEX_ARRAY];
+};
+
+#define ouc_size(len) (offsetof(struct ondisk_untracked_cache, exclude_per_dir) + len + 1)
+
+struct write_data {
+	int index;	   /* number of written untracked_cache_dir */
+	struct ewah_bitmap *check_only; /* from untracked_cache_dir */
+	struct ewah_bitmap *valid;	/* from untracked_cache_dir */
+	struct ewah_bitmap *sha1_valid; /* set if exclude_sha1 is not null */
+	struct strbuf out;
+	struct strbuf sb_stat;
+	struct strbuf sb_sha1;
+};
+
+static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = htonl(from->sd_ctime.sec);
+	to->sd_ctime.nsec = htonl(from->sd_ctime.nsec);
+	to->sd_mtime.sec  = htonl(from->sd_mtime.sec);
+	to->sd_mtime.nsec = htonl(from->sd_mtime.nsec);
+	to->sd_dev	  = htonl(from->sd_dev);
+	to->sd_ino	  = htonl(from->sd_ino);
+	to->sd_uid	  = htonl(from->sd_uid);
+	to->sd_gid	  = htonl(from->sd_gid);
+	to->sd_size	  = htonl(from->sd_size);
+}
+
+static void write_one_dir(struct untracked_cache_dir *untracked,
+			  struct write_data *wd)
+{
+	struct stat_data stat_data;
+	struct strbuf *out = &wd->out;
+	unsigned char intbuf[16];
+	unsigned int intlen, value;
+	int i = wd->index++;
+
+	/*
+	 * untracked_nr should be reset whenever valid is clear, but
+	 * for safety..
+	 */
+	if (!untracked->valid) {
+		untracked->untracked_nr = 0;
+		untracked->check_only = 0;
+	}
+
+	if (untracked->check_only)
+		ewah_set(wd->check_only, i);
+	if (untracked->valid) {
+		ewah_set(wd->valid, i);
+		stat_data_to_disk(&stat_data, &untracked->stat_data);
+		strbuf_add(&wd->sb_stat, &stat_data, sizeof(stat_data));
+	}
+	if (!is_null_sha1(untracked->exclude_sha1)) {
+		ewah_set(wd->sha1_valid, i);
+		strbuf_add(&wd->sb_sha1, untracked->exclude_sha1, 20);
+	}
+
+	intlen = encode_varint(untracked->untracked_nr, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	/* skip non-recurse directories */
+	for (i = 0, value = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			value++;
+	intlen = encode_varint(value, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	strbuf_add(out, untracked->name, strlen(untracked->name) + 1);
+
+	for (i = 0; i < untracked->untracked_nr; i++)
+		strbuf_add(out, untracked->untracked[i],
+			   strlen(untracked->untracked[i]) + 1);
+
+	for (i = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			write_one_dir(untracked->dirs[i], wd);
+}
+
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
+{
+	struct ondisk_untracked_cache *ouc;
+	struct write_data wd;
+	unsigned char varbuf[16];
+	int len = 0, varint_len;
+	if (untracked->exclude_per_dir)
+		len = strlen(untracked->exclude_per_dir);
+	ouc = xmalloc(sizeof(*ouc) + len + 1);
+	stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
+	stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
+	hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.sha1);
+	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
+	ouc->dir_flags = htonl(untracked->dir_flags);
+	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+	strbuf_add(out, ouc, ouc_size(len));
+	if (!untracked->root) {
+		varint_len = encode_varint(0, varbuf);
+		strbuf_add(out, varbuf, varint_len);
+		return;
+	}
+
+	wd.index      = 0;
+	wd.check_only = ewah_new();
+	wd.valid      = ewah_new();
+	wd.sha1_valid = ewah_new();
+	strbuf_init(&wd.out, 1024);
+	strbuf_init(&wd.sb_stat, 1024);
+	strbuf_init(&wd.sb_sha1, 1024);
+	write_one_dir(untracked->root, &wd);
+
+	varint_len = encode_varint(wd.index, varbuf);
+	strbuf_add(out, varbuf, varint_len);
+	strbuf_addbuf(out, &wd.out);
+	ewah_serialize_strbuf(wd.valid, out);
+	ewah_serialize_strbuf(wd.check_only, out);
+	ewah_serialize_strbuf(wd.sha1_valid, out);
+	strbuf_addbuf(out, &wd.sb_stat);
+	strbuf_addbuf(out, &wd.sb_sha1);
+	strbuf_addch(out, '\0'); /* safe guard for string lists */
+
+	ewah_free(wd.valid);
+	ewah_free(wd.check_only);
+	ewah_free(wd.sha1_valid);
+	strbuf_release(&wd.out);
+	strbuf_release(&wd.sb_stat);
+	strbuf_release(&wd.sb_sha1);
+}
diff --git a/dir.h b/dir.h
index 95baf01..dc3ee0b 100644
--- a/dir.h
+++ b/dir.h
@@ -298,4 +298,5 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 6f0057f..baf3057 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -37,6 +37,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 #define CACHE_EXT_TREE 0x54524545	/* "TREE" */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b	  /* "link" */
+#define CACHE_EXT_UNTRACKED 0x554E5452	  /* "UNTR" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -2016,6 +2017,17 @@ static int do_write_index(struct index_state *istate, int newfd,
 		if (err)
 			return -1;
 	}
+	if (!strip_extensions && istate->untracked) {
+		struct strbuf sb = STRBUF_INIT;
+
+		write_untracked_extension(&sb, istate->untracked);
+		err = write_index_ext_header(&c, newfd, CACHE_EXT_UNTRACKED,
+					     sb.len) < 0 ||
+			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
 
 	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
 		return -1;
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 11/24] untracked cache: load from UNTR index extension
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (9 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 10/24] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-09 22:26   ` Junio C Hamano
  2015-02-08  8:55 ` [PATCH 12/24] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
                   ` (12 subsequent siblings)
  23 siblings, 1 reply; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c        | 220 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dir.h        |   2 +
 read-cache.c |   5 ++
 3 files changed, 227 insertions(+)

diff --git a/dir.c b/dir.c
index 1f2d701..e7d7df3 100644
--- a/dir.c
+++ b/dir.c
@@ -2275,3 +2275,223 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	strbuf_release(&wd.sb_stat);
 	strbuf_release(&wd.sb_sha1);
 }
+
+static void free_untracked(struct untracked_cache_dir *ucd)
+{
+	int i;
+	if (!ucd)
+		return;
+	for (i = 0; i < ucd->dirs_nr; i++)
+		free_untracked(ucd->dirs[i]);
+	for (i = 0; i < ucd->untracked_nr; i++)
+		free(ucd->untracked[i]);
+	free(ucd->untracked);
+	free(ucd->dirs);
+	free(ucd);
+}
+
+void free_untracked_cache(struct untracked_cache *uc)
+{
+	if (uc)
+		free_untracked(uc->root);
+	free(uc);
+}
+
+struct read_data {
+	int index;
+	struct untracked_cache_dir **ucd;
+	struct ewah_bitmap *check_only;
+	struct ewah_bitmap *valid;
+	struct ewah_bitmap *sha1_valid;
+	const unsigned char *data;
+	const unsigned char *end;
+};
+
+static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
+	to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
+	to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
+	to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
+	to->sd_dev	  = get_be32(&from->sd_dev);
+	to->sd_ino	  = get_be32(&from->sd_ino);
+	to->sd_uid	  = get_be32(&from->sd_uid);
+	to->sd_gid	  = get_be32(&from->sd_gid);
+	to->sd_size	  = get_be32(&from->sd_size);
+}
+
+static int read_one_dir(struct untracked_cache_dir **untracked_,
+			struct read_data *rd)
+{
+#define NEXT(x) \
+	next = data + (x); \
+	if (next > rd->end) \
+		return -1;
+
+	struct untracked_cache_dir ud, *untracked;
+	const unsigned char *next, *data = rd->data, *end = rd->end;
+	unsigned int value;
+	int i, len;
+
+	memset(&ud, 0, sizeof(ud));
+
+	next = data;
+	value = decode_varint(&next);
+	if (next > end)
+		return -1;
+	ud.recurse	   = 1;
+	ud.untracked_alloc = value;
+	ud.untracked_nr	   = value;
+	if (ud.untracked_nr)
+		ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr);
+	data = next;
+
+	next = data;
+	ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
+	if (next > end)
+		return -1;
+	ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr);
+	data = next;
+
+	len = strlen((const char *)data);
+	NEXT(len + 1);
+	*untracked_ = untracked = xmalloc(sizeof(*untracked) + len);
+	memcpy(untracked, &ud, sizeof(ud));
+	memcpy(untracked->name, data, len + 1);
+	data = next;
+
+	for (i = 0; i < untracked->untracked_nr; i++) {
+		len = strlen((const char *)data);
+		NEXT(len + 1);
+		untracked->untracked[i] = xstrdup((const char*)data);
+		data = next;
+	}
+
+	rd->ucd[rd->index++] = untracked;
+	rd->data = data;
+
+	for (i = 0; i < untracked->dirs_nr; i++) {
+		len = read_one_dir(untracked->dirs + i, rd);
+		if (len < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void set_check_only(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	ud->check_only = 1;
+}
+
+static void read_stat(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	if (rd->data + sizeof(struct stat_data) > rd->end) {
+		rd->data = rd->end + 1;
+		return;
+	}
+	stat_data_from_disk(&ud->stat_data, (struct stat_data *)rd->data);
+	rd->data += sizeof(struct stat_data);
+	ud->valid = 1;
+}
+
+static void read_sha1(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	if (rd->data + 20 > rd->end) {
+		rd->data = rd->end + 1;
+		return;
+	}
+	hashcpy(ud->exclude_sha1, rd->data);
+	rd->data += 20;
+}
+
+static void load_sha1_stat(struct sha1_stat *sha1_stat,
+			   const struct stat_data *stat,
+			   const unsigned char *sha1)
+{
+	stat_data_from_disk(&sha1_stat->stat, stat);
+	hashcpy(sha1_stat->sha1, sha1);
+	sha1_stat->valid = 1;
+}
+
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+{
+	const struct ondisk_untracked_cache *ouc;
+	struct untracked_cache *uc;
+	struct read_data rd;
+	const unsigned char *next = data, *end = (const unsigned char *)data + sz;
+	int len;
+
+	if (sz <= 1 || end[-1] != '\0')
+		return NULL;
+	end--;
+
+	ouc = (const struct ondisk_untracked_cache *)next;
+	if (next + ouc_size(0) > end)
+		return NULL;
+
+	uc = xcalloc(1, sizeof(*uc));
+	load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
+		       ouc->info_exclude_sha1);
+	load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
+		       ouc->excludes_file_sha1);
+	uc->dir_flags = get_be32(&ouc->dir_flags);
+	uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
+	/* NUL after exclude_per_dir is covered by sizeof(*ouc) */
+	next += ouc_size(strlen(ouc->exclude_per_dir));
+	if (next >= end)
+		goto done2;
+
+	len = decode_varint(&next);
+	if (next > end || len == 0)
+		goto done2;
+
+	rd.valid      = ewah_new();
+	rd.check_only = ewah_new();
+	rd.sha1_valid = ewah_new();
+	rd.data	      = next;
+	rd.end	      = end;
+	rd.index      = 0;
+	rd.ucd        = xmalloc(sizeof(*rd.ucd) * len);
+
+	if (read_one_dir(&uc->root, &rd) || rd.index != len)
+		goto done;
+
+	next = rd.data;
+	len = ewah_read_mmap(rd.valid, next, end - next);
+	if (len < 0)
+		goto done;
+
+	next += len;
+	len = ewah_read_mmap(rd.check_only, next, end - next);
+	if (len < 0)
+		goto done;
+
+	next += len;
+	len = ewah_read_mmap(rd.sha1_valid, next, end - next);
+	if (len < 0)
+		goto done;
+
+	ewah_each_bit(rd.check_only, set_check_only, &rd);
+	rd.data = next + len;
+	ewah_each_bit(rd.valid, read_stat, &rd);
+	ewah_each_bit(rd.sha1_valid, read_sha1, &rd);
+	next = rd.data;
+
+done:
+	free(rd.ucd);
+	ewah_free(rd.valid);
+	ewah_free(rd.check_only);
+	ewah_free(rd.sha1_valid);
+done2:
+	if (next != end) {
+		free_untracked_cache(uc);
+		uc = NULL;
+	}
+	return uc;
+}
diff --git a/dir.h b/dir.h
index dc3ee0b..40a679a 100644
--- a/dir.h
+++ b/dir.h
@@ -298,5 +298,7 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void free_untracked_cache(struct untracked_cache *);
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index baf3057..3736a56 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1366,6 +1366,9 @@ static int read_index_extension(struct index_state *istate,
 		if (read_link_extension(istate, data, sz))
 			return -1;
 		break;
+	case CACHE_EXT_UNTRACKED:
+		istate->untracked = read_untracked_extension(data, sz);
+		break;
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1631,6 +1634,8 @@ int discard_index(struct index_state *istate)
 	istate->cache = NULL;
 	istate->cache_alloc = 0;
 	discard_split_index(istate);
+	free_untracked_cache(istate->untracked);
+	istate->untracked = NULL;
 	return 0;
 }
 
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 12/24] untracked cache: invalidate at index addition or removal
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (10 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 11/24] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 13/24] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
                   ` (11 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Ideally we should implement untracked_cache_remove_from_index() and
untracked_cache_add_to_index() so that they update untracked cache
right away instead of invalidating it and wait for read_directory()
next time to deal with it. But that may need some more work in
unpack-trees.c. So stay simple as the first step.

The new call in add_index_entry_with_check() may look strange because
new calls usually stay close to cache_tree_invalidate_path(). We do it
a bit later than c_t_i_p() in this function because if it's about
replacing the entry with the same name, we don't care (but cache-tree
does).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c          | 31 +++++++++++++++++++++++++++++++
 dir.h          |  4 ++++
 read-cache.c   |  4 ++++
 unpack-trees.c |  7 +++++--
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index e7d7df3..30e23f8 100644
--- a/dir.c
+++ b/dir.c
@@ -2495,3 +2495,34 @@ done2:
 	}
 	return uc;
 }
+
+void untracked_cache_invalidate_path(struct index_state *istate,
+				     const char *path)
+{
+	const char *sep;
+	struct untracked_cache_dir *d;
+	if (!istate->untracked || !istate->untracked->root)
+		return;
+	sep = strrchr(path, '/');
+	if (sep)
+		d = lookup_untracked(istate->untracked,
+				     istate->untracked->root,
+				     path, sep - path);
+	else
+		d = istate->untracked->root;
+	istate->untracked->dir_invalidated++;
+	d->valid = 0;
+	d->untracked_nr = 0;
+}
+
+void untracked_cache_remove_from_index(struct index_state *istate,
+				       const char *path)
+{
+	untracked_cache_invalidate_path(istate, path);
+}
+
+void untracked_cache_add_to_index(struct index_state *istate,
+				  const char *path)
+{
+	untracked_cache_invalidate_path(istate, path);
+}
diff --git a/dir.h b/dir.h
index 40a679a..2ce7dd3 100644
--- a/dir.h
+++ b/dir.h
@@ -298,6 +298,10 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void untracked_cache_invalidate_path(struct index_state *, const char *);
+void untracked_cache_remove_from_index(struct index_state *, const char *);
+void untracked_cache_add_to_index(struct index_state *, const char *);
+
 void free_untracked_cache(struct untracked_cache *);
 struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
diff --git a/read-cache.c b/read-cache.c
index 3736a56..d643a3f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -78,6 +78,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 	memcpy(new->name, new_name, namelen + 1);
 
 	cache_tree_invalidate_path(istate, old->name);
+	untracked_cache_remove_from_index(istate, old->name);
 	remove_index_entry_at(istate, nr);
 	add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 }
@@ -537,6 +538,7 @@ int remove_file_from_index(struct index_state *istate, const char *path)
 	if (pos < 0)
 		pos = -pos-1;
 	cache_tree_invalidate_path(istate, path);
+	untracked_cache_remove_from_index(istate, path);
 	while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path))
 		remove_index_entry_at(istate, pos);
 	return 0;
@@ -968,6 +970,8 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 	}
 	pos = -pos-1;
 
+	untracked_cache_add_to_index(istate, ce->name);
+
 	/*
 	 * Inserting a merged entry ("stage 0") into the index
 	 * will always replace all non-merged entries..
diff --git a/unpack-trees.c b/unpack-trees.c
index 629c658..e5ddb0c 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -9,6 +9,7 @@
 #include "refs.h"
 #include "attr.h"
 #include "split-index.h"
+#include "dir.h"
 
 /*
  * Error messages expected by scripts out of plumbing commands such as
@@ -1255,8 +1256,10 @@ static int verify_uptodate_sparse(const struct cache_entry *ce,
 static void invalidate_ce_path(const struct cache_entry *ce,
 			       struct unpack_trees_options *o)
 {
-	if (ce)
-		cache_tree_invalidate_path(o->src_index, ce->name);
+	if (!ce)
+		return;
+	cache_tree_invalidate_path(o->src_index, ce->name);
+	untracked_cache_invalidate_path(o->src_index, ce->name);
 }
 
 /*
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 13/24] read-cache.c: split racy stat test to a separate function
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (11 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 12/24] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 14/24] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
                   ` (10 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 read-cache.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index d643a3f..f12a185 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -270,20 +270,26 @@ static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
 	return changed;
 }
 
-static int is_racy_timestamp(const struct index_state *istate,
-			     const struct cache_entry *ce)
+static int is_racy_stat(const struct index_state *istate,
+			const struct stat_data *sd)
 {
-	return (!S_ISGITLINK(ce->ce_mode) &&
-		istate->timestamp.sec &&
+	return (istate->timestamp.sec &&
 #ifdef USE_NSEC
 		 /* nanosecond timestamped files can also be racy! */
-		(istate->timestamp.sec < ce->ce_stat_data.sd_mtime.sec ||
-		 (istate->timestamp.sec == ce->ce_stat_data.sd_mtime.sec &&
-		  istate->timestamp.nsec <= ce->ce_stat_data.sd_mtime.nsec))
+		(istate->timestamp.sec < sd->sd_mtime.sec ||
+		 (istate->timestamp.sec == sd->sd_mtime.sec &&
+		  istate->timestamp.nsec <= sd->sd_mtime.nsec))
 #else
-		istate->timestamp.sec <= ce->ce_stat_data.sd_mtime.sec
+		istate->timestamp.sec <= sd->sd_mtime.sec
 #endif
-		 );
+		);
+}
+
+static int is_racy_timestamp(const struct index_state *istate,
+			     const struct cache_entry *ce)
+{
+	return (!S_ISGITLINK(ce->ce_mode) &&
+		is_racy_stat(istate, &ce->ce_stat_data));
 }
 
 int ie_match_stat(const struct index_state *istate,
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 14/24] untracked cache: avoid racy timestamps
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (12 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 13/24] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 15/24] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
                   ` (9 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

When a directory is updated within the same second that its timestamp
is last saved, we cannot realize the directory has been updated by
checking timestamps. Assume the worst (something is update). See
29e4d36 (Racy GIT - 2005-12-20) for more information.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h      | 2 ++
 dir.c        | 4 ++--
 read-cache.c | 8 ++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index b14d6e2..f8b3dc5 100644
--- a/cache.h
+++ b/cache.h
@@ -561,6 +561,8 @@ extern void fill_stat_data(struct stat_data *sd, struct stat *st);
  * INODE_CHANGED, and DATA_CHANGED.
  */
 extern int match_stat_data(const struct stat_data *sd, struct stat *st);
+extern int match_stat_data_racy(const struct index_state *istate,
+				const struct stat_data *sd, struct stat *st);
 
 extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
 
diff --git a/dir.c b/dir.c
index 30e23f8..439ff22 100644
--- a/dir.c
+++ b/dir.c
@@ -682,7 +682,7 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 		if (sha1_stat) {
 			int pos;
 			if (sha1_stat->valid &&
-			    !match_stat_data(&sha1_stat->stat, &st))
+			    !match_stat_data_racy(&the_index, &sha1_stat->stat, &st))
 				; /* no content change, ss->sha1 still good */
 			else if (check_index &&
 				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
@@ -1538,7 +1538,7 @@ static int valid_cached_dir(struct dir_struct *dir,
 		return 0;
 	}
 	if (!untracked->valid ||
-	    match_stat_data(&untracked->stat_data, &st)) {
+	    match_stat_data_racy(&the_index, &untracked->stat_data, &st)) {
 		if (untracked->valid)
 			invalidate_directory(dir->untracked, untracked);
 		fill_stat_data(&untracked->stat_data, &st);
diff --git a/read-cache.c b/read-cache.c
index f12a185..0ecba05 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -292,6 +292,14 @@ static int is_racy_timestamp(const struct index_state *istate,
 		is_racy_stat(istate, &ce->ce_stat_data));
 }
 
+int match_stat_data_racy(const struct index_state *istate,
+			 const struct stat_data *sd, struct stat *st)
+{
+	if (is_racy_stat(istate, sd))
+		return MTIME_CHANGED;
+	return match_stat_data(sd, st);
+}
+
 int ie_match_stat(const struct index_state *istate,
 		  const struct cache_entry *ce, struct stat *st,
 		  unsigned int options)
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 15/24] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (13 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 14/24] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 16/24] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
                   ` (8 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

This could be used to verify correct behavior in tests

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/dir.c b/dir.c
index 439ff22..c5ca5ce 100644
--- a/dir.c
+++ b/dir.c
@@ -1922,6 +1922,18 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
+	if (dir->untracked) {
+		static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
+		trace_printf_key(&trace_untracked_stats,
+				 "node creation: %u\n"
+				 "gitignore invalidation: %u\n"
+				 "directory invalidation: %u\n"
+				 "opendir: %u\n",
+				 dir->untracked->dir_created,
+				 dir->untracked->gitignore_invalidated,
+				 dir->untracked->dir_invalidated,
+				 dir->untracked->dir_opened);
+	}
 	return dir->nr;
 }
 
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 16/24] untracked cache: mark index dirty if untracked cache is updated
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (14 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 15/24] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 17/24] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
                   ` (7 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 cache.h      | 1 +
 dir.c        | 9 +++++++++
 read-cache.c | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index f8b3dc5..fca979b 100644
--- a/cache.h
+++ b/cache.h
@@ -295,6 +295,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define RESOLVE_UNDO_CHANGED	(1 << 4)
 #define CACHE_TREE_CHANGED	(1 << 5)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
+#define UNTRACKED_CHANGED	(1 << 7)
 
 struct split_index;
 struct untracked_cache;
diff --git a/dir.c b/dir.c
index c5ca5ce..1c3db0b 100644
--- a/dir.c
+++ b/dir.c
@@ -1933,6 +1933,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 				 dir->untracked->gitignore_invalidated,
 				 dir->untracked->dir_invalidated,
 				 dir->untracked->dir_opened);
+		if (dir->untracked == the_index.untracked &&
+		    (dir->untracked->dir_opened ||
+		     dir->untracked->gitignore_invalidated ||
+		     dir->untracked->dir_invalidated))
+			the_index.cache_changed |= UNTRACKED_CHANGED;
+		if (dir->untracked != the_index.untracked) {
+			free(dir->untracked);
+			dir->untracked = NULL;
+		}
 	}
 	return dir->nr;
 }
diff --git a/read-cache.c b/read-cache.c
index 0ecba05..71d8e20 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -42,7 +42,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-		 SPLIT_INDEX_ORDERED)
+		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 17/24] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (15 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 16/24] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 18/24] status: enable untracked cache Nguyễn Thái Ngọc Duy
                   ` (6 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

This can be used to double check if results with untracked cache are
correctly, compared to vanilla version. Untracked cache remains in
index, but not used.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 1c3db0b..5b9dd5d 100644
--- a/dir.c
+++ b/dir.c
@@ -1800,7 +1800,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	struct untracked_cache_dir *root;
 	int i;
 
-	if (!dir->untracked)
+	if (!dir->untracked || getenv("GIT_DISABLE_UNTRACKED_CACHE"))
 		return NULL;
 
 	/*
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 18/24] status: enable untracked cache
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (16 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 17/24] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 19/24] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
                   ` (5 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

update_index_if_able() is moved down so that the updated untracked
cache could be written out.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/commit.c | 5 +++--
 wt-status.c      | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 5ed6036..bdcfa61 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1372,13 +1372,14 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL);
 
 	fd = hold_locked_index(&index_lock, 0);
-	if (0 <= fd)
-		update_index_if_able(&the_index, &index_lock);
 
 	s.is_initial = get_sha1(s.reference, sha1) ? 1 : 0;
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	wt_status_collect(&s);
 
+	if (0 <= fd)
+		update_index_if_able(&the_index, &index_lock);
+
 	if (s.relative_paths)
 		s.prefix = prefix;
 
diff --git a/wt-status.c b/wt-status.c
index 27da529..8880c3b 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -585,6 +585,8 @@ static void wt_status_collect_untracked(struct wt_status *s)
 			DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
 	if (s->show_ignored_files)
 		dir.flags |= DIR_SHOW_IGNORED_TOO;
+	else
+		dir.untracked = the_index.untracked;
 	setup_standard_excludes(&dir);
 
 	fill_directory(&dir, &s->pathspec);
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 19/24] update-index: manually enable or disable untracked cache
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (17 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 18/24] status: enable untracked cache Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 20/24] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
                   ` (4 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Overall time saving on "git status" is about 40% in the best case
scenario, removing ..collect_untracked() as the most time consuming
function. read and refresh index operations are now at the top (which
should drop when index-helper and/or watchman support is added). More
numbers and analysis below.

webkit.git
==========

169k files. 6k dirs. Lots of test data (i.e. not touched most of the
time)

Base status
-----------

Index version 4 in split index mode and cache-tree populated. No
untracked cache. It shows how time is consumed by "git status". The
same settings are used for other repos below.

18:28:10.199679 builtin/commit.c:1394   performance: 0.000000451 s: cmd_status:setup
18:28:10.474847 read-cache.c:1407       performance: 0.274873831 s: read_index
18:28:10.475295 read-cache.c:1407       performance: 0.000000656 s: read_index
18:28:10.728443 preload-index.c:131     performance: 0.253147487 s: read_index_preload
18:28:10.741422 read-cache.c:1254       performance: 0.012868340 s: refresh_index
18:28:10.752300 wt-status.c:623         performance: 0.010421357 s: wt_status_collect_changes_worktree
18:28:10.762069 wt-status.c:629         performance: 0.009644748 s: wt_status_collect_changes_index
18:28:11.601019 wt-status.c:632         performance: 0.838859547 s: wt_status_collect_untracked
18:28:11.605939 builtin/commit.c:1421   performance: 0.004835004 s: cmd_status:update_index
18:28:11.606580 trace.c:415             performance: 1.407878388 s: git command: 'git' 'status'

Populating status
-----------------

This is after enabling untracked cache and the cache is still empty.
We see a slight increase in .._collect_untracked() and update_index
(because new cache has to be written to $GIT_DIR/index).

18:28:18.915213 builtin/commit.c:1394   performance: 0.000000326 s: cmd_status:setup
18:28:19.197364 read-cache.c:1407       performance: 0.281901416 s: read_index
18:28:19.197754 read-cache.c:1407       performance: 0.000000546 s: read_index
18:28:19.451355 preload-index.c:131     performance: 0.253599607 s: read_index_preload
18:28:19.464400 read-cache.c:1254       performance: 0.012935336 s: refresh_index
18:28:19.475115 wt-status.c:623         performance: 0.010236920 s: wt_status_collect_changes_worktree
18:28:19.486022 wt-status.c:629         performance: 0.010801685 s: wt_status_collect_changes_index
18:28:20.362660 wt-status.c:632         performance: 0.876551366 s: wt_status_collect_untracked
18:28:20.396199 builtin/commit.c:1421   performance: 0.033447969 s: cmd_status:update_index
18:28:20.396939 trace.c:415             performance: 1.482695902 s: git command: 'git' 'status'

Populated status
----------------

After the cache is populated, wt_status_collect_untracked() drops 82%
from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now
read_index() and read_index_preload().

18:28:20.408605 builtin/commit.c:1394   performance: 0.000000457 s: cmd_status:setup
18:28:20.692864 read-cache.c:1407       performance: 0.283980458 s: read_index
18:28:20.693273 read-cache.c:1407       performance: 0.000000661 s: read_index
18:28:20.958814 preload-index.c:131     performance: 0.265540254 s: read_index_preload
18:28:20.972375 read-cache.c:1254       performance: 0.013437429 s: refresh_index
18:28:20.983959 wt-status.c:623         performance: 0.011146646 s: wt_status_collect_changes_worktree
18:28:20.993948 wt-status.c:629         performance: 0.009879094 s: wt_status_collect_changes_index
18:28:21.138125 wt-status.c:632         performance: 0.144084737 s: wt_status_collect_untracked
18:28:21.173678 builtin/commit.c:1421   performance: 0.035463949 s: cmd_status:update_index
18:28:21.174251 trace.c:415             performance: 0.766707355 s: git command: 'git' 'status'

gentoo-x86.git
==============

This repository is a strange one with a balanced, wide and shallow
worktree (about 100k files and 23k dirs) and no .gitignore in
worktree. .._collect_untracked() time drops 88%, total time drops 56%.

Base status
-----------
18:20:40.828642 builtin/commit.c:1394   performance: 0.000000496 s: cmd_status:setup
18:20:41.027233 read-cache.c:1407       performance: 0.198130532 s: read_index
18:20:41.027670 read-cache.c:1407       performance: 0.000000581 s: read_index
18:20:41.171716 preload-index.c:131     performance: 0.144045594 s: read_index_preload
18:20:41.179171 read-cache.c:1254       performance: 0.007320424 s: refresh_index
18:20:41.185785 wt-status.c:623         performance: 0.006144638 s: wt_status_collect_changes_worktree
18:20:41.192701 wt-status.c:629         performance: 0.006780184 s: wt_status_collect_changes_index
18:20:41.991723 wt-status.c:632         performance: 0.798927029 s: wt_status_collect_untracked
18:20:41.994664 builtin/commit.c:1421   performance: 0.002852772 s: cmd_status:update_index
18:20:41.995458 trace.c:415             performance: 1.168427502 s: git command: 'git' 'status'
Populating status
-----------------
18:20:48.968848 builtin/commit.c:1394   performance: 0.000000380 s: cmd_status:setup
18:20:49.172918 read-cache.c:1407       performance: 0.203734214 s: read_index
18:20:49.173341 read-cache.c:1407       performance: 0.000000562 s: read_index
18:20:49.320013 preload-index.c:131     performance: 0.146671391 s: read_index_preload
18:20:49.328039 read-cache.c:1254       performance: 0.007921957 s: refresh_index
18:20:49.334680 wt-status.c:623         performance: 0.006172020 s: wt_status_collect_changes_worktree
18:20:49.342526 wt-status.c:629         performance: 0.007731746 s: wt_status_collect_changes_index
18:20:50.257510 wt-status.c:632         performance: 0.914864222 s: wt_status_collect_untracked
18:20:50.338371 builtin/commit.c:1421   performance: 0.080776477 s: cmd_status:update_index
18:20:50.338900 trace.c:415             performance: 1.371462446 s: git command: 'git' 'status'
Populated status
----------------
18:20:50.351160 builtin/commit.c:1394   performance: 0.000000571 s: cmd_status:setup
18:20:50.577358 read-cache.c:1407       performance: 0.225917338 s: read_index
18:20:50.577794 read-cache.c:1407       performance: 0.000000617 s: read_index
18:20:50.734140 preload-index.c:131     performance: 0.156345564 s: read_index_preload
18:20:50.745717 read-cache.c:1254       performance: 0.011463075 s: refresh_index
18:20:50.755176 wt-status.c:623         performance: 0.008877929 s: wt_status_collect_changes_worktree
18:20:50.763768 wt-status.c:629         performance: 0.008471633 s: wt_status_collect_changes_index
18:20:50.854885 wt-status.c:632         performance: 0.090988721 s: wt_status_collect_untracked
18:20:50.857765 builtin/commit.c:1421   performance: 0.002789097 s: cmd_status:update_index
18:20:50.858411 trace.c:415             performance: 0.508647673 s: git command: 'git' 'status'

linux-2.6
=========

Reference repo. Not too big. .._collect_status() drops 84%. Total time
drops 42%.

Base status
-----------
18:34:09.870122 builtin/commit.c:1394   performance: 0.000000385 s: cmd_status:setup
18:34:09.943218 read-cache.c:1407       performance: 0.072871177 s: read_index
18:34:09.943614 read-cache.c:1407       performance: 0.000000491 s: read_index
18:34:10.004364 preload-index.c:131     performance: 0.060748102 s: read_index_preload
18:34:10.008190 read-cache.c:1254       performance: 0.003714285 s: refresh_index
18:34:10.012087 wt-status.c:623         performance: 0.002775446 s: wt_status_collect_changes_worktree
18:34:10.016054 wt-status.c:629         performance: 0.003862140 s: wt_status_collect_changes_index
18:34:10.214747 wt-status.c:632         performance: 0.198604837 s: wt_status_collect_untracked
18:34:10.216102 builtin/commit.c:1421   performance: 0.001244166 s: cmd_status:update_index
18:34:10.216817 trace.c:415             performance: 0.347670735 s: git command: 'git' 'status'
Populating status
-----------------
18:34:16.595102 builtin/commit.c:1394   performance: 0.000000456 s: cmd_status:setup
18:34:16.666600 read-cache.c:1407       performance: 0.070992413 s: read_index
18:34:16.667012 read-cache.c:1407       performance: 0.000000606 s: read_index
18:34:16.729375 preload-index.c:131     performance: 0.062362492 s: read_index_preload
18:34:16.732565 read-cache.c:1254       performance: 0.003075517 s: refresh_index
18:34:16.736148 wt-status.c:623         performance: 0.002422201 s: wt_status_collect_changes_worktree
18:34:16.739990 wt-status.c:629         performance: 0.003746618 s: wt_status_collect_changes_index
18:34:16.948505 wt-status.c:632         performance: 0.208426710 s: wt_status_collect_untracked
18:34:16.961744 builtin/commit.c:1421   performance: 0.013151887 s: cmd_status:update_index
18:34:16.962233 trace.c:415             performance: 0.368537535 s: git command: 'git' 'status'
Populated status
----------------
18:34:16.970026 builtin/commit.c:1394   performance: 0.000000631 s: cmd_status:setup
18:34:17.046235 read-cache.c:1407       performance: 0.075904673 s: read_index
18:34:17.046644 read-cache.c:1407       performance: 0.000000681 s: read_index
18:34:17.113564 preload-index.c:131     performance: 0.066920253 s: read_index_preload
18:34:17.117281 read-cache.c:1254       performance: 0.003604055 s: refresh_index
18:34:17.121115 wt-status.c:623         performance: 0.002508345 s: wt_status_collect_changes_worktree
18:34:17.125089 wt-status.c:629         performance: 0.003871636 s: wt_status_collect_changes_index
18:34:17.156089 wt-status.c:632         performance: 0.030895703 s: wt_status_collect_untracked
18:34:17.169861 builtin/commit.c:1421   performance: 0.013686404 s: cmd_status:update_index
18:34:17.170391 trace.c:415             performance: 0.201474531 s: git command: 'git' 'status'

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 Documentation/git-update-index.txt |  8 ++++++++
 builtin/update-index.c             | 16 ++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index dfc09d9..f9a35cd 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -172,6 +172,14 @@ may not support it yet.
 	the shared index file. This mode is designed for very large
 	indexes that take a signficant amount of time to read or write.
 
+--untracked-cache::
+--no-untracked-cache::
+	Enable or disable untracked cache extension. This could speed
+	up for commands that involve determining untracked files such
+	as `git status`. The underlying operating system and file
+	system must change `st_mtime` field of a directory if files
+	are added or deleted in that directory.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index e8c7fd4..3d2dedd 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -740,6 +740,7 @@ static int reupdate_callback(struct parse_opt_ctx_t *ctx,
 int cmd_update_index(int argc, const char **argv, const char *prefix)
 {
 	int newfd, entries, has_errors = 0, line_termination = '\n';
+	int untracked_cache = -1;
 	int read_from_stdin = 0;
 	int prefix_length = prefix ? strlen(prefix) : 0;
 	int preferred_index_format = 0;
@@ -831,6 +832,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			N_("write index in this format")),
 		OPT_BOOL(0, "split-index", &split_index,
 			N_("enable or disable split index")),
+		OPT_BOOL(0, "untracked-cache", &untracked_cache,
+			N_("enable/disable untracked cache")),
 		OPT_END()
 	};
 
@@ -937,6 +940,19 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		the_index.split_index = NULL;
 		the_index.cache_changed |= SOMETHING_CHANGED;
 	}
+	if (untracked_cache > 0 && !the_index.untracked) {
+		struct untracked_cache *uc;
+
+		uc = xcalloc(1, sizeof(*uc));
+		uc->exclude_per_dir = ".gitignore";
+		/* should be the same flags used by git-status */
+		uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
+		the_index.untracked = uc;
+		the_index.cache_changed |= UNTRACKED_CHANGED;
+	} else if (!untracked_cache && the_index.untracked) {
+		the_index.untracked = NULL;
+		the_index.cache_changed |= UNTRACKED_CHANGED;
+	}
 
 	if (active_cache_changed) {
 		if (newfd < 0) {
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 20/24] update-index: test the system before enabling untracked cache
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (18 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 19/24] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 21/24] t7063: tests for " Nguyễn Thái Ngọc Duy
                   ` (3 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: Junio C Hamano <gitster@pobox.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-update-index.txt |   6 ++
 builtin/update-index.c             | 168 +++++++++++++++++++++++++++++++++++++
 2 files changed, 174 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index f9a35cd..ed32bae 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -180,6 +180,12 @@ may not support it yet.
 	system must change `st_mtime` field of a directory if files
 	are added or deleted in that directory.
 
+--force-untracked-cache::
+	For safety, `--untracked-cache` performs tests on the working
+	directory to make sure untracked cache can be used. These
+	tests can take a few seconds. `--force-untracked-cache` can be
+	used to skip the tests.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 3d2dedd..f5f6689 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -32,6 +32,7 @@ static int mark_valid_only;
 static int mark_skip_worktree_only;
 #define MARK_FLAG 1
 #define UNMARK_FLAG 2
+static struct strbuf mtime_dir = STRBUF_INIT;
 
 __attribute__((format (printf, 1, 2)))
 static void report(const char *fmt, ...)
@@ -47,6 +48,166 @@ static void report(const char *fmt, ...)
 	va_end(vp);
 }
 
+static void remove_test_directory(void)
+{
+	if (mtime_dir.len)
+		remove_dir_recursively(&mtime_dir, 0);
+}
+
+static const char *get_mtime_path(const char *path)
+{
+	static struct strbuf sb = STRBUF_INIT;
+	strbuf_reset(&sb);
+	strbuf_addf(&sb, "%s/%s", mtime_dir.buf, path);
+	return sb.buf;
+}
+
+static void xmkdir(const char *path)
+{
+	path = get_mtime_path(path);
+	if (mkdir(path, 0700))
+		die_errno(_("failed to create directory %s"), path);
+}
+
+static int xstat_mtime_dir(struct stat *st)
+{
+	if (stat(mtime_dir.buf, st))
+		die_errno(_("failed to stat %s"), mtime_dir.buf);
+	return 0;
+}
+
+static int create_file(const char *path)
+{
+	int fd;
+	path = get_mtime_path(path);
+	fd = open(path, O_CREAT | O_RDWR, 0644);
+	if (fd < 0)
+		die_errno(_("failed to create file %s"), path);
+	return fd;
+}
+
+static void xunlink(const char *path)
+{
+	path = get_mtime_path(path);
+	if (unlink(path))
+		die_errno(_("failed to delete file %s"), path);
+}
+
+static void xrmdir(const char *path)
+{
+	path = get_mtime_path(path);
+	if (rmdir(path))
+		die_errno(_("failed to delete directory %s"), path);
+}
+
+static void avoid_racy(void)
+{
+	/*
+	 * not use if we could usleep(10) if USE_NSEC is defined. The
+	 * field nsec could be there, but the OS could choose to
+	 * ignore it?
+	 */
+	sleep(1);
+}
+
+static int test_if_untracked_cache_is_supported(void)
+{
+	struct stat st;
+	struct stat_data base;
+	int fd, ret = 0;
+
+	strbuf_addstr(&mtime_dir, "mtime-test-XXXXXX");
+	if (!mkdtemp(mtime_dir.buf))
+		die_errno("Could not make temporary directory");
+
+	fprintf(stderr, _("Testing "));
+	atexit(remove_test_directory);
+	xstat_mtime_dir(&st);
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	fd = create_file("newfile");
+	xstat_mtime_dir(&st);
+	if (!match_stat_data(&base, &st)) {
+		close(fd);
+		fputc('\n', stderr);
+		fprintf_ln(stderr,_("directory stat info does not "
+				    "change after adding a new file"));
+		goto done;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	xmkdir("new-dir");
+	xstat_mtime_dir(&st);
+	if (!match_stat_data(&base, &st)) {
+		close(fd);
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not change "
+				     "after adding a new directory"));
+		goto done;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	write_or_die(fd, "data", 4);
+	close(fd);
+	xstat_mtime_dir(&st);
+	if (match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info changes "
+				     "after updating a file"));
+		goto done;
+	}
+	fputc('.', stderr);
+
+	avoid_racy();
+	close(create_file("new-dir/new"));
+	xstat_mtime_dir(&st);
+	if (match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info changes after "
+				     "adding a file inside subdirectory"));
+		goto done;
+	}
+	fputc('.', stderr);
+
+	avoid_racy();
+	xunlink("newfile");
+	xstat_mtime_dir(&st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not "
+				     "change after deleting a file"));
+		goto done;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	xunlink("new-dir/new");
+	xrmdir("new-dir");
+	xstat_mtime_dir(&st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not "
+				     "change after deleting a directory"));
+		goto done;
+	}
+
+	if (rmdir(mtime_dir.buf))
+		die_errno(_("failed to delete directory %s"), mtime_dir.buf);
+	fprintf_ln(stderr, _(" OK"));
+	ret = 1;
+
+done:
+	strbuf_release(&mtime_dir);
+	return ret;
+}
+
 static int mark_ce_flags(const char *path, int flag, int mark)
 {
 	int namelen = strlen(path);
@@ -834,6 +995,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			N_("enable or disable split index")),
 		OPT_BOOL(0, "untracked-cache", &untracked_cache,
 			N_("enable/disable untracked cache")),
+		OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
+			    N_("enable untracked cache without testing the filesystem"), 2),
 		OPT_END()
 	};
 
@@ -943,6 +1106,11 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 	if (untracked_cache > 0 && !the_index.untracked) {
 		struct untracked_cache *uc;
 
+		if (untracked_cache < 2) {
+			setup_work_tree();
+			if (!test_if_untracked_cache_is_supported())
+				return 1;
+		}
 		uc = xcalloc(1, sizeof(*uc));
 		uc->exclude_per_dir = ".gitignore";
 		/* should be the same flags used by git-status */
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 21/24] t7063: tests for untracked cache
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (19 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 20/24] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 22/24] mingw32: add uname() Nguyễn Thái Ngọc Duy
                   ` (2 subsequent siblings)
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 .gitignore                                 |   1 +
 Makefile                                   |   1 +
 t/t7063-status-untracked-cache.sh (new +x) | 353 +++++++++++++++++++++++++++++
 test-dump-untracked-cache.c (new)          |  61 +++++
 4 files changed, 416 insertions(+)
 create mode 100755 t/t7063-status-untracked-cache.sh
 create mode 100644 test-dump-untracked-cache.c

diff --git a/.gitignore b/.gitignore
index 81e12c0..e2bb375 100644
--- a/.gitignore
+++ b/.gitignore
@@ -182,6 +182,7 @@
 /test-delta
 /test-dump-cache-tree
 /test-dump-split-index
+/test-dump-untracked-cache
 /test-scrap-cache-tree
 /test-genrandom
 /test-hashmap
diff --git a/Makefile b/Makefile
index 9f984a9..fa58a53 100644
--- a/Makefile
+++ b/Makefile
@@ -555,6 +555,7 @@ TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
+TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-hashmap
 TEST_PROGRAMS_NEED_X += test-index-version
diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh
new file mode 100755
index 0000000..2b2ffd7
--- /dev/null
+++ b/t/t7063-status-untracked-cache.sh
@@ -0,0 +1,353 @@
+#!/bin/sh
+
+test_description='test untracked cache'
+
+. ./test-lib.sh
+
+avoid_racy() {
+	sleep 1
+}
+
+git update-index --untracked-cache
+# It's fine if git update-index returns an error code other than one,
+# it'll be caught in the first test.
+if test $? -eq 1; then
+	skip_all='This system does not support untracked cache'
+	test_done
+fi
+
+test_expect_success 'setup' '
+	git init worktree &&
+	cd worktree &&
+	mkdir done dtwo dthree &&
+	touch one two three done/one dtwo/two dthree/three &&
+	git add one two done/one &&
+	: >.git/info/exclude &&
+	git update-index --untracked-cache
+'
+
+test_expect_success 'untracked cache is empty' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 0000000000000000000000000000000000000000
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+EOF
+	test_cmp ../expect ../actual
+'
+
+cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? dthree/
+?? dtwo/
+?? three
+EOF
+
+cat >../dump.expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ 0000000000000000000000000000000000000000 recurse valid
+dthree/
+dtwo/
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+
+test_expect_success 'status first time (empty cache)' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 3
+gitignore invalidation: 1
+directory invalidation: 0
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'untracked cache after first status' '
+	test-dump-untracked-cache >../actual &&
+	test_cmp ../dump.expect ../actual
+'
+
+test_expect_success 'status second time (fully populated cache)' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 0
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'untracked cache after second status' '
+	test-dump-untracked-cache >../actual &&
+	test_cmp ../dump.expect ../actual
+'
+
+test_expect_success 'modify in root directory, one dir invalidation' '
+	avoid_racy &&
+	: >four &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? dthree/
+?? dtwo/
+?? four
+?? three
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 1
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ 0000000000000000000000000000000000000000 recurse valid
+dthree/
+dtwo/
+four
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'new .gitignore invalidates recursively' '
+	avoid_racy &&
+	echo four >.gitignore &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dthree/
+?? dtwo/
+?? three
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 1
+directory invalidation: 1
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dthree/
+dtwo/
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'new info/exclude invalidates everything' '
+	avoid_racy &&
+	echo three >>.git/info/exclude &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 1
+directory invalidation: 0
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'move two from tracked to untracked' '
+	git rm --cached two &&
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'status after the move' '
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+?? .gitignore
+?? dtwo/
+?? two
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+two
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'move two from untracked to tracked' '
+	git add two &&
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'status after the move' '
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_done
diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
new file mode 100644
index 0000000..710441e
--- /dev/null
+++ b/test-dump-untracked-cache.c
@@ -0,0 +1,61 @@
+#include "cache.h"
+#include "dir.h"
+
+static int compare_untracked(const void *a_, const void *b_)
+{
+	const char *const *a = a_;
+	const char *const *b = b_;
+	return strcmp(*a, *b);
+}
+
+static int compare_dir(const void *a_, const void *b_)
+{
+	const struct untracked_cache_dir *const *a = a_;
+	const struct untracked_cache_dir *const *b = b_;
+	return strcmp((*a)->name, (*b)->name);
+}
+
+static void dump(struct untracked_cache_dir *ucd, struct strbuf *base)
+{
+	int i, len;
+	qsort(ucd->untracked, ucd->untracked_nr, sizeof(*ucd->untracked),
+	      compare_untracked);
+	qsort(ucd->dirs, ucd->dirs_nr, sizeof(*ucd->dirs),
+	      compare_dir);
+	len = base->len;
+	strbuf_addf(base, "%s/", ucd->name);
+	printf("%s %s", base->buf,
+	       sha1_to_hex(ucd->exclude_sha1));
+	if (ucd->recurse)
+		fputs(" recurse", stdout);
+	if (ucd->check_only)
+		fputs(" check_only", stdout);
+	if (ucd->valid)
+		fputs(" valid", stdout);
+	printf("\n");
+	for (i = 0; i < ucd->untracked_nr; i++)
+		printf("%s\n", ucd->untracked[i]);
+	for (i = 0; i < ucd->dirs_nr; i++)
+		dump(ucd->dirs[i], base);
+	strbuf_setlen(base, len);
+}
+
+int main(int ac, char **av)
+{
+	struct untracked_cache *uc;
+	struct strbuf base = STRBUF_INIT;
+	if (read_cache() < 0)
+		die("unable to read index file");
+	uc = the_index.untracked;
+	if (!uc) {
+		printf("no untracked cache\n");
+		return 0;
+	}
+	printf("info/exclude %s\n", sha1_to_hex(uc->ss_info_exclude.sha1));
+	printf("core.excludesfile %s\n", sha1_to_hex(uc->ss_excludes_file.sha1));
+	printf("exclude_per_dir %s\n", uc->exclude_per_dir);
+	printf("flags %08x\n", uc->dir_flags);
+	if (uc->root)
+		dump(uc->root, &base);
+	return 0;
+}
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 22/24] mingw32: add uname()
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (20 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 21/24] t7063: tests for " Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 23/24] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 24/24] git-status.txt: advertisement for untracked cache Nguyễn Thái Ngọc Duy
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy, Junio C Hamano

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 compat/mingw.c | 11 +++++++++++
 compat/mingw.h |  9 +++++++++
 2 files changed, 20 insertions(+)

diff --git a/compat/mingw.c b/compat/mingw.c
index c5c37e5..88140e4 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -2128,3 +2128,14 @@ void mingw_startup()
 	/* initialize Unicode console */
 	winansi_init();
 }
+
+int uname(struct utsname *buf)
+{
+	DWORD v = GetVersion();
+	memset(buf, 0, sizeof(*buf));
+	strcpy(buf->sysname, "Windows");
+	sprintf(buf->release, "%u.%u", v & 0xff, (v >> 8) & 0xff);
+	/* assuming NT variants only.. */
+	sprintf(buf->version, "%u", (v >> 16) & 0x7fff);
+	return 0;
+}
diff --git a/compat/mingw.h b/compat/mingw.h
index df0e320..d00ba7a 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -77,6 +77,14 @@ struct itimerval {
 };
 #define ITIMER_REAL 0
 
+struct utsname {
+	char sysname[16];
+	char nodename[1];
+	char release[16];
+	char version[16];
+	char machine[1];
+};
+
 /*
  * sanitize preprocessor namespace polluted by Windows headers defining
  * macros which collide with git local versions
@@ -166,6 +174,7 @@ struct passwd *getpwuid(uid_t uid);
 int setitimer(int type, struct itimerval *in, struct itimerval *out);
 int sigaction(int sig, struct sigaction *in, struct sigaction *out);
 int link(const char *oldpath, const char *newpath);
+int uname(struct utsname *buf);
 
 /*
  * replacements of existing functions
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 23/24] untracked cache: guard and disable on system changes
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (21 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 22/24] mingw32: add uname() Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  2015-02-08  8:55 ` [PATCH 24/24] git-status.txt: advertisement for untracked cache Nguyễn Thái Ngọc Duy
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

If the user enables untracked cache, then

 - move worktree to an unsupported filesystem
 - or simply upgrade OS
 - or move the whole (portable) disk from one machine to another
 - or access a shared fs from another machine

there's no guarantee that untracked cache can still function properly.
Record the worktree location and OS footprint in the cache. If it
changes, err on the safe side and disable the cache. The user can
'update-index --untracked-cache' again to make sure all conditions are
met.

This adds a new requirement that setup_git_directory* must be called
before read_cache() because we need worktree location by then, or the
cache is dropped.

This change does not cover all bases, you can fool it if you try
hard. The point is to stop accidents.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Helped-by: brian m. carlson <sandals@crustytoothpaste.net>
Helped-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/technical/index-format.txt |  4 +++
 builtin/update-index.c                   | 16 ++++++----
 dir.c                                    | 55 +++++++++++++++++++++++++++++++-
 dir.h                                    |  2 ++
 git-compat-util.h                        |  1 +
 test-dump-untracked-cache.c              |  1 +
 6 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/Documentation/technical/index-format.txt b/Documentation/technical/index-format.txt
index 899dd3d..e24b4bc 100644
--- a/Documentation/technical/index-format.txt
+++ b/Documentation/technical/index-format.txt
@@ -242,6 +242,10 @@ Git index format
 
   The extension starts with
 
+  - A sequence of NUL-terminated strings, preceded by the size of the
+    sequence in variable width encoding. Each string describes the
+    environment where the cache can be used.
+
   - Stat data of $GIT_DIR/info/exclude. See "Index entry" section from
     ctime field until "file size".
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index f5f6689..fc5e108 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -1103,7 +1103,7 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		the_index.split_index = NULL;
 		the_index.cache_changed |= SOMETHING_CHANGED;
 	}
-	if (untracked_cache > 0 && !the_index.untracked) {
+	if (untracked_cache > 0) {
 		struct untracked_cache *uc;
 
 		if (untracked_cache < 2) {
@@ -1111,11 +1111,15 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			if (!test_if_untracked_cache_is_supported())
 				return 1;
 		}
-		uc = xcalloc(1, sizeof(*uc));
-		uc->exclude_per_dir = ".gitignore";
-		/* should be the same flags used by git-status */
-		uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
-		the_index.untracked = uc;
+		if (!the_index.untracked) {
+			uc = xcalloc(1, sizeof(*uc));
+			strbuf_init(&uc->ident, 100);
+			uc->exclude_per_dir = ".gitignore";
+			/* should be the same flags used by git-status */
+			uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
+			the_index.untracked = uc;
+		}
+		add_untracked_ident(the_index.untracked);
 		the_index.cache_changed |= UNTRACKED_CHANGED;
 	} else if (!untracked_cache && the_index.untracked) {
 		the_index.untracked = NULL;
diff --git a/dir.c b/dir.c
index 5b9dd5d..b8a4f9e 100644
--- a/dir.c
+++ b/dir.c
@@ -1793,6 +1793,40 @@ static int treat_leading_path(struct dir_struct *dir,
 	return rc;
 }
 
+static const char *get_ident_string(void)
+{
+	static struct strbuf sb = STRBUF_INIT;
+	struct utsname uts;
+
+	if (sb.len)
+		return sb.buf;
+	if (uname(&uts))
+		die_errno(_("failed to get kernel name and information"));
+	strbuf_addf(&sb, "Location %s, system %s %s %s", get_git_work_tree(),
+		    uts.sysname, uts.release, uts.version);
+	return sb.buf;
+}
+
+static int ident_in_untracked(const struct untracked_cache *uc)
+{
+	const char *end = uc->ident.buf + uc->ident.len;
+	const char *p   = uc->ident.buf;
+
+	for (p = uc->ident.buf; p < end; p += strlen(p) + 1)
+		if (!strcmp(p, get_ident_string()))
+			return 1;
+	return 0;
+}
+
+void add_untracked_ident(struct untracked_cache *uc)
+{
+	if (ident_in_untracked(uc))
+		return;
+	strbuf_addstr(&uc->ident, get_ident_string());
+	/* this strbuf contains a list of strings, save NUL too */
+	strbuf_addch(&uc->ident, 0);
+}
+
 static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
 						      int base_len,
 						      const struct pathspec *pathspec)
@@ -1859,6 +1893,11 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 		if (ce_skip_worktree(active_cache[i]))
 			return NULL;
 
+	if (!ident_in_untracked(dir->untracked)) {
+		warning(_("Untracked cache is disabled on this system."));
+		return NULL;
+	}
+
 	if (!dir->untracked->root) {
 		const int len = sizeof(*dir->untracked->root);
 		dir->untracked->root = xmalloc(len);
@@ -2263,6 +2302,11 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
 	ouc->dir_flags = htonl(untracked->dir_flags);
 	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+
+	varint_len = encode_varint(untracked->ident.len, varbuf);
+	strbuf_add(out, varbuf, varint_len);
+	strbuf_add(out, untracked->ident.buf, untracked->ident.len);
+
 	strbuf_add(out, ouc, ouc_size(len));
 	if (!untracked->root) {
 		varint_len = encode_varint(0, varbuf);
@@ -2446,17 +2490,26 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	struct untracked_cache *uc;
 	struct read_data rd;
 	const unsigned char *next = data, *end = (const unsigned char *)data + sz;
-	int len;
+	const char *ident;
+	int ident_len, len;
 
 	if (sz <= 1 || end[-1] != '\0')
 		return NULL;
 	end--;
 
+	ident_len = decode_varint(&next);
+	if (next + ident_len > end)
+		return NULL;
+	ident = (const char *)next;
+	next += ident_len;
+
 	ouc = (const struct ondisk_untracked_cache *)next;
 	if (next + ouc_size(0) > end)
 		return NULL;
 
 	uc = xcalloc(1, sizeof(*uc));
+	strbuf_init(&uc->ident, ident_len);
+	strbuf_add(&uc->ident, ident, ident_len);
 	load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
 		       ouc->info_exclude_sha1);
 	load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
diff --git a/dir.h b/dir.h
index 2ce7dd3..6ccbc45 100644
--- a/dir.h
+++ b/dir.h
@@ -127,6 +127,7 @@ struct untracked_cache {
 	struct sha1_stat ss_info_exclude;
 	struct sha1_stat ss_excludes_file;
 	const char *exclude_per_dir;
+	struct strbuf ident;
 	/*
 	 * dir_struct#flags must match dir_flags or the untracked
 	 * cache is ignored.
@@ -305,4 +306,5 @@ void untracked_cache_add_to_index(struct index_state *, const char *);
 void free_untracked_cache(struct untracked_cache *);
 struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
+void add_untracked_ident(struct untracked_cache *);
 #endif
diff --git a/git-compat-util.h b/git-compat-util.h
index f587749..e9502a1 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -132,6 +132,7 @@
 #elif defined(_MSC_VER)
 #include "compat/msvc.h"
 #else
+#include <sys/utsname.h>
 #include <sys/wait.h>
 #include <sys/resource.h>
 #include <sys/socket.h>
diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
index 710441e..25d855d 100644
--- a/test-dump-untracked-cache.c
+++ b/test-dump-untracked-cache.c
@@ -44,6 +44,7 @@ int main(int ac, char **av)
 {
 	struct untracked_cache *uc;
 	struct strbuf base = STRBUF_INIT;
+	setup_git_directory();
 	if (read_cache() < 0)
 		die("unable to read index file");
 	uc = the_index.untracked;
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 24/24] git-status.txt: advertisement for untracked cache
  2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
                   ` (22 preceding siblings ...)
  2015-02-08  8:55 ` [PATCH 23/24] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
@ 2015-02-08  8:55 ` Nguyễn Thái Ngọc Duy
  23 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-02-08  8:55 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

When a good user sees the "too long, consider -uno" advice when
running `git status`, they should check out the man page to find out
more. This change suggests they try untracked cache before -uno.

Helped-by: brian m. carlson <sandals@crustytoothpaste.net>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-status.txt | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index def635f..4dcad4e 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -58,7 +58,10 @@ When `-u` option is not used, untracked files and directories are
 shown (i.e. the same as specifying `normal`), to help you avoid
 forgetting to add newly created files.  Because it takes extra work
 to find untracked files in the filesystem, this mode may take some
-time in a large working tree.  You can use `no` to have `git status`
+time in a large working tree.
+Consider enabling untracked cache and split index if supported (see
+`git update-index --untracked-cache` and `git update-index
+--split-index`), Otherwise you can use `no` to have `git status`
 return more quickly without showing untracked files.
 +
 The default can be changed using the status.showUntrackedFiles
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* Re: [PATCH 11/24] untracked cache: load from UNTR index extension
  2015-02-08  8:55 ` [PATCH 11/24] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
@ 2015-02-09 22:26   ` Junio C Hamano
  0 siblings, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2015-02-09 22:26 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> +static int read_one_dir(struct untracked_cache_dir **untracked_,
> +			struct read_data *rd)
> +{
> +#define NEXT(x) \
> +	next = data + (x); \
> +	if (next > rd->end) \
> +		return -1;

Please do not do this, and instead write them at two places they are
used.  It is not clear to those who are reading the use sites that
this can break the control flow and return to the caller.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file
  2015-02-08  8:55 ` [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
@ 2015-02-11 21:23   ` Junio C Hamano
  2015-02-16  9:45     ` Duy Nguyen
  0 siblings, 1 reply; 32+ messages in thread
From: Junio C Hamano @ 2015-02-11 21:23 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> -int add_excludes_from_file_to_list(const char *fname,
> -				   const char *base,
> -				   int baselen,
> -				   struct exclude_list *el,
> -				   int check_index)
> +/*
> + * Given a file with name "fname", read it (either from disk, or from
> + * the index if "check_index" is non-zero), parse it and store the
> + * exclude rules in "el".
> + *
> + * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
> + * stat data from disk (only valid if add_excludes returns zero). If
> + * ss_valid is non-zero, "ss" must contain good value as input.
> + */
> +static int add_excludes(const char *fname, const char *base, int baselen,
> +			struct exclude_list *el, int check_index,
> +			struct sha1_stat *sha1_stat)
> ...
> @@ -571,6 +588,21 @@ int add_excludes_from_file_to_list(const char *fname,
>  		}
>  		buf[size++] = '\n';
>  		close(fd);
> +		if (sha1_stat) {
> +			int pos;
> +			if (sha1_stat->valid &&
> +			    !match_stat_data(&sha1_stat->stat, &st))
> +				; /* no content change, ss->sha1 still good */
> +			else if (check_index &&
> +				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
> +				 !ce_stage(active_cache[pos]) &&
> +				 ce_uptodate(active_cache[pos]))
> +				hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
> +			else
> +				hash_sha1_file(buf, size, "blob", sha1_stat->sha1);

I do not think this would work well on DOS.

This helper function originally is meant to work *only* on the
checked out representation of the file and that is what is read by
read_in_full(), and that is the reason why it handles the case where
the contents of buf[] happens to be CRLF terminated in the function.

If you want to detect the content changes across working tree, index
and the tree objects by reusing hash_sha1_file(), however, you must
not feed the checked out (aka "smudged") representation to it.
You'd need to turn it into "cleaned" representation by doing the
equivalent of calling index_path().  Some helpers in the callchain
that originates from index_path() might directly be reusable for
your purpose.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file
  2015-02-11 21:23   ` Junio C Hamano
@ 2015-02-16  9:45     ` Duy Nguyen
  2015-02-16 21:59       ` Junio C Hamano
  0 siblings, 1 reply; 32+ messages in thread
From: Duy Nguyen @ 2015-02-16  9:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List

On Thu, Feb 12, 2015 at 4:23 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>> -int add_excludes_from_file_to_list(const char *fname,
>> -                                const char *base,
>> -                                int baselen,
>> -                                struct exclude_list *el,
>> -                                int check_index)
>> +/*
>> + * Given a file with name "fname", read it (either from disk, or from
>> + * the index if "check_index" is non-zero), parse it and store the
>> + * exclude rules in "el".
>> + *
>> + * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
>> + * stat data from disk (only valid if add_excludes returns zero). If
>> + * ss_valid is non-zero, "ss" must contain good value as input.
>> + */
>> +static int add_excludes(const char *fname, const char *base, int baselen,
>> +                     struct exclude_list *el, int check_index,
>> +                     struct sha1_stat *sha1_stat)
>> ...
>> @@ -571,6 +588,21 @@ int add_excludes_from_file_to_list(const char *fname,
>>               }
>>               buf[size++] = '\n';
>>               close(fd);
>> +             if (sha1_stat) {
>> +                     int pos;
>> +                     if (sha1_stat->valid &&
>> +                         !match_stat_data(&sha1_stat->stat, &st))
>> +                             ; /* no content change, ss->sha1 still good */
>> +                     else if (check_index &&
>> +                              (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
>> +                              !ce_stage(active_cache[pos]) &&
>> +                              ce_uptodate(active_cache[pos]))
>> +                             hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
>> +                     else
>> +                             hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
>
> I do not think this would work well on DOS.
>
> This helper function originally is meant to work *only* on the
> checked out representation of the file and that is what is read by
> read_in_full(), and that is the reason why it handles the case where
> the contents of buf[] happens to be CRLF terminated in the function.
>
> If you want to detect the content changes across working tree, index
> and the tree objects by reusing hash_sha1_file(), however, you must
> not feed the checked out (aka "smudged") representation to it.
> You'd need to turn it into "cleaned" representation by doing the
> equivalent of calling index_path().  Some helpers in the callchain
> that originates from index_path() might directly be reusable for
> your purpose.

Urgh.. you're right this test would fail when some filters are
involved. I'm not sure if we want to check the cleaned version though.
What matters to exclude machinery is the checkout version. I think for
now we fall back to hashing .gitignore content. Perhaps later we could
make an exception for cr/lf conversion (and just that, not generic
filters, doing content conversion here sounds like a bad idea).
-- 
Duy

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file
  2015-02-16  9:45     ` Duy Nguyen
@ 2015-02-16 21:59       ` Junio C Hamano
  0 siblings, 0 replies; 32+ messages in thread
From: Junio C Hamano @ 2015-02-16 21:59 UTC (permalink / raw)
  To: Duy Nguyen; +Cc: Git Mailing List

Duy Nguyen <pclouds@gmail.com> writes:

> On Thu, Feb 12, 2015 at 4:23 AM, Junio C Hamano <gitster@pobox.com> wrote:
> ...
>> If you want to detect the content changes across working tree, index
>> and the tree objects by reusing hash_sha1_file(), however, you must
>> not feed the checked out (aka "smudged") representation to it.
>> You'd need to turn it into "cleaned" representation by doing the
>> equivalent of calling index_path().  Some helpers in the callchain
>> that originates from index_path() might directly be reusable for
>> your purpose.
>
> Urgh.. you're right this test would fail when some filters are
> involved. I'm not sure if we want to check the cleaned version though.
> What matters to exclude machinery is the checkout version. 

Oh, I wouldn't suggest getting lines from the cleaned version.  It
is just that you must hash the cleaned version if you want to decide
"Ah, the content is different from what the internal cache is based
on, so I need to invalidate my cache" and "Because the version I
have on the filesystem matches what is in the index, which is what
my cache is based on, I would use my cached version".  The latter
would break (i.e. the signature would not match when it should) and
you end up invalidating the cache when you do not have to.

^ permalink raw reply	[flat|nested] 32+ messages in thread

* Re: [PATCH 10/24] untracked cache: save to an index extension
  2015-02-08  8:55 ` [PATCH 10/24] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
@ 2015-03-07 19:08   ` Stefan Beller
  0 siblings, 0 replies; 32+ messages in thread
From: Stefan Beller @ 2015-03-07 19:08 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

On Sun, Feb 8, 2015 at 12:55 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
> +       ouc = xmalloc(sizeof(*ouc) + len + 1);

Do we need to free `ouc` at the end of the function as well?

^ permalink raw reply	[flat|nested] 32+ messages in thread

* [PATCH 11/24] untracked cache: load from UNTR index extension
  2015-03-08 10:12 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
@ 2015-03-08 10:12 ` Nguyễn Thái Ngọc Duy
  0 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-03-08 10:12 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c        | 219 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dir.h        |   2 +
 read-cache.c |   5 ++
 3 files changed, 226 insertions(+)

diff --git a/dir.c b/dir.c
index 8f0deb1..0b37c65 100644
--- a/dir.c
+++ b/dir.c
@@ -2279,3 +2279,222 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	strbuf_release(&wd.sb_stat);
 	strbuf_release(&wd.sb_sha1);
 }
+
+static void free_untracked(struct untracked_cache_dir *ucd)
+{
+	int i;
+	if (!ucd)
+		return;
+	for (i = 0; i < ucd->dirs_nr; i++)
+		free_untracked(ucd->dirs[i]);
+	for (i = 0; i < ucd->untracked_nr; i++)
+		free(ucd->untracked[i]);
+	free(ucd->untracked);
+	free(ucd->dirs);
+	free(ucd);
+}
+
+void free_untracked_cache(struct untracked_cache *uc)
+{
+	if (uc)
+		free_untracked(uc->root);
+	free(uc);
+}
+
+struct read_data {
+	int index;
+	struct untracked_cache_dir **ucd;
+	struct ewah_bitmap *check_only;
+	struct ewah_bitmap *valid;
+	struct ewah_bitmap *sha1_valid;
+	const unsigned char *data;
+	const unsigned char *end;
+};
+
+static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
+	to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
+	to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
+	to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
+	to->sd_dev	  = get_be32(&from->sd_dev);
+	to->sd_ino	  = get_be32(&from->sd_ino);
+	to->sd_uid	  = get_be32(&from->sd_uid);
+	to->sd_gid	  = get_be32(&from->sd_gid);
+	to->sd_size	  = get_be32(&from->sd_size);
+}
+
+static int read_one_dir(struct untracked_cache_dir **untracked_,
+			struct read_data *rd)
+{
+	struct untracked_cache_dir ud, *untracked;
+	const unsigned char *next, *data = rd->data, *end = rd->end;
+	unsigned int value;
+	int i, len;
+
+	memset(&ud, 0, sizeof(ud));
+
+	next = data;
+	value = decode_varint(&next);
+	if (next > end)
+		return -1;
+	ud.recurse	   = 1;
+	ud.untracked_alloc = value;
+	ud.untracked_nr	   = value;
+	if (ud.untracked_nr)
+		ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr);
+	data = next;
+
+	next = data;
+	ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
+	if (next > end)
+		return -1;
+	ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr);
+	data = next;
+
+	len = strlen((const char *)data);
+	next = data + len + 1;
+	if (next > rd->end)
+		return -1;
+	*untracked_ = untracked = xmalloc(sizeof(*untracked) + len);
+	memcpy(untracked, &ud, sizeof(ud));
+	memcpy(untracked->name, data, len + 1);
+	data = next;
+
+	for (i = 0; i < untracked->untracked_nr; i++) {
+		len = strlen((const char *)data);
+		next = data + len + 1;
+		if (next > rd->end)
+			return -1;
+		untracked->untracked[i] = xstrdup((const char*)data);
+		data = next;
+	}
+
+	rd->ucd[rd->index++] = untracked;
+	rd->data = data;
+
+	for (i = 0; i < untracked->dirs_nr; i++) {
+		len = read_one_dir(untracked->dirs + i, rd);
+		if (len < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void set_check_only(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	ud->check_only = 1;
+}
+
+static void read_stat(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	if (rd->data + sizeof(struct stat_data) > rd->end) {
+		rd->data = rd->end + 1;
+		return;
+	}
+	stat_data_from_disk(&ud->stat_data, (struct stat_data *)rd->data);
+	rd->data += sizeof(struct stat_data);
+	ud->valid = 1;
+}
+
+static void read_sha1(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	if (rd->data + 20 > rd->end) {
+		rd->data = rd->end + 1;
+		return;
+	}
+	hashcpy(ud->exclude_sha1, rd->data);
+	rd->data += 20;
+}
+
+static void load_sha1_stat(struct sha1_stat *sha1_stat,
+			   const struct stat_data *stat,
+			   const unsigned char *sha1)
+{
+	stat_data_from_disk(&sha1_stat->stat, stat);
+	hashcpy(sha1_stat->sha1, sha1);
+	sha1_stat->valid = 1;
+}
+
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+{
+	const struct ondisk_untracked_cache *ouc;
+	struct untracked_cache *uc;
+	struct read_data rd;
+	const unsigned char *next = data, *end = (const unsigned char *)data + sz;
+	int len;
+
+	if (sz <= 1 || end[-1] != '\0')
+		return NULL;
+	end--;
+
+	ouc = (const struct ondisk_untracked_cache *)next;
+	if (next + ouc_size(0) > end)
+		return NULL;
+
+	uc = xcalloc(1, sizeof(*uc));
+	load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
+		       ouc->info_exclude_sha1);
+	load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
+		       ouc->excludes_file_sha1);
+	uc->dir_flags = get_be32(&ouc->dir_flags);
+	uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
+	/* NUL after exclude_per_dir is covered by sizeof(*ouc) */
+	next += ouc_size(strlen(ouc->exclude_per_dir));
+	if (next >= end)
+		goto done2;
+
+	len = decode_varint(&next);
+	if (next > end || len == 0)
+		goto done2;
+
+	rd.valid      = ewah_new();
+	rd.check_only = ewah_new();
+	rd.sha1_valid = ewah_new();
+	rd.data	      = next;
+	rd.end	      = end;
+	rd.index      = 0;
+	rd.ucd        = xmalloc(sizeof(*rd.ucd) * len);
+
+	if (read_one_dir(&uc->root, &rd) || rd.index != len)
+		goto done;
+
+	next = rd.data;
+	len = ewah_read_mmap(rd.valid, next, end - next);
+	if (len < 0)
+		goto done;
+
+	next += len;
+	len = ewah_read_mmap(rd.check_only, next, end - next);
+	if (len < 0)
+		goto done;
+
+	next += len;
+	len = ewah_read_mmap(rd.sha1_valid, next, end - next);
+	if (len < 0)
+		goto done;
+
+	ewah_each_bit(rd.check_only, set_check_only, &rd);
+	rd.data = next + len;
+	ewah_each_bit(rd.valid, read_stat, &rd);
+	ewah_each_bit(rd.sha1_valid, read_sha1, &rd);
+	next = rd.data;
+
+done:
+	free(rd.ucd);
+	ewah_free(rd.valid);
+	ewah_free(rd.check_only);
+	ewah_free(rd.sha1_valid);
+done2:
+	if (next != end) {
+		free_untracked_cache(uc);
+		uc = NULL;
+	}
+	return uc;
+}
diff --git a/dir.h b/dir.h
index dc3ee0b..40a679a 100644
--- a/dir.h
+++ b/dir.h
@@ -298,5 +298,7 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void free_untracked_cache(struct untracked_cache *);
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index baf3057..3736a56 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1366,6 +1366,9 @@ static int read_index_extension(struct index_state *istate,
 		if (read_link_extension(istate, data, sz))
 			return -1;
 		break;
+	case CACHE_EXT_UNTRACKED:
+		istate->untracked = read_untracked_extension(data, sz);
+		break;
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1631,6 +1634,8 @@ int discard_index(struct index_state *istate)
 	istate->cache = NULL;
 	istate->cache_alloc = 0;
 	discard_split_index(istate);
+	free_untracked_cache(istate->untracked);
+	istate->untracked = NULL;
 	return 0;
 }
 
-- 
2.3.0.rc1.137.g477eb31

^ permalink raw reply related	[flat|nested] 32+ messages in thread

* [PATCH 11/24] untracked cache: load from UNTR index extension
  2015-01-20 13:03 [PATCH 00/24] nd/untracked-cache update Nguyễn Thái Ngọc Duy
@ 2015-01-20 13:03 ` Nguyễn Thái Ngọc Duy
  0 siblings, 0 replies; 32+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2015-01-20 13:03 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c        | 220 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dir.h        |   2 +
 read-cache.c |   5 ++
 3 files changed, 227 insertions(+)

diff --git a/dir.c b/dir.c
index 1f2d701..e7d7df3 100644
--- a/dir.c
+++ b/dir.c
@@ -2275,3 +2275,223 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	strbuf_release(&wd.sb_stat);
 	strbuf_release(&wd.sb_sha1);
 }
+
+static void free_untracked(struct untracked_cache_dir *ucd)
+{
+	int i;
+	if (!ucd)
+		return;
+	for (i = 0; i < ucd->dirs_nr; i++)
+		free_untracked(ucd->dirs[i]);
+	for (i = 0; i < ucd->untracked_nr; i++)
+		free(ucd->untracked[i]);
+	free(ucd->untracked);
+	free(ucd->dirs);
+	free(ucd);
+}
+
+void free_untracked_cache(struct untracked_cache *uc)
+{
+	if (uc)
+		free_untracked(uc->root);
+	free(uc);
+}
+
+struct read_data {
+	int index;
+	struct untracked_cache_dir **ucd;
+	struct ewah_bitmap *check_only;
+	struct ewah_bitmap *valid;
+	struct ewah_bitmap *sha1_valid;
+	const unsigned char *data;
+	const unsigned char *end;
+};
+
+static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
+	to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
+	to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
+	to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
+	to->sd_dev	  = get_be32(&from->sd_dev);
+	to->sd_ino	  = get_be32(&from->sd_ino);
+	to->sd_uid	  = get_be32(&from->sd_uid);
+	to->sd_gid	  = get_be32(&from->sd_gid);
+	to->sd_size	  = get_be32(&from->sd_size);
+}
+
+static int read_one_dir(struct untracked_cache_dir **untracked_,
+			struct read_data *rd)
+{
+#define NEXT(x) \
+	next = data + (x); \
+	if (next > rd->end) \
+		return -1;
+
+	struct untracked_cache_dir ud, *untracked;
+	const unsigned char *next, *data = rd->data, *end = rd->end;
+	unsigned int value;
+	int i, len;
+
+	memset(&ud, 0, sizeof(ud));
+
+	next = data;
+	value = decode_varint(&next);
+	if (next > end)
+		return -1;
+	ud.recurse	   = 1;
+	ud.untracked_alloc = value;
+	ud.untracked_nr	   = value;
+	if (ud.untracked_nr)
+		ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr);
+	data = next;
+
+	next = data;
+	ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
+	if (next > end)
+		return -1;
+	ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr);
+	data = next;
+
+	len = strlen((const char *)data);
+	NEXT(len + 1);
+	*untracked_ = untracked = xmalloc(sizeof(*untracked) + len);
+	memcpy(untracked, &ud, sizeof(ud));
+	memcpy(untracked->name, data, len + 1);
+	data = next;
+
+	for (i = 0; i < untracked->untracked_nr; i++) {
+		len = strlen((const char *)data);
+		NEXT(len + 1);
+		untracked->untracked[i] = xstrdup((const char*)data);
+		data = next;
+	}
+
+	rd->ucd[rd->index++] = untracked;
+	rd->data = data;
+
+	for (i = 0; i < untracked->dirs_nr; i++) {
+		len = read_one_dir(untracked->dirs + i, rd);
+		if (len < 0)
+			return -1;
+	}
+	return 0;
+}
+
+static void set_check_only(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	ud->check_only = 1;
+}
+
+static void read_stat(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	if (rd->data + sizeof(struct stat_data) > rd->end) {
+		rd->data = rd->end + 1;
+		return;
+	}
+	stat_data_from_disk(&ud->stat_data, (struct stat_data *)rd->data);
+	rd->data += sizeof(struct stat_data);
+	ud->valid = 1;
+}
+
+static void read_sha1(size_t pos, void *cb)
+{
+	struct read_data *rd = cb;
+	struct untracked_cache_dir *ud = rd->ucd[pos];
+	if (rd->data + 20 > rd->end) {
+		rd->data = rd->end + 1;
+		return;
+	}
+	hashcpy(ud->exclude_sha1, rd->data);
+	rd->data += 20;
+}
+
+static void load_sha1_stat(struct sha1_stat *sha1_stat,
+			   const struct stat_data *stat,
+			   const unsigned char *sha1)
+{
+	stat_data_from_disk(&sha1_stat->stat, stat);
+	hashcpy(sha1_stat->sha1, sha1);
+	sha1_stat->valid = 1;
+}
+
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+{
+	const struct ondisk_untracked_cache *ouc;
+	struct untracked_cache *uc;
+	struct read_data rd;
+	const unsigned char *next = data, *end = (const unsigned char *)data + sz;
+	int len;
+
+	if (sz <= 1 || end[-1] != '\0')
+		return NULL;
+	end--;
+
+	ouc = (const struct ondisk_untracked_cache *)next;
+	if (next + ouc_size(0) > end)
+		return NULL;
+
+	uc = xcalloc(1, sizeof(*uc));
+	load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
+		       ouc->info_exclude_sha1);
+	load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
+		       ouc->excludes_file_sha1);
+	uc->dir_flags = get_be32(&ouc->dir_flags);
+	uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
+	/* NUL after exclude_per_dir is covered by sizeof(*ouc) */
+	next += ouc_size(strlen(ouc->exclude_per_dir));
+	if (next >= end)
+		goto done2;
+
+	len = decode_varint(&next);
+	if (next > end || len == 0)
+		goto done2;
+
+	rd.valid      = ewah_new();
+	rd.check_only = ewah_new();
+	rd.sha1_valid = ewah_new();
+	rd.data	      = next;
+	rd.end	      = end;
+	rd.index      = 0;
+	rd.ucd        = xmalloc(sizeof(*rd.ucd) * len);
+
+	if (read_one_dir(&uc->root, &rd) || rd.index != len)
+		goto done;
+
+	next = rd.data;
+	len = ewah_read_mmap(rd.valid, next, end - next);
+	if (len < 0)
+		goto done;
+
+	next += len;
+	len = ewah_read_mmap(rd.check_only, next, end - next);
+	if (len < 0)
+		goto done;
+
+	next += len;
+	len = ewah_read_mmap(rd.sha1_valid, next, end - next);
+	if (len < 0)
+		goto done;
+
+	ewah_each_bit(rd.check_only, set_check_only, &rd);
+	rd.data = next + len;
+	ewah_each_bit(rd.valid, read_stat, &rd);
+	ewah_each_bit(rd.sha1_valid, read_sha1, &rd);
+	next = rd.data;
+
+done:
+	free(rd.ucd);
+	ewah_free(rd.valid);
+	ewah_free(rd.check_only);
+	ewah_free(rd.sha1_valid);
+done2:
+	if (next != end) {
+		free_untracked_cache(uc);
+		uc = NULL;
+	}
+	return uc;
+}
diff --git a/dir.h b/dir.h
index dc3ee0b..40a679a 100644
--- a/dir.h
+++ b/dir.h
@@ -298,5 +298,7 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void free_untracked_cache(struct untracked_cache *);
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index baf3057..3736a56 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1366,6 +1366,9 @@ static int read_index_extension(struct index_state *istate,
 		if (read_link_extension(istate, data, sz))
 			return -1;
 		break;
+	case CACHE_EXT_UNTRACKED:
+		istate->untracked = read_untracked_extension(data, sz);
+		break;
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1631,6 +1634,8 @@ int discard_index(struct index_state *istate)
 	istate->cache = NULL;
 	istate->cache_alloc = 0;
 	discard_split_index(istate);
+	free_untracked_cache(istate->untracked);
+	istate->untracked = NULL;
 	return 0;
 }
 
-- 
2.2.0.84.ge9c7a8a

^ permalink raw reply related	[flat|nested] 32+ messages in thread

end of thread, other threads:[~2015-03-08 10:14 UTC | newest]

Thread overview: 32+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-02-08  8:55 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 01/24] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
2015-02-11 21:23   ` Junio C Hamano
2015-02-16  9:45     ` Duy Nguyen
2015-02-16 21:59       ` Junio C Hamano
2015-02-08  8:55 ` [PATCH 02/24] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 03/24] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 04/24] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 05/24] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 06/24] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 07/24] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 08/24] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 09/24] ewah: add convenient wrapper ewah_serialize_strbuf() Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 10/24] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
2015-03-07 19:08   ` Stefan Beller
2015-02-08  8:55 ` [PATCH 11/24] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
2015-02-09 22:26   ` Junio C Hamano
2015-02-08  8:55 ` [PATCH 12/24] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 13/24] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 14/24] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 15/24] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 16/24] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 17/24] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 18/24] status: enable untracked cache Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 19/24] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 20/24] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 21/24] t7063: tests for " Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 22/24] mingw32: add uname() Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 23/24] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
2015-02-08  8:55 ` [PATCH 24/24] git-status.txt: advertisement for untracked cache Nguyễn Thái Ngọc Duy
  -- strict thread matches above, loose matches on Subject: below --
2015-03-08 10:12 [PATCH 00/24] nd/untracked-cache updates Nguyễn Thái Ngọc Duy
2015-03-08 10:12 ` [PATCH 11/24] untracked cache: load from UNTR index extension Nguyễn Thái Ngọc Duy
2015-01-20 13:03 [PATCH 00/24] nd/untracked-cache update Nguyễn Thái Ngọc Duy
2015-01-20 13:03 ` [PATCH 11/24] untracked cache: load from UNTR index extension Nguyễn Thái Ngọc Duy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.