All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 00/19] Untracked cache to speed up "git status"
@ 2014-10-27 12:10 Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
                   ` (19 more replies)
  0 siblings, 20 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

The last post was five months ago [1]. It's probably time for a resend
in case I'm hit by a bus. Numbers are in 17/19, saving about 40% time
on "git status". Details are in 02/19 and 06/19. Still on the table:

 - index-helper series [2] probably helps save about 10-15% in total
 - watchman support to lower the numbers even more, but not for Windows

I still need to see how watchman can be made to work on top of this.

[1] http://thread.gmane.org/gmane.comp.version-control.git/248306
[2] http://thread.gmane.org/gmane.comp.version-control.git/254314/focus=254318
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 22:46   ` Junio C Hamano
  2014-10-28 17:37   ` Torsten Bögershausen
  2014-10-27 12:10 ` [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
                   ` (18 subsequent siblings)
  19 siblings, 2 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This is not used anywhere yet. But the goal is to compare quickly if a
.gitignore file has changed when we have the SHA-1 of both old (cached
somewhere) and new (from index or a tree) versions.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 50 +++++++++++++++++++++++++++++++++++++++++++-------
 dir.h |  5 +++++
 2 files changed, 48 insertions(+), 7 deletions(-)

diff --git a/dir.c b/dir.c
index bd274a7..33a35c1 100644
--- a/dir.c
+++ b/dir.c
@@ -466,7 +466,8 @@ void add_exclude(const char *string, const char *base,
 	x->el = el;
 }
 
-static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
+static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
+						struct sha1_stat *ss)
 {
 	int pos, len;
 	unsigned long sz;
@@ -485,6 +486,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 		return NULL;
 	}
 	*size = xsize_t(sz);
+	if (ss) {
+		memset(&ss->stat, 0, sizeof(ss->stat));
+		hashcpy(ss->sha1, active_cache[pos]->sha1);
+	}
 	return data;
 }
 
@@ -529,11 +534,18 @@ static void trim_trailing_spaces(char *buf)
 		*last_space = '\0';
 }
 
-int add_excludes_from_file_to_list(const char *fname,
-				   const char *base,
-				   int baselen,
-				   struct exclude_list *el,
-				   int check_index)
+/*
+ * Given a file with name "fname", read it (either from disk, or from
+ * the index if "check_index" is non-zero), parse it and store the
+ * exclude rules in "el".
+ *
+ * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
+ * stat data from disk (only valid if add_excludes returns zero). If
+ * ss_valid is non-zero, "ss" must contain good value as input.
+ */
+static int add_excludes(const char *fname, const char *base, int baselen,
+			struct exclude_list *el, int check_index,
+			struct sha1_stat *ss, int ss_valid)
 {
 	struct stat st;
 	int fd, i, lineno = 1;
@@ -547,7 +559,7 @@ int add_excludes_from_file_to_list(const char *fname,
 		if (0 <= fd)
 			close(fd);
 		if (!check_index ||
-		    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
+		    (buf = read_skip_worktree_file_from_index(fname, &size, ss)) == NULL)
 			return -1;
 		if (size == 0) {
 			free(buf);
@@ -560,6 +572,10 @@ int add_excludes_from_file_to_list(const char *fname,
 	} else {
 		size = xsize_t(st.st_size);
 		if (size == 0) {
+			if (ss) {
+				fill_stat_data(&ss->stat, &st);
+				hashcpy(ss->sha1, EMPTY_BLOB_SHA1_BIN);
+			}
 			close(fd);
 			return 0;
 		}
@@ -571,6 +587,19 @@ int add_excludes_from_file_to_list(const char *fname,
 		}
 		buf[size++] = '\n';
 		close(fd);
+		if (ss) {
+			int pos;
+			if (ss_valid && !match_stat_data(&ss->stat, &st))
+				; /* no content change, ss->sha1 still good */
+			else if (check_index &&
+				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
+				 !ce_stage(active_cache[pos]) &&
+				 ce_uptodate(active_cache[pos]))
+				hashcpy(ss->sha1, active_cache[pos]->sha1);
+			else
+				hash_sha1_file(buf, size, "blob", ss->sha1);
+			fill_stat_data(&ss->stat, &st);
+		}
 	}
 
 	el->filebuf = buf;
@@ -589,6 +618,13 @@ int add_excludes_from_file_to_list(const char *fname,
 	return 0;
 }
 
+int add_excludes_from_file_to_list(const char *fname, const char *base,
+				   int baselen, struct exclude_list *el,
+				   int check_index)
+{
+	return add_excludes(fname, base, baselen, el, check_index, NULL, 0);
+}
+
 struct exclude_list *add_exclude_list(struct dir_struct *dir,
 				      int group_type, const char *src)
 {
diff --git a/dir.h b/dir.h
index 6c45e9d..032d197 100644
--- a/dir.h
+++ b/dir.h
@@ -73,6 +73,11 @@ struct exclude_list_group {
 	struct exclude_list *el;
 };
 
+struct sha1_stat {
+	struct stat_data stat;
+	unsigned char sha1[20];
+};
+
 struct dir_struct {
 	int nr, alloc;
 	int ignored_nr, ignored_alloc;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-28 17:37   ` Torsten Bögershausen
  2014-10-27 12:10 ` [PATCH 03/19] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
                   ` (17 subsequent siblings)
  19 siblings, 1 reply; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

The idea is if we can capture all input and (non-recursive) output of
read_directory_recursive(), and can verify later that all the input is
the same, then the second r_d_r() should produce the same output as in
the first run.

The requirement for this to work is stat info of a directory MUST
change if an entry is added to or removed from that directory (and
should not change often otherwise). If your OS and filesystem do not
meet this requirement, untracked cache is not for you. Most file
systems on *nix should be fine. On Windows, NTFS is fine while FAT may
not be [1], even though FAT on Linux seems to be fine.

The list of input of r_d_r() is in the big comment block in dir.h. In
short, the output of a directory (not counting subdirs) mainly depends
on stat info of the directory in question, all .gitignore leading to
it and the check_only flag when r_d_r() is called recursively. This
patch records all this info (and the output) as r_d_r() runs.

Two hash_sha1_file() are required for $GIT_DIR/info/exclude and
core.excludesfile unless their stat data matches. hash_sha1_file() is
only needed when .gitignore files in the worktree are modified,
otherwise their SHA-1 in index is used (see the previous patch).

We could store stat data for .gitignore files so we don't have to
rehash them if their content is different from index, but I think
.gitignore files are rarely modified, so not worth extra cache data
(and hashing penalty in read-cache.c:verify_hdr(), as we will be storing
this as an index extension).

The implication is, if you change .gitignore, you better add it to the
index soon or you lose all the benefit of untracked cache because a
modified .gitignore invalidates all subdirs recursively. This is
especially bad for .gitignore at root.

This cached output is about untracked files only, not ignored files
because the number of untracked files is usually small, so small cache
overhead, while the number of ignored files could go really high
(e.g. *.o files mixing with source code).

[1] "Description of NTFS date and time stamps for files and folders"
    http://support.microsoft.com/kb/299648

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
 dir.h |  60 +++++++++++++++++++++++++++
 2 files changed, 189 insertions(+), 21 deletions(-)

diff --git a/dir.c b/dir.c
index 33a35c1..25d8c5d 100644
--- a/dir.c
+++ b/dir.c
@@ -32,7 +32,7 @@ enum path_treatment {
 };
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
-	const char *path, int len,
+	const char *path, int len, struct untracked_cache_dir *untracked,
 	int check_only, const struct path_simplify *simplify);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
@@ -535,6 +535,53 @@ static void trim_trailing_spaces(char *buf)
 }
 
 /*
+ * Given a subdirectory name and "dir" of the current directory,
+ * search the subdir in "dir" and return it, or create a new one if it
+ * does not exist in "dir".
+ *
+ * If "name" has the trailing slash, it'll be excluded in the search.
+ */
+static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+						    struct untracked_cache_dir *dir,
+						    const char *name, int len)
+{
+	int first, last;
+	struct untracked_cache_dir *d;
+	if (!dir)
+		return NULL;
+	if (len && name[len - 1] == '/')
+		len--;
+	first = 0;
+	last = dir->dirs_nr;
+	while (last > first) {
+		int cmp, next = (last + first) >> 1;
+		d = dir->dirs[next];
+		cmp = strncmp(name, d->name, len);
+		if (!cmp && strlen(d->name) > len)
+			cmp = -1;
+		if (!cmp)
+			return d;
+		if (cmp < 0) {
+			last = next;
+			continue;
+		}
+		first = next+1;
+	}
+
+	uc->dir_created++;
+	d = xmalloc(sizeof(*d) + len);
+	memset(d, 0, sizeof(*d) + len);
+	memcpy(d->name, name, len);
+
+	ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
+	memmove(dir->dirs + first + 1, dir->dirs + first,
+		(dir->dirs_nr - first) * sizeof(*dir->dirs));
+	dir->dirs_nr++;
+	dir->dirs[first] = d;
+	return d;
+}
+
+/*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
  * exclude rules in "el".
@@ -642,14 +689,20 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
 /*
  * Used to set up core.excludesfile and .git/info/exclude lists.
  */
-void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
+				     struct sha1_stat *ss, int ss_valid)
 {
 	struct exclude_list *el;
 	el = add_exclude_list(dir, EXC_FILE, fname);
-	if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0)
+	if (add_excludes(fname, "", 0, el, 0, ss, ss_valid) < 0)
 		die("cannot use %s as an exclude file", fname);
 }
 
+void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+{
+	add_excludes_from_file_1(dir, fname, NULL, 0);
+}
+
 int match_basename(const char *basename, int basenamelen,
 		   const char *pattern, int prefix, int patternlen,
 		   int flags)
@@ -824,6 +877,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	struct exclude_list_group *group;
 	struct exclude_list *el;
 	struct exclude_stack *stk = NULL;
+	struct untracked_cache_dir *untracked;
 	int current;
 
 	group = &dir->exclude_list_group[EXC_DIRS];
@@ -861,9 +915,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	/* Read from the parent directories and push them down. */
 	current = stk ? stk->baselen : -1;
 	strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current);
+	if (dir->untracked)
+		untracked = stk ? stk->ucd : dir->untracked->root;
+	else
+		untracked = NULL;
+
 	while (current < baselen) {
 		struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 		const char *cp;
+		struct sha1_stat ss;
 
 		if (current < 0) {
 			cp = base;
@@ -873,10 +933,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			if (!cp)
 				die("oops in prep_exclude");
 			cp++;
+			untracked =
+				lookup_untracked(dir->untracked, untracked,
+						 base + current,
+						 cp - base - current);
 		}
 		stk->prev = dir->exclude_stack;
 		stk->baselen = cp - base;
 		stk->exclude_ix = group->nr;
+		stk->ucd = untracked;
 		el = add_exclude_list(dir, EXC_DIRS, NULL);
 		strbuf_add(&dir->basebuf, base + current, stk->baselen - current);
 		assert(stk->baselen == dir->basebuf.len);
@@ -899,6 +964,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		}
 
 		/* Try to read per-directory file */
+		hashclr(ss.sha1);
 		if (dir->exclude_per_dir) {
 			/*
 			 * dir->basebuf gets reused by the traversal, but we
@@ -912,8 +978,11 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			strbuf_addbuf(&sb, &dir->basebuf);
 			strbuf_addstr(&sb, dir->exclude_per_dir);
 			el->src = strbuf_detach(&sb, NULL);
-			add_excludes_from_file_to_list(el->src, el->src,
-						       stk->baselen, el, 1);
+			add_excludes(el->src, el->src, stk->baselen, el, 1,
+				     untracked ? &ss : NULL, 0);
+		}
+		if (untracked) {
+			hashcpy(untracked->exclude_sha1, ss.sha1);
 		}
 		dir->exclude_stack = stk;
 		current = stk->baselen;
@@ -1094,6 +1163,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
  *  (c) otherwise, we recurse into it.
  */
 static enum path_treatment treat_directory(struct dir_struct *dir,
+	struct untracked_cache_dir *untracked,
 	const char *dirname, int len, int exclude,
 	const struct path_simplify *simplify)
 {
@@ -1121,7 +1191,9 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return exclude ? path_excluded : path_untracked;
 
-	return read_directory_recursive(dir, dirname, len, 1, simplify);
+	untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+	return read_directory_recursive(dir, dirname, len,
+					untracked, 1, simplify);
 }
 
 /*
@@ -1237,6 +1309,7 @@ static int get_dtype(struct dirent *de, const char *path, int len)
 }
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
+					  struct untracked_cache_dir *untracked,
 					  struct strbuf *path,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de)
@@ -1289,7 +1362,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 		return path_none;
 	case DT_DIR:
 		strbuf_addch(path, '/');
-		return treat_directory(dir, path->buf, path->len, exclude,
+		return treat_directory(dir, untracked, path->buf, path->len, exclude,
 			simplify);
 	case DT_REG:
 	case DT_LNK:
@@ -1298,6 +1371,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 }
 
 static enum path_treatment treat_path(struct dir_struct *dir,
+				      struct untracked_cache_dir *untracked,
 				      struct dirent *de,
 				      struct strbuf *path,
 				      int baselen,
@@ -1313,7 +1387,16 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 		return path_none;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, path, simplify, dtype, de);
+	return treat_one_path(dir, untracked, path, simplify, dtype, de);
+}
+
+static void add_untracked(struct untracked_cache_dir *dir, const char *name)
+{
+	if (!dir)
+		return;
+	ALLOC_GROW(dir->untracked, dir->untracked_nr + 1,
+		   dir->untracked_alloc);
+	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
 /*
@@ -1329,7 +1412,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
  */
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    const char *base, int baselen,
-				    int check_only,
+				    struct untracked_cache_dir *untracked, int check_only,
 				    const struct path_simplify *simplify)
 {
 	DIR *fdir;
@@ -1343,24 +1426,36 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	if (!fdir)
 		goto out;
 
+	if (untracked)
+		untracked->check_only = !!check_only;
+
 	while ((de = readdir(fdir)) != NULL) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, de, &path, baselen, simplify);
+		state = treat_path(dir, untracked, de, &path, baselen, simplify);
+
 		if (state > dir_state)
 			dir_state = state;
 
 		/* recurse into subdir if instructed by treat_path */
 		if (state == path_recurse) {
-			subdir_state = read_directory_recursive(dir, path.buf,
-				path.len, check_only, simplify);
+			struct untracked_cache_dir *ud;
+			ud = lookup_untracked(dir->untracked, untracked,
+					      path.buf + baselen,
+					      path.len - baselen);
+			subdir_state =
+				read_directory_recursive(dir, path.buf, path.len,
+							 ud, check_only, simplify);
 			if (subdir_state > dir_state)
 				dir_state = subdir_state;
 		}
 
 		if (check_only) {
 			/* abort early if maximum state has been reached */
-			if (dir_state == path_untracked)
+			if (dir_state == path_untracked) {
+				if (untracked)
+					add_untracked(untracked, path.buf + baselen);
 				break;
+			}
 			/* skip the dir_add_* part */
 			continue;
 		}
@@ -1378,8 +1473,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			break;
 
 		case path_untracked:
-			if (!(dir->flags & DIR_SHOW_IGNORED))
-				dir_add_name(dir, path.buf, path.len);
+			if (dir->flags & DIR_SHOW_IGNORED)
+				break;
+			dir_add_name(dir, path.buf, path.len);
+			if (untracked)
+				add_untracked(untracked, path.buf + baselen);
 			break;
 
 		default:
@@ -1456,7 +1554,7 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, NULL, &sb, simplify,
 				   DT_DIR, NULL) == path_none)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1496,7 +1594,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	 */
 	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
 	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len, 0, simplify);
+		read_directory_recursive(dir, path, len,
+					 dir->untracked ? dir->untracked->root : NULL,
+					 0, simplify);
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
@@ -1666,10 +1766,18 @@ void setup_standard_excludes(struct dir_struct *dir)
 		home_config_paths(NULL, &xdg_path, "ignore");
 		excludes_file = xdg_path;
 	}
-	if (!access_or_warn(path, R_OK, 0))
-		add_excludes_from_file(dir, path);
-	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
-		add_excludes_from_file(dir, excludes_file);
+	if (!access_or_warn(path, R_OK, 0)) {
+		struct sha1_stat *ss = NULL;
+		if (dir->untracked)
+			ss = &dir->ss_info_exclude;
+		add_excludes_from_file_1(dir, path, ss, 0);
+	}
+	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0)) {
+		struct sha1_stat *ss = NULL;
+		if (dir->untracked)
+			ss = &dir->ss_excludes_file;
+		add_excludes_from_file_1(dir, excludes_file, ss, 0);
+	}
 }
 
 int remove_path(const char *name)
diff --git a/dir.h b/dir.h
index 032d197..f282501 100644
--- a/dir.h
+++ b/dir.h
@@ -66,6 +66,7 @@ struct exclude_stack {
 	struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
 	int baselen;
 	int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
+	struct untracked_cache_dir *ucd;
 };
 
 struct exclude_list_group {
@@ -78,6 +79,60 @@ struct sha1_stat {
 	unsigned char sha1[20];
 };
 
+/*
+ *  Untracked cache
+ *
+ *  The following inputs are sufficient to determine what files in a
+ *  directory are excluded:
+ *
+ *   - The list of files and directories of the directory in question
+ *   - The $GIT_DIR/index
+ *   - dir_struct flags
+ *   - The content of $GIT_DIR/info/exclude
+ *   - The content of core.excludesfile
+ *   - The content (or the lack) of .gitignore of all parent directories
+ *     from $GIT_WORK_TREE
+ *   - The check_only flag in read_directory_recursive (for
+ *     DIR_HIDE_EMPTY_DIRECTORIES)
+ *
+ *  The first input can be checked using directory mtime. In many
+ *  filesystems, directory mtime (stat_data field) is updated when its
+ *  files or direct subdirs are added or removed.
+ *
+ *  The second one can be hooked from cache_tree_invalidate_path().
+ *  Whenever a file (or a submodule) is added or removed from a
+ *  directory, we invalidate that directory.
+ *
+ *  The remaining inputs are easy, their SHA-1 could be used to verify
+ *  their contents (exclude_sha1[], info_exclude_sha1[] and
+ *  excludes_file_sha1[])
+ */
+struct untracked_cache_dir {
+	struct untracked_cache_dir **dirs;
+	char **untracked;
+	/* null SHA-1 means this directory does not have .gitignore */
+	unsigned char exclude_sha1[20];
+	struct stat_data stat_data;
+	unsigned int check_only : 1;
+	unsigned int untracked_nr : 29;
+	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
+	char name[1];
+};
+
+struct untracked_cache {
+	struct sha1_stat ss_info_exclude;
+	struct sha1_stat ss_excludes_file;
+	const char *exclude_per_dir;
+	/*
+	 * dir_struct#flags must match dir_flags or the untracked
+	 * cache is ignored.
+	 */
+	unsigned dir_flags;
+	struct untracked_cache_dir *root;
+	/* Statistics */
+	int dir_created;
+};
+
 struct dir_struct {
 	int nr, alloc;
 	int ignored_nr, ignored_alloc;
@@ -125,6 +180,11 @@ struct dir_struct {
 	struct exclude_stack *exclude_stack;
 	struct exclude *exclude;
 	struct strbuf basebuf;
+
+	/* Enable untracked file cache if set */
+	struct untracked_cache *untracked;
+	struct sha1_stat ss_info_exclude;
+	struct sha1_stat ss_excludes_file;
 };
 
 /*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 03/19] untracked cache: initial untracked cache validation
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 04/19] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
                   ` (16 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Make sure the starting conditions and all global exclude files are
good to go. If not, either disable untracked cache completely, or wipe
out the cache and start fresh.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h |   4 +++
 2 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 25d8c5d..5161f74 100644
--- a/dir.c
+++ b/dir.c
@@ -581,6 +581,22 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
 	return d;
 }
 
+static void do_invalidate_gitignore(struct untracked_cache_dir *dir)
+{
+	int i;
+	dir->valid = 0;
+	dir->untracked_nr = 0;
+	for (i = 0; i < dir->dirs_nr; i++)
+		do_invalidate_gitignore(dir->dirs[i]);
+}
+
+static void invalidate_gitignore(struct untracked_cache *uc,
+				 struct untracked_cache_dir *dir)
+{
+	uc->gitignore_invalidated++;
+	do_invalidate_gitignore(dir);
+}
+
 /*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
@@ -693,6 +709,13 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 				     struct sha1_stat *ss, int ss_valid)
 {
 	struct exclude_list *el;
+	/*
+	 * catch setup_standard_excludes() that's called before
+	 * dir->untracked is assigned. That function behaves
+	 * differently when dir->untracked is non-NULL.
+	 */
+	if (!dir->untracked)
+		dir->unmanaged_exclude_files++;
 	el = add_exclude_list(dir, EXC_FILE, fname);
 	if (add_excludes(fname, "", 0, el, 0, ss, ss_valid) < 0)
 		die("cannot use %s as an exclude file", fname);
@@ -700,6 +723,7 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 
 void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 {
+	dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
 	add_excludes_from_file_1(dir, fname, NULL, 0);
 }
 
@@ -1567,9 +1591,87 @@ static int treat_leading_path(struct dir_struct *dir,
 	return rc;
 }
 
+static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
+						      int base_len,
+						      const struct pathspec *pathspec)
+{
+	struct untracked_cache_dir *root;
+
+	if (!dir->untracked)
+		return NULL;
+
+	/*
+	 * We only support $GIT_DIR/info/exclude and core.excludesfile
+	 * as the global ignore rule files. Any other additions
+	 * (e.g. from command line) invalidate the cache. This
+	 * condition also catches running setup_standard_excludes()
+	 * before setting dir->untracked!
+	 */
+	if (dir->unmanaged_exclude_files)
+		return NULL;
+
+	/*
+	 * Optimize for the main use case only: whole-tree git
+	 * status. More work involved in treat_leading_path() if we
+	 * use cache on just a subset of the worktree. pathspec
+	 * support could make the matter even worse.
+	 */
+	if (base_len || (pathspec && pathspec->nr))
+		return NULL;
+
+	/* Different set of flags may produce different results */
+	if (dir->flags != dir->untracked->dir_flags ||
+	    /*
+	     * See treat_directory(), case index_nonexistent. Without
+	     * this flag, we may need to also cache .git file content
+	     * for the resolve_gitlink_ref() call, which we don't.
+	     */
+	    !(dir->flags & DIR_SHOW_OTHER_DIRECTORIES) ||
+	    /* We don't support collecting ignore files */
+	    (dir->flags & (DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO |
+			   DIR_COLLECT_IGNORED)))
+		return NULL;
+
+	/*
+	 * If we use .gitignore in the cache and now you change it to
+	 * .gitexclude, everything will go wrong.
+	 */
+	if (dir->exclude_per_dir != dir->untracked->exclude_per_dir &&
+	    strcmp(dir->exclude_per_dir, dir->untracked->exclude_per_dir))
+		return NULL;
+
+	/*
+	 * EXC_CMDL is not considered in the cache. If people set it,
+	 * skip the cache.
+	 */
+	if (dir->exclude_list_group[EXC_CMDL].nr)
+		return NULL;
+
+	if (!dir->untracked->root) {
+		const int len = sizeof(*dir->untracked->root);
+		dir->untracked->root = xmalloc(len);
+		memset(dir->untracked->root, 0, len);
+	}
+
+	/* Validate $GIT_DIR/info/exclude and core.excludesfile */
+	root = dir->untracked->root;
+	if (hashcmp(dir->ss_info_exclude.sha1,
+		    dir->untracked->ss_info_exclude.sha1)) {
+		invalidate_gitignore(dir->untracked, root);
+		dir->untracked->ss_info_exclude = dir->ss_info_exclude;
+	}
+	if (hashcmp(dir->ss_excludes_file.sha1,
+		    dir->untracked->ss_excludes_file.sha1)) {
+		invalidate_gitignore(dir->untracked, root);
+		dir->untracked->ss_excludes_file = dir->ss_excludes_file;
+	}
+	return root;
+}
+
 int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
 {
 	struct path_simplify *simplify;
+	struct untracked_cache_dir *untracked;
 
 	/*
 	 * Check out create_simplify()
@@ -1593,10 +1695,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	 * create_simplify().
 	 */
 	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
+	untracked = validate_untracked_cache(dir, len, pathspec);
+	if (!untracked)
+		/*
+		 * make sure untracked cache code path is disabled,
+		 * e.g. prep_exclude()
+		 */
+		dir->untracked = NULL;
 	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len,
-					 dir->untracked ? dir->untracked->root : NULL,
-					 0, simplify);
+		read_directory_recursive(dir, path, len, untracked, 0, simplify);
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
diff --git a/dir.h b/dir.h
index f282501..35701b2 100644
--- a/dir.h
+++ b/dir.h
@@ -114,6 +114,8 @@ struct untracked_cache_dir {
 	unsigned char exclude_sha1[20];
 	struct stat_data stat_data;
 	unsigned int check_only : 1;
+	/* all data in this struct are good */
+	unsigned int valid : 1;
 	unsigned int untracked_nr : 29;
 	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
 	char name[1];
@@ -131,6 +133,7 @@ struct untracked_cache {
 	struct untracked_cache_dir *root;
 	/* Statistics */
 	int dir_created;
+	int gitignore_invalidated;
 };
 
 struct dir_struct {
@@ -185,6 +188,7 @@ struct dir_struct {
 	struct untracked_cache *untracked;
 	struct sha1_stat ss_info_exclude;
 	struct sha1_stat ss_excludes_file;
+	unsigned unmanaged_exclude_files;
 };
 
 /*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 04/19] untracked cache: invalidate dirs recursively if .gitignore changes
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (2 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 03/19] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 05/19] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
                   ` (15 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

It's easy to see that if an existing .gitignore changes, its SHA-1
would be different and invalidate_gitignore() is called.

If .gitignore is removed, add_excludes() will treat it like an empty
.gitignore, which again should invalidate the cached directory data.

If .gitignore is added, lookup_untracked() already fills initial
.gitignore SHA-1 as "empty file", so again invalidate_gitignore() is
called.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 17 ++++++++++++++++-
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 5161f74..a0a0fa8 100644
--- a/dir.c
+++ b/dir.c
@@ -1005,7 +1005,22 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			add_excludes(el->src, el->src, stk->baselen, el, 1,
 				     untracked ? &ss : NULL, 0);
 		}
-		if (untracked) {
+		/*
+		 * NEEDSWORK: when untracked cache is enabled, prep_exclude()
+		 * will first be called in valid_cached_dir() then maybe many
+		 * times more in last_exclude_matching(). When the cache is
+		 * used, last_exclude_matching() will not be called and
+		 * reading .gitignore content will be a waste.
+		 *
+		 * So when it's called by valid_cached_dir() and we can get
+		 * .gitignore SHA-1 from the index (i.e. .gitignore is not
+		 * modified on work tree), we could delay reading the
+		 * .gitignore content until we absolutely need it in
+		 * last_exclude_matching(). Be careful about ignore rule
+		 * order, though, if you do that.
+		 */
+		if (untracked && hashcmp(ss.sha1, untracked->exclude_sha1)) {
+			invalidate_gitignore(dir->untracked, untracked);
 			hashcpy(untracked->exclude_sha1, ss.sha1);
 		}
 		dir->exclude_stack = stk;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 05/19] untracked cache: make a wrapper around {open,read,close}dir()
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (3 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 04/19] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
                   ` (14 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This allows us to feed different info to read_directory_recursive()
based on untracked cache in the next patch.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/dir.c b/dir.c
index a0a0fa8..2793e57 100644
--- a/dir.c
+++ b/dir.c
@@ -31,6 +31,15 @@ enum path_treatment {
 	path_untracked
 };
 
+/*
+ * Support data structure for our opendir/readdir/closedir wrappers
+ */
+struct cached_dir {
+	DIR *fdir;
+	struct untracked_cache_dir *untracked;
+	struct dirent *de;
+};
+
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	const char *path, int len, struct untracked_cache_dir *untracked,
 	int check_only, const struct path_simplify *simplify);
@@ -1411,12 +1420,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct untracked_cache_dir *untracked,
-				      struct dirent *de,
+				      struct cached_dir *cdir,
 				      struct strbuf *path,
 				      int baselen,
 				      const struct path_simplify *simplify)
 {
 	int dtype;
+	struct dirent *de = cdir->de;
 
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
@@ -1438,6 +1448,37 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
 	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
+static int open_cached_dir(struct cached_dir *cdir,
+			   struct dir_struct *dir,
+			   struct untracked_cache_dir *untracked,
+			   struct strbuf *path,
+			   int check_only)
+{
+	memset(cdir, 0, sizeof(*cdir));
+	cdir->untracked = untracked;
+	cdir->fdir = opendir(path->len ? path->buf : ".");
+	if (!cdir->fdir)
+		return -1;
+	return 0;
+}
+
+int read_cached_dir(struct cached_dir *cdir)
+{
+	if (cdir->fdir) {
+		cdir->de = readdir(cdir->fdir);
+		if (!cdir->de)
+			return -1;
+		return 0;
+	}
+	return -1;
+}
+
+static void close_cached_dir(struct cached_dir *cdir)
+{
+	if (cdir->fdir)
+		closedir(cdir->fdir);
+}
+
 /*
  * Read a directory tree. We currently ignore anything but
  * directories, regular files and symlinks. That's because git
@@ -1454,23 +1495,21 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    struct untracked_cache_dir *untracked, int check_only,
 				    const struct path_simplify *simplify)
 {
-	DIR *fdir;
+	struct cached_dir cdir;
 	enum path_treatment state, subdir_state, dir_state = path_none;
-	struct dirent *de;
 	struct strbuf path = STRBUF_INIT;
 
 	strbuf_add(&path, base, baselen);
 
-	fdir = opendir(path.len ? path.buf : ".");
-	if (!fdir)
+	if (open_cached_dir(&cdir, dir, untracked, &path, check_only))
 		goto out;
 
 	if (untracked)
 		untracked->check_only = !!check_only;
 
-	while ((de = readdir(fdir)) != NULL) {
+	while (!read_cached_dir(&cdir)) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, untracked, de, &path, baselen, simplify);
+		state = treat_path(dir, untracked, &cdir, &path, baselen, simplify);
 
 		if (state > dir_state)
 			dir_state = state;
@@ -1523,7 +1562,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			break;
 		}
 	}
-	closedir(fdir);
+	close_cached_dir(&cdir);
  out:
 	strbuf_release(&path);
 
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (4 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 05/19] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-30 16:19   ` Eric Sunshine
  2014-10-27 12:10 ` [PATCH 07/19] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
                   ` (13 subsequent siblings)
  19 siblings, 1 reply; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

The main readdir loop in read_directory_recursive() is replaced with a
new one that checks if cached results of a directory are still valid.

If a file is added or removed from the index, the containing directory
is invalidated (but not its subdirs). If directory's mtime is changed,
the same happens. If a .gitignore is updated, the containing directory
and all subdirs are invalidated recursively. If dir_struct#flags or
other conditions change, the cache is ignored.

If a directory is invalidated, we opendir/readdir/closedir and run the
exclude machinery on that directory listing as usual. If untracked
cache is also enabled, we'll update the cache along the way. If a
directory is validated, we simply pull the untracked listing out from
the cache. The cache also records the list of direct subdirs that we
have to recurse in. Fully excluded directories are seen as "untracked
files".

In the best case when no dirs are invalidated, read_directory()
becomes a series of

  stat(dir), open(.gitignore), fstat(), read(), close() and optionally
  hash_sha1_file()

For comparison, standard read_directory() is a sequence of

  opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the
  expensive last_exclude_matching() and closedir().

We already try not to open(.gitignore) if we know it does not exist,
so open/fstat/read/close sequence does not apply to every
directory. The sequence could be reduced further, as noted in
prep_exclude() in another patch. So in theory, the entire best-case
read_directory sequence could be reduced to a series of stat() and
nothing else.

This is not a silver bullet approach. When you compile a C file, for
example, the old .o file is removed and a new one with the same name
created, effectively invalidating the containing directory's cache
(but not its subdirectories). If your build process touches every
directory, this cache adds extra overhead for nothing, so it's a good
idea to separate generated files from tracked files. Editors may use
the same strategy for saving files. And of course you're out of luck
running your repo on an unsupported filesystem and/or operating system.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h |   2 ++
 2 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index 2793e57..55780a7 100644
--- a/dir.c
+++ b/dir.c
@@ -37,7 +37,12 @@ enum path_treatment {
 struct cached_dir {
 	DIR *fdir;
 	struct untracked_cache_dir *untracked;
+	int nr_files;
+	int nr_dirs;
+
 	struct dirent *de;
+	const char *file;
+	struct untracked_cache_dir *ucd;
 };
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
@@ -606,6 +611,14 @@ static void invalidate_gitignore(struct untracked_cache *uc,
 	do_invalidate_gitignore(dir);
 }
 
+static void invalidate_directory(struct untracked_cache *uc,
+				 struct untracked_cache_dir *dir)
+{
+	uc->dir_invalidated++;
+	dir->valid = 0;
+	dir->untracked_nr = 0;
+}
+
 /*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
@@ -1418,6 +1431,41 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	}
 }
 
+static enum path_treatment treat_path_fast(struct dir_struct *dir,
+					   struct untracked_cache_dir *untracked,
+					   struct cached_dir *cdir,
+					   struct strbuf *path,
+					   int baselen,
+					   const struct path_simplify *simplify)
+{
+	if (!cdir->ucd) {
+		strbuf_setlen(path, baselen);
+		strbuf_addstr(path, cdir->file);
+		return path_untracked;
+	}
+	strbuf_setlen(path, baselen);
+	strbuf_addstr(path, cdir->ucd->name);
+	/* treat_one_path() does this before it calls treat_directory() */
+	if (path->buf[path->len - 1] != '/')
+		strbuf_addch(path, '/');
+	if (cdir->ucd->check_only)
+		/*
+		 * check_only is set as a result of treat_directory() getting
+		 * to its bottom. Verify again the same set of directories
+		 * with check_only set.
+		 */
+		return read_directory_recursive(dir, path->buf, path->len,
+
+						cdir->ucd, 1, simplify);
+	/*
+	 * We get path_recurse in the first run when
+	 * directory_exists_in_index() returns index_nonexistent. We
+	 * are sure that new changes in the index does not impact the
+	 * outcome. Return now.
+	 */
+	return path_recurse;
+}
+
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct untracked_cache_dir *untracked,
 				      struct cached_dir *cdir,
@@ -1428,6 +1476,9 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 	int dtype;
 	struct dirent *de = cdir->de;
 
+	if (!de)
+		return treat_path_fast(dir, untracked, cdir, path,
+				       baselen, simplify);
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
@@ -1448,6 +1499,52 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
 	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
+static int valid_cached_dir(struct dir_struct *dir,
+			    struct untracked_cache_dir *untracked,
+			    struct strbuf *path,
+			    int check_only)
+{
+	struct stat st;
+
+	if (!untracked)
+		return 0;
+
+	if (stat(path->len ? path->buf : ".", &st)) {
+		invalidate_directory(dir->untracked, untracked);
+		memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
+		return 0;
+	}
+	if (!untracked->valid ||
+	    match_stat_data(&untracked->stat_data, &st)) {
+		if (untracked->valid)
+			invalidate_directory(dir->untracked, untracked);
+		fill_stat_data(&untracked->stat_data, &st);
+		return 0;
+	}
+
+	if (untracked->check_only != !!check_only) {
+		invalidate_directory(dir->untracked, untracked);
+		return 0;
+	}
+
+	/*
+	 * prep_exclude will be called eventually on this directory,
+	 * but it's called much later in last_exclude_matching(). We
+	 * need it now to determine the validity of the cache for this
+	 * path. The next calls will be nearly no-op, the way
+	 * prep_exclude() is designed.
+	 */
+	if (path->len && path->buf[path->len - 1] != '/') {
+		strbuf_addch(path, '/');
+		prep_exclude(dir, path->buf, path->len);
+		strbuf_setlen(path, path->len - 1);
+	} else
+		prep_exclude(dir, path->buf, path->len);
+
+	/* hopefully prep_exclude() haven't invalidated this entry... */
+	return untracked->valid;
+}
+
 static int open_cached_dir(struct cached_dir *cdir,
 			   struct dir_struct *dir,
 			   struct untracked_cache_dir *untracked,
@@ -1456,7 +1553,11 @@ static int open_cached_dir(struct cached_dir *cdir,
 {
 	memset(cdir, 0, sizeof(*cdir));
 	cdir->untracked = untracked;
+	if (valid_cached_dir(dir, untracked, path, check_only))
+		return 0;
 	cdir->fdir = opendir(path->len ? path->buf : ".");
+	if (dir->untracked)
+		dir->untracked->dir_opened++;
 	if (!cdir->fdir)
 		return -1;
 	return 0;
@@ -1470,6 +1571,18 @@ int read_cached_dir(struct cached_dir *cdir)
 			return -1;
 		return 0;
 	}
+	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
+		struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+		cdir->ucd = d;
+		cdir->nr_dirs++;
+		return 0;
+	}
+	cdir->ucd = NULL;
+	if (cdir->nr_files < cdir->untracked->untracked_nr) {
+		struct untracked_cache_dir *d = cdir->untracked;
+		cdir->file = d->untracked[cdir->nr_files++];
+		return 0;
+	}
 	return -1;
 }
 
@@ -1477,6 +1590,12 @@ static void close_cached_dir(struct cached_dir *cdir)
 {
 	if (cdir->fdir)
 		closedir(cdir->fdir);
+	/*
+	 * We have gone through this directory and found no untracked
+	 * entries. Mark it valid.
+	 */
+	if (cdir->untracked && !cdir->untracked->valid)
+		cdir->untracked->valid = 1;
 }
 
 /*
@@ -1530,7 +1649,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		if (check_only) {
 			/* abort early if maximum state has been reached */
 			if (dir_state == path_untracked) {
-				if (untracked)
+				if (cdir.fdir)
 					add_untracked(untracked, path.buf + baselen);
 				break;
 			}
@@ -1554,7 +1673,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			if (dir->flags & DIR_SHOW_IGNORED)
 				break;
 			dir_add_name(dir, path.buf, path.len);
-			if (untracked)
+			if (cdir.fdir)
 				add_untracked(untracked, path.buf + baselen);
 			break;
 
diff --git a/dir.h b/dir.h
index 35701b2..1fefd4e 100644
--- a/dir.h
+++ b/dir.h
@@ -134,6 +134,8 @@ struct untracked_cache {
 	/* Statistics */
 	int dir_created;
 	int gitignore_invalidated;
+	int dir_invalidated;
+	int dir_opened;
 };
 
 struct dir_struct {
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 07/19] untracked cache: mark what dirs should be recursed/saved
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (5 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 08/19] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
                   ` (12 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

If we redo this thing in a functional style, we would have one struct
untracked_dir as input tree and another as output. The input is used
for verification. The output is a brand new tree, reflecting current
worktree.

But that means recreating a lot of dir nodes even if a lot could be
shared between input and output trees in good cases. So we go with the
messy but efficient way, combining both input and output trees into
one. We need a way to know which node in this combined tree belongs to
the output. This is the purpose of this "recurse" flag.

"valid" bit can't be used for this because it's about data of the node
except the subdirs. When we invalidate a directory, we want to keep
cached data of the subdirs intact even though we don't really know
what subdir still exists (yet). Then we check worktree to see what
actual subdir remains on disk. Those will have 'recurse' bit set
again. If cached data for those are still valid, we may be able to
avoid computing exclude files for them. Those subdirs that are deleted
will have 'recurse' remain clear and their 'valid' bits do not
matter.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 14 +++++++++++++-
 dir.h |  3 ++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index 55780a7..d9675c8 100644
--- a/dir.c
+++ b/dir.c
@@ -614,9 +614,12 @@ static void invalidate_gitignore(struct untracked_cache *uc,
 static void invalidate_directory(struct untracked_cache *uc,
 				 struct untracked_cache_dir *dir)
 {
+	int i;
 	uc->dir_invalidated++;
 	dir->valid = 0;
 	dir->untracked_nr = 0;
+	for (i = 0; i < dir->dirs_nr; i++)
+		dir->dirs[i]->recurse = 0;
 }
 
 /*
@@ -1573,6 +1576,10 @@ int read_cached_dir(struct cached_dir *cdir)
 	}
 	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
 		struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+		if (!d->recurse) {
+			cdir->nr_dirs++;
+			continue;
+		}
 		cdir->ucd = d;
 		cdir->nr_dirs++;
 		return 0;
@@ -1594,8 +1601,10 @@ static void close_cached_dir(struct cached_dir *cdir)
 	 * We have gone through this directory and found no untracked
 	 * entries. Mark it valid.
 	 */
-	if (cdir->untracked && !cdir->untracked->valid)
+	if (cdir->untracked) {
 		cdir->untracked->valid = 1;
+		cdir->untracked->recurse = 1;
+	}
 }
 
 /*
@@ -1838,6 +1847,9 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 		invalidate_gitignore(dir->untracked, root);
 		dir->untracked->ss_excludes_file = dir->ss_excludes_file;
 	}
+
+	/* Make sure this directory is not dropped out at saving phase */
+	root->recurse = 1;
 	return root;
 }
 
diff --git a/dir.h b/dir.h
index 1fefd4e..c302dcb 100644
--- a/dir.h
+++ b/dir.h
@@ -113,8 +113,9 @@ struct untracked_cache_dir {
 	/* null SHA-1 means this directory does not have .gitignore */
 	unsigned char exclude_sha1[20];
 	struct stat_data stat_data;
+	unsigned int recurse : 1;
 	unsigned int check_only : 1;
-	/* all data in this struct are good */
+	/* all data except 'dirs' in this struct are good */
 	unsigned int valid : 1;
 	unsigned int untracked_nr : 29;
 	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 08/19] untracked cache: don't open non-existent .gitignore
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (6 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 07/19] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 09/19] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
                   ` (11 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This cuts down a significant number of open(.gitignore) because most
directories usually don't have .gitignore files.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index d9675c8..d4f9830 100644
--- a/dir.c
+++ b/dir.c
@@ -1014,7 +1014,21 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 
 		/* Try to read per-directory file */
 		hashclr(ss.sha1);
-		if (dir->exclude_per_dir) {
+		if (dir->exclude_per_dir &&
+		    /*
+		     * If we know that no files have been added in
+		     * this directory (i.e. valid_cached_dir() has
+		     * been executed and set untracked->valid) ..
+		     */
+		    (!untracked || !untracked->valid ||
+		     /*
+		      * .. and .gitignore does not exist before
+		      * (i.e. null exclude_sha1 and skip_worktree is
+		      * not set). Then we can skip loading .gitignore,
+		      * which would result in ENOENT anyway.
+		      * skip_worktree is taken care in read_directory()
+		      */
+		     !is_null_sha1(untracked->exclude_sha1))) {
 			/*
 			 * dir->basebuf gets reused by the traversal, but we
 			 * need fname to remain unchanged to ensure the src
@@ -1778,6 +1792,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 						      const struct pathspec *pathspec)
 {
 	struct untracked_cache_dir *root;
+	int i;
 
 	if (!dir->untracked)
 		return NULL;
@@ -1829,6 +1844,15 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	if (dir->exclude_list_group[EXC_CMDL].nr)
 		return NULL;
 
+	/*
+	 * An optimization in prep_exclude() does not play well with
+	 * CE_SKIP_WORKTREE. It's a rare case anyway, if a single
+	 * entry has that bit set, disable the whole untracked cache.
+	 */
+	for (i = 0; i < active_nr; i++)
+		if (ce_skip_worktree(active_cache[i]))
+			return NULL;
+
 	if (!dir->untracked->root) {
 		const int len = sizeof(*dir->untracked->root);
 		dir->untracked->root = xmalloc(len);
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 09/19] untracked cache: save to an index extension
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (7 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 08/19] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 10/19] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
                   ` (10 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      |  3 +++
 dir.c        | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dir.h        |  1 +
 read-cache.c | 12 +++++++++
 4 files changed, 101 insertions(+)

diff --git a/cache.h b/cache.h
index 5b86065..4f903fa 100644
--- a/cache.h
+++ b/cache.h
@@ -298,6 +298,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
 
 struct split_index;
+struct untracked_cache;
+
 struct index_state {
 	struct cache_entry **cache;
 	unsigned int version;
@@ -311,6 +313,7 @@ struct index_state {
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
 	unsigned char sha1[20];
+	struct untracked_cache *untracked;
 };
 
 extern struct index_state the_index;
diff --git a/dir.c b/dir.c
index d4f9830..d696388 100644
--- a/dir.c
+++ b/dir.c
@@ -12,6 +12,7 @@
 #include "refs.h"
 #include "wildmatch.h"
 #include "pathspec.h"
+#include "varint.h"
 
 struct path_simplify {
 	int len;
@@ -2145,3 +2146,87 @@ void clear_directory(struct dir_struct *dir)
 	}
 	strbuf_release(&dir->basebuf);
 }
+
+struct ondisk_untracked_cache {
+	struct stat_data info_exclude_stat;
+	struct stat_data excludes_file_stat;
+	uint32_t dir_flags;
+	unsigned char info_exclude_sha1[20];
+	unsigned char excludes_file_sha1[20];
+	char exclude_per_dir[1];
+};
+
+static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = htonl(from->sd_ctime.sec);
+	to->sd_ctime.nsec = htonl(from->sd_ctime.nsec);
+	to->sd_mtime.sec  = htonl(from->sd_mtime.sec);
+	to->sd_mtime.nsec = htonl(from->sd_mtime.nsec);
+	to->sd_dev	  = htonl(from->sd_dev);
+	to->sd_ino	  = htonl(from->sd_ino);
+	to->sd_uid	  = htonl(from->sd_uid);
+	to->sd_gid	  = htonl(from->sd_gid);
+	to->sd_size	  = htonl(from->sd_size);
+}
+
+static void write_one_dir(struct strbuf *out, struct untracked_cache_dir *untracked)
+{
+	struct stat_data stat_data;
+	unsigned char intbuf[16];
+	unsigned int intlen, value;
+	int i;
+
+	stat_data_to_disk(&stat_data, &untracked->stat_data);
+	strbuf_add(out, &stat_data, sizeof(stat_data));
+	strbuf_add(out, untracked->exclude_sha1, 20);
+
+	/*
+	 * untracked_nr should be reset whenever valid is clear, but
+	 * for safety..
+	 */
+	if (!untracked->valid) {
+		untracked->untracked_nr = 0;
+		untracked->check_only = 0;
+	}
+
+	value  = untracked->valid;
+	value |= untracked->check_only   << 1;
+	value |= untracked->untracked_nr << 2;
+	intlen = encode_varint(value, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	/* skip non-recurse directories */
+	for (i = 0, value = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			value++;
+	intlen = encode_varint(value, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	strbuf_add(out, untracked->name, strlen(untracked->name) + 1);
+
+	for (i = 0; i < untracked->untracked_nr; i++)
+		strbuf_add(out, untracked->untracked[i],
+			   strlen(untracked->untracked[i]) + 1);
+
+	for (i = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			write_one_dir(out, untracked->dirs[i]);
+}
+
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
+{
+	struct ondisk_untracked_cache *ouc;
+	int len = 0;
+	if (untracked->exclude_per_dir)
+		len = strlen(untracked->exclude_per_dir);
+	ouc = xmalloc(sizeof(*ouc) + len);
+	stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
+	stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
+	hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.sha1);
+	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
+	ouc->dir_flags = htonl(untracked->dir_flags);
+	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+	strbuf_add(out, ouc, sizeof(*ouc) + len);
+	if (untracked->root)
+		write_one_dir(out, untracked->root);
+}
diff --git a/dir.h b/dir.h
index c302dcb..7ef0f63 100644
--- a/dir.h
+++ b/dir.h
@@ -297,4 +297,5 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 8f3e9eb..efff4e2 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -38,6 +38,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 #define CACHE_EXT_TREE 0x54524545	/* "TREE" */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b	  /* "link" */
+#define CACHE_EXT_UNTRACKED 0x554E5452	  /* "UNTR" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -2035,6 +2036,17 @@ static int do_write_index(struct index_state *istate, int newfd,
 		if (err)
 			return -1;
 	}
+	if (istate->untracked) {
+		struct strbuf sb = STRBUF_INIT;
+
+		write_untracked_extension(&sb, istate->untracked);
+		err = write_index_ext_header(&c, newfd, CACHE_EXT_UNTRACKED,
+					     sb.len) < 0 ||
+			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
 
 	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
 		return -1;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 10/19] untracked cache: load from UNTR index extension
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (8 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 09/19] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 11/19] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
                   ` (9 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c        | 147 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h        |   3 ++
 read-cache.c |   5 ++
 3 files changed, 151 insertions(+), 4 deletions(-)

diff --git a/dir.c b/dir.c
index d696388..c97b0c3 100644
--- a/dir.c
+++ b/dir.c
@@ -2085,15 +2085,27 @@ void setup_standard_excludes(struct dir_struct *dir)
 	}
 	if (!access_or_warn(path, R_OK, 0)) {
 		struct sha1_stat *ss = NULL;
-		if (dir->untracked)
+		int ss_valid = 0;
+		if (dir->untracked) {
 			ss = &dir->ss_info_exclude;
-		add_excludes_from_file_1(dir, path, ss, 0);
+			if (dir->untracked->loaded) {
+				*ss = dir->untracked->ss_info_exclude;
+				ss_valid = 1;
+			}
+		}
+		add_excludes_from_file_1(dir, path, ss, ss_valid);
 	}
 	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0)) {
 		struct sha1_stat *ss = NULL;
-		if (dir->untracked)
+		int ss_valid = 0;
+		if (dir->untracked) {
 			ss = &dir->ss_excludes_file;
-		add_excludes_from_file_1(dir, excludes_file, ss, 0);
+			if (dir->untracked->loaded) {
+				*ss = dir->untracked->ss_excludes_file;
+				ss_valid = 1;
+			}
+		}
+		add_excludes_from_file_1(dir, excludes_file, ss, ss_valid);
 	}
 }
 
@@ -2230,3 +2242,130 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	if (untracked->root)
 		write_one_dir(out, untracked->root);
 }
+
+static void free_untracked(struct untracked_cache_dir *ucd)
+{
+	int i;
+	if (!ucd)
+		return;
+	for (i = 0; i < ucd->dirs_nr; i++)
+		free_untracked(ucd->dirs[i]);
+	for (i = 0; i < ucd->untracked_nr; i++)
+		free(ucd->untracked[i]);
+	free(ucd->untracked);
+	free(ucd->dirs);
+	free(ucd);
+}
+
+void free_untracked_cache(struct untracked_cache *uc)
+{
+	if (uc)
+		free_untracked(uc->root);
+	free(uc);
+}
+
+static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
+	to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
+	to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
+	to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
+	to->sd_dev	  = get_be32(&from->sd_dev);
+	to->sd_ino	  = get_be32(&from->sd_ino);
+	to->sd_uid	  = get_be32(&from->sd_uid);
+	to->sd_gid	  = get_be32(&from->sd_gid);
+	to->sd_size	  = get_be32(&from->sd_size);
+}
+
+static int read_one_dir(struct untracked_cache_dir **untracked_,
+			const unsigned char *data_, unsigned long sz)
+{
+#define NEXT(x) \
+	next = data + (x); \
+	if (next > data_ + sz) \
+		return -1;
+
+	struct untracked_cache_dir ud, *untracked;
+	const unsigned char *next, *data = data_;
+	unsigned int value;
+	int i, len;
+
+	memset(&ud, 0, sizeof(ud));
+
+	NEXT(sizeof(struct stat_data));
+	stat_data_from_disk(&ud.stat_data, (struct stat_data *)data);
+	data = next;
+
+	NEXT(20);
+	hashcpy(ud.exclude_sha1, data);
+	data = next;
+
+	next = data;
+	value = decode_varint(&next);
+	if (next > data_ + sz)
+		return -1;
+	ud.recurse = 1;
+	ud.valid = value & 1;
+	ud.check_only = (value >> 1) & 1;
+	ud.untracked_alloc = ud.untracked_nr = value >> 2;
+	if (ud.untracked_nr)
+		ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr);
+	data = next;
+
+	next = data;
+	ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
+	if (next > data_ + sz)
+		return -1;
+	ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr);
+	data = next;
+
+	len = strlen((const char *)data);
+	NEXT(len + 1);
+	*untracked_ = untracked = xmalloc(sizeof(*untracked) + len);
+	memcpy(untracked, &ud, sizeof(ud));
+	memcpy(untracked->name, data, len + 1);
+	data = next;
+
+	for (i = 0; i < untracked->untracked_nr; i++) {
+		len = strlen((const char *)data);
+		NEXT(len + 1);
+		untracked->untracked[i] = xstrdup((const char*)data);
+		data = next;
+	}
+
+	for (i = 0; i < untracked->dirs_nr; i++) {
+		len = read_one_dir(untracked->dirs + i, data, sz - (data - data_));
+		if (len < 0)
+			return -1;
+		data += len;
+	}
+	return data - data_;
+}
+
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+{
+	const struct ondisk_untracked_cache *ouc = data;
+	struct untracked_cache *uc;
+	int len;
+
+	if (sz < sizeof(*ouc))
+		return NULL;
+
+	uc = xcalloc(1, sizeof(*uc));
+	stat_data_from_disk(&uc->ss_info_exclude.stat, &ouc->info_exclude_stat);
+	stat_data_from_disk(&uc->ss_excludes_file.stat, &ouc->excludes_file_stat);
+	hashcpy(uc->ss_info_exclude.sha1, ouc->info_exclude_sha1);
+	hashcpy(uc->ss_excludes_file.sha1, ouc->excludes_file_sha1);
+	uc->dir_flags = get_be32(&ouc->dir_flags);
+	uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
+	uc->loaded = 1;
+	len = sizeof(*ouc) + strlen(ouc->exclude_per_dir);
+	if (sz == len)
+		return uc;
+	if (sz > len &&
+	    read_one_dir(&uc->root, (const unsigned char *)data + len,
+			 sz - len) == sz - len)
+		return uc;
+	free_untracked_cache(uc);
+	return NULL;
+}
diff --git a/dir.h b/dir.h
index 7ef0f63..014f3ed 100644
--- a/dir.h
+++ b/dir.h
@@ -137,6 +137,7 @@ struct untracked_cache {
 	int gitignore_invalidated;
 	int dir_invalidated;
 	int dir_opened;
+	int loaded;
 };
 
 struct dir_struct {
@@ -297,5 +298,7 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void free_untracked_cache(struct untracked_cache *);
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index efff4e2..60baeaf 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1359,6 +1359,9 @@ static int read_index_extension(struct index_state *istate,
 		if (read_link_extension(istate, data, sz))
 			return -1;
 		break;
+	case CACHE_EXT_UNTRACKED:
+		istate->untracked = read_untracked_extension(data, sz);
+		break;
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1650,6 +1653,8 @@ int discard_index(struct index_state *istate)
 	istate->cache = NULL;
 	istate->cache_alloc = 0;
 	discard_split_index(istate);
+	free_untracked_cache(istate->untracked);
+	istate->untracked = NULL;
 	return 0;
 }
 
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 11/19] untracked cache: invalidate at index addition or removal
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (9 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 10/19] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 12/19] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
                   ` (8 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Ideally we should implement untracked_cache_remove_from_index() and
untracked_cache_add_to_index() so that they update the untracked cache
right away instead of invalidating it and waiting for the next
read_directory() to deal with it. But that may need some more work in
unpack-trees.c, so keep it simple as a first step.

The new call in add_index_entry_with_check() may look strange because
new calls usually stay close to cache_tree_invalidate_path(). We do it
a bit later than c_t_i_p() in this function because if it's about
replacing the entry with the same name, we don't care (but cache-tree
does).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c          | 31 +++++++++++++++++++++++++++++++
 dir.h          |  4 ++++
 read-cache.c   |  4 ++++
 unpack-trees.c |  7 +++++--
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index c97b0c3..ddc549c 100644
--- a/dir.c
+++ b/dir.c
@@ -2369,3 +2369,34 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	free_untracked_cache(uc);
 	return NULL;
 }
+
+void untracked_cache_invalidate_path(struct index_state *istate,
+				     const char *path)
+{
+	const char *sep;
+	struct untracked_cache_dir *d;
+	if (!istate->untracked || !istate->untracked->root)
+		return;
+	sep = strrchr(path, '/');
+	if (sep)
+		d = lookup_untracked(istate->untracked,
+				     istate->untracked->root,
+				     path, sep - path);
+	else
+		d = istate->untracked->root;
+	istate->untracked->dir_invalidated++;
+	d->valid = 0;
+	d->untracked_nr = 0;
+}
+
+void untracked_cache_remove_from_index(struct index_state *istate,
+				       const char *path)
+{
+	untracked_cache_invalidate_path(istate, path);
+}
+
+void untracked_cache_add_to_index(struct index_state *istate,
+				  const char *path)
+{
+	untracked_cache_invalidate_path(istate, path);
+}
diff --git a/dir.h b/dir.h
index 014f3ed..8c29324 100644
--- a/dir.h
+++ b/dir.h
@@ -298,6 +298,10 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void untracked_cache_invalidate_path(struct index_state *, const char *);
+void untracked_cache_remove_from_index(struct index_state *, const char *);
+void untracked_cache_add_to_index(struct index_state *, const char *);
+
 void free_untracked_cache(struct untracked_cache *);
 struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
diff --git a/read-cache.c b/read-cache.c
index 60baeaf..feb10b0 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -79,6 +79,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 	memcpy(new->name, new_name, namelen + 1);
 
 	cache_tree_invalidate_path(istate, old->name);
+	untracked_cache_remove_from_index(istate, old->name);
 	remove_index_entry_at(istate, nr);
 	add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 }
@@ -538,6 +539,7 @@ int remove_file_from_index(struct index_state *istate, const char *path)
 	if (pos < 0)
 		pos = -pos-1;
 	cache_tree_invalidate_path(istate, path);
+	untracked_cache_remove_from_index(istate, path);
 	while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path))
 		remove_index_entry_at(istate, pos);
 	return 0;
@@ -969,6 +971,8 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 	}
 	pos = -pos-1;
 
+	untracked_cache_add_to_index(istate, ce->name);
+
 	/*
 	 * Inserting a merged entry ("stage 0") into the index
 	 * will always replace all non-merged entries..
diff --git a/unpack-trees.c b/unpack-trees.c
index 629c658..e5ddb0c 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -9,6 +9,7 @@
 #include "refs.h"
 #include "attr.h"
 #include "split-index.h"
+#include "dir.h"
 
 /*
  * Error messages expected by scripts out of plumbing commands such as
@@ -1255,8 +1256,10 @@ static int verify_uptodate_sparse(const struct cache_entry *ce,
 static void invalidate_ce_path(const struct cache_entry *ce,
 			       struct unpack_trees_options *o)
 {
-	if (ce)
-		cache_tree_invalidate_path(o->src_index, ce->name);
+	if (!ce)
+		return;
+	cache_tree_invalidate_path(o->src_index, ce->name);
+	untracked_cache_invalidate_path(o->src_index, ce->name);
 }
 
 /*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 12/19] read-cache.c: split racy stat test to a separate function
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (10 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 11/19] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 13/19] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
                   ` (7 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 read-cache.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index feb10b0..a14646b 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -271,20 +271,26 @@ static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
 	return changed;
 }
 
-static int is_racy_timestamp(const struct index_state *istate,
-			     const struct cache_entry *ce)
+static int is_racy_stat(const struct index_state *istate,
+			const struct stat_data *sd)
 {
-	return (!S_ISGITLINK(ce->ce_mode) &&
-		istate->timestamp.sec &&
+	return (istate->timestamp.sec &&
 #ifdef USE_NSEC
 		 /* nanosecond timestamped files can also be racy! */
-		(istate->timestamp.sec < ce->ce_stat_data.sd_mtime.sec ||
-		 (istate->timestamp.sec == ce->ce_stat_data.sd_mtime.sec &&
-		  istate->timestamp.nsec <= ce->ce_stat_data.sd_mtime.nsec))
+		(istate->timestamp.sec < sd->sd_mtime.sec ||
+		 (istate->timestamp.sec == sd->sd_mtime.sec &&
+		  istate->timestamp.nsec <= sd->sd_mtime.nsec))
 #else
-		istate->timestamp.sec <= ce->ce_stat_data.sd_mtime.sec
+		istate->timestamp.sec <= sd->sd_mtime.sec
 #endif
-		 );
+		);
+}
+
+static int is_racy_timestamp(const struct index_state *istate,
+			     const struct cache_entry *ce)
+{
+	return (!S_ISGITLINK(ce->ce_mode) &&
+		is_racy_stat(istate, &ce->ce_stat_data));
 }
 
 int ie_match_stat(const struct index_state *istate,
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 13/19] untracked cache: avoid racy timestamps
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (11 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 12/19] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 14/19] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
                   ` (6 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

When a directory is updated within the same second in which its
timestamp was last saved, we cannot tell that the directory has been
updated by checking timestamps. Assume the worst (something has been
updated). See 29e4d36 (Racy GIT - 2005-12-20) for more information.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      | 2 ++
 dir.c        | 5 +++--
 read-cache.c | 8 ++++++++
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 4f903fa..2b93217 100644
--- a/cache.h
+++ b/cache.h
@@ -562,6 +562,8 @@ extern void fill_stat_data(struct stat_data *sd, struct stat *st);
  * INODE_CHANGED, and DATA_CHANGED.
  */
 extern int match_stat_data(const struct stat_data *sd, struct stat *st);
+extern int match_stat_data_racy(const struct index_state *istate,
+				const struct stat_data *sd, struct stat *st);
 
 extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
 
diff --git a/dir.c b/dir.c
index ddc549c..3eb1a12 100644
--- a/dir.c
+++ b/dir.c
@@ -678,7 +678,8 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 		close(fd);
 		if (ss) {
 			int pos;
-			if (ss_valid && !match_stat_data(&ss->stat, &st))
+			if (ss_valid &&
+			    !match_stat_data_racy(&the_index, &ss->stat, &st))
 				; /* no content change, ss->sha1 still good */
 			else if (check_index &&
 				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
@@ -1533,7 +1534,7 @@ static int valid_cached_dir(struct dir_struct *dir,
 		return 0;
 	}
 	if (!untracked->valid ||
-	    match_stat_data(&untracked->stat_data, &st)) {
+	    match_stat_data_racy(&the_index, &untracked->stat_data, &st)) {
 		if (untracked->valid)
 			invalidate_directory(dir->untracked, untracked);
 		fill_stat_data(&untracked->stat_data, &st);
diff --git a/read-cache.c b/read-cache.c
index a14646b..177cbae 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -293,6 +293,14 @@ static int is_racy_timestamp(const struct index_state *istate,
 		is_racy_stat(istate, &ce->ce_stat_data));
 }
 
+int match_stat_data_racy(const struct index_state *istate,
+			 const struct stat_data *sd, struct stat *st)
+{
+	if (is_racy_stat(istate, sd))
+		return MTIME_CHANGED;
+	return match_stat_data(sd, st);
+}
+
 int ie_match_stat(const struct index_state *istate,
 		  const struct cache_entry *ce, struct stat *st,
 		  unsigned int options)
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 14/19] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (12 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 13/19] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 15/19] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
                   ` (5 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This could be used to verify correct behavior in tests

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/dir.c b/dir.c
index 3eb1a12..57b49f7 100644
--- a/dir.c
+++ b/dir.c
@@ -1918,6 +1918,18 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
+	if (dir->untracked) {
+		static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
+		trace_printf_key(&trace_untracked_stats,
+				 "node creation: %u\n"
+				 "gitignore invalidation: %u\n"
+				 "directory invalidation: %u\n"
+				 "opendir: %u\n",
+				 dir->untracked->dir_created,
+				 dir->untracked->gitignore_invalidated,
+				 dir->untracked->dir_invalidated,
+				 dir->untracked->dir_opened);
+	}
 	return dir->nr;
 }
 
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 15/19] untracked cache: mark index dirty if untracked cache is updated
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (13 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 14/19] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 16/19] status: enable untracked cache Nguyễn Thái Ngọc Duy
                   ` (4 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      | 1 +
 dir.c        | 9 +++++++++
 read-cache.c | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index 2b93217..dcbdc3c 100644
--- a/cache.h
+++ b/cache.h
@@ -296,6 +296,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define RESOLVE_UNDO_CHANGED	(1 << 4)
 #define CACHE_TREE_CHANGED	(1 << 5)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
+#define UNTRACKED_CHANGED       (1 << 7)
 
 struct split_index;
 struct untracked_cache;
diff --git a/dir.c b/dir.c
index 57b49f7..d373d9a 100644
--- a/dir.c
+++ b/dir.c
@@ -1929,6 +1929,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 				 dir->untracked->gitignore_invalidated,
 				 dir->untracked->dir_invalidated,
 				 dir->untracked->dir_opened);
+		if (dir->untracked == the_index.untracked &&
+		    (dir->untracked->dir_opened ||
+		     dir->untracked->gitignore_invalidated ||
+		     dir->untracked->dir_invalidated))
+			the_index.cache_changed |= UNTRACKED_CHANGED;
+		if (dir->untracked != the_index.untracked) {
+			free(dir->untracked);
+			dir->untracked = NULL;
+		}
 	}
 	return dir->nr;
 }
diff --git a/read-cache.c b/read-cache.c
index 177cbae..779c080 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -43,7 +43,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-		 SPLIT_INDEX_ORDERED)
+		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 16/19] status: enable untracked cache
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (14 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 15/19] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 17/19] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
                   ` (3 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

update_index_if_able() is moved down so that the updated untracked
cache could be written out.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/commit.c | 5 +++--
 wt-status.c      | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 81dc622..08e2964 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1408,13 +1408,14 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL);
 
 	fd = hold_locked_index(&index_lock, 0);
-	if (0 <= fd)
-		update_index_if_able(&the_index, &index_lock);
 
 	s.is_initial = get_sha1(s.reference, sha1) ? 1 : 0;
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	wt_status_collect(&s);
 
+	if (0 <= fd)
+		update_index_if_able(&the_index, &index_lock);
+
 	if (s.relative_paths)
 		s.prefix = prefix;
 
diff --git a/wt-status.c b/wt-status.c
index 1bf5d72..42aeebe 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -585,6 +585,8 @@ static void wt_status_collect_untracked(struct wt_status *s)
 			DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
 	if (s->show_ignored_files)
 		dir.flags |= DIR_SHOW_IGNORED_TOO;
+	else
+		dir.untracked = the_index.untracked;
 	setup_standard_excludes(&dir);
 
 	fill_directory(&dir, &s->pathspec);
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 17/19] update-index: manually enable or disable untracked cache
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (15 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 16/19] status: enable untracked cache Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-27 12:10 ` [PATCH 18/19] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
                   ` (2 subsequent siblings)
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Overall time saving on "git status" is about 40% in the best case
scenario, removing ..collect_untracked() as the most time consuming
function. read and refresh index operations are now at the top (which
should drop when index-helper and/or watchman support is added). More
numbers and analysis below.

webkit.git
==========

169k files. 6k dirs. Lots of test data (i.e. not touched most of the
time)

Base status
-----------

Index version 4 in split index mode and cache-tree populated. No
untracked cache. It shows how time is consumed by "git status". The
same settings are used for other repos below.

18:28:10.199679 builtin/commit.c:1394   performance: 0.000000451 s: cmd_status:setup
18:28:10.474847 read-cache.c:1407       performance: 0.274873831 s: read_index
18:28:10.475295 read-cache.c:1407       performance: 0.000000656 s: read_index
18:28:10.728443 preload-index.c:131     performance: 0.253147487 s: read_index_preload
18:28:10.741422 read-cache.c:1254       performance: 0.012868340 s: refresh_index
18:28:10.752300 wt-status.c:623         performance: 0.010421357 s: wt_status_collect_changes_worktree
18:28:10.762069 wt-status.c:629         performance: 0.009644748 s: wt_status_collect_changes_index
18:28:11.601019 wt-status.c:632         performance: 0.838859547 s: wt_status_collect_untracked
18:28:11.605939 builtin/commit.c:1421   performance: 0.004835004 s: cmd_status:update_index
18:28:11.606580 trace.c:415             performance: 1.407878388 s: git command: 'git' 'status'

Populating status
-----------------

This is after enabling untracked cache and the cache is still empty.
We see a slight increase in .._collect_untracked() and update_index
(because new cache has to be written to $GIT_DIR/index).

18:28:18.915213 builtin/commit.c:1394   performance: 0.000000326 s: cmd_status:setup
18:28:19.197364 read-cache.c:1407       performance: 0.281901416 s: read_index
18:28:19.197754 read-cache.c:1407       performance: 0.000000546 s: read_index
18:28:19.451355 preload-index.c:131     performance: 0.253599607 s: read_index_preload
18:28:19.464400 read-cache.c:1254       performance: 0.012935336 s: refresh_index
18:28:19.475115 wt-status.c:623         performance: 0.010236920 s: wt_status_collect_changes_worktree
18:28:19.486022 wt-status.c:629         performance: 0.010801685 s: wt_status_collect_changes_index
18:28:20.362660 wt-status.c:632         performance: 0.876551366 s: wt_status_collect_untracked
18:28:20.396199 builtin/commit.c:1421   performance: 0.033447969 s: cmd_status:update_index
18:28:20.396939 trace.c:415             performance: 1.482695902 s: git command: 'git' 'status'

Populated status
----------------

After the cache is populated, wt_status_collect_untracked() drops 82%
from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now
read_index() and read_index_preload().

18:28:20.408605 builtin/commit.c:1394   performance: 0.000000457 s: cmd_status:setup
18:28:20.692864 read-cache.c:1407       performance: 0.283980458 s: read_index
18:28:20.693273 read-cache.c:1407       performance: 0.000000661 s: read_index
18:28:20.958814 preload-index.c:131     performance: 0.265540254 s: read_index_preload
18:28:20.972375 read-cache.c:1254       performance: 0.013437429 s: refresh_index
18:28:20.983959 wt-status.c:623         performance: 0.011146646 s: wt_status_collect_changes_worktree
18:28:20.993948 wt-status.c:629         performance: 0.009879094 s: wt_status_collect_changes_index
18:28:21.138125 wt-status.c:632         performance: 0.144084737 s: wt_status_collect_untracked
18:28:21.173678 builtin/commit.c:1421   performance: 0.035463949 s: cmd_status:update_index
18:28:21.174251 trace.c:415             performance: 0.766707355 s: git command: 'git' 'status'

gentoo-x86.git
==============

This repository is a strange one with a balanced, wide and shallow
worktree (about 100k files and 23k dirs) and no .gitignore in
worktree. .._collect_untracked() time drops 88%, total time drops 56%.

Base status
-----------
18:20:40.828642 builtin/commit.c:1394   performance: 0.000000496 s: cmd_status:setup
18:20:41.027233 read-cache.c:1407       performance: 0.198130532 s: read_index
18:20:41.027670 read-cache.c:1407       performance: 0.000000581 s: read_index
18:20:41.171716 preload-index.c:131     performance: 0.144045594 s: read_index_preload
18:20:41.179171 read-cache.c:1254       performance: 0.007320424 s: refresh_index
18:20:41.185785 wt-status.c:623         performance: 0.006144638 s: wt_status_collect_changes_worktree
18:20:41.192701 wt-status.c:629         performance: 0.006780184 s: wt_status_collect_changes_index
18:20:41.991723 wt-status.c:632         performance: 0.798927029 s: wt_status_collect_untracked
18:20:41.994664 builtin/commit.c:1421   performance: 0.002852772 s: cmd_status:update_index
18:20:41.995458 trace.c:415             performance: 1.168427502 s: git command: 'git' 'status'
Populating status
-----------------
18:20:48.968848 builtin/commit.c:1394   performance: 0.000000380 s: cmd_status:setup
18:20:49.172918 read-cache.c:1407       performance: 0.203734214 s: read_index
18:20:49.173341 read-cache.c:1407       performance: 0.000000562 s: read_index
18:20:49.320013 preload-index.c:131     performance: 0.146671391 s: read_index_preload
18:20:49.328039 read-cache.c:1254       performance: 0.007921957 s: refresh_index
18:20:49.334680 wt-status.c:623         performance: 0.006172020 s: wt_status_collect_changes_worktree
18:20:49.342526 wt-status.c:629         performance: 0.007731746 s: wt_status_collect_changes_index
18:20:50.257510 wt-status.c:632         performance: 0.914864222 s: wt_status_collect_untracked
18:20:50.338371 builtin/commit.c:1421   performance: 0.080776477 s: cmd_status:update_index
18:20:50.338900 trace.c:415             performance: 1.371462446 s: git command: 'git' 'status'
Populated status
----------------
18:20:50.351160 builtin/commit.c:1394   performance: 0.000000571 s: cmd_status:setup
18:20:50.577358 read-cache.c:1407       performance: 0.225917338 s: read_index
18:20:50.577794 read-cache.c:1407       performance: 0.000000617 s: read_index
18:20:50.734140 preload-index.c:131     performance: 0.156345564 s: read_index_preload
18:20:50.745717 read-cache.c:1254       performance: 0.011463075 s: refresh_index
18:20:50.755176 wt-status.c:623         performance: 0.008877929 s: wt_status_collect_changes_worktree
18:20:50.763768 wt-status.c:629         performance: 0.008471633 s: wt_status_collect_changes_index
18:20:50.854885 wt-status.c:632         performance: 0.090988721 s: wt_status_collect_untracked
18:20:50.857765 builtin/commit.c:1421   performance: 0.002789097 s: cmd_status:update_index
18:20:50.858411 trace.c:415             performance: 0.508647673 s: git command: 'git' 'status'

linux-2.6
=========

Reference repo. Not too big. .._collect_untracked() drops 84%. Total
time drops 42%.

Base status
-----------
18:34:09.870122 builtin/commit.c:1394   performance: 0.000000385 s: cmd_status:setup
18:34:09.943218 read-cache.c:1407       performance: 0.072871177 s: read_index
18:34:09.943614 read-cache.c:1407       performance: 0.000000491 s: read_index
18:34:10.004364 preload-index.c:131     performance: 0.060748102 s: read_index_preload
18:34:10.008190 read-cache.c:1254       performance: 0.003714285 s: refresh_index
18:34:10.012087 wt-status.c:623         performance: 0.002775446 s: wt_status_collect_changes_worktree
18:34:10.016054 wt-status.c:629         performance: 0.003862140 s: wt_status_collect_changes_index
18:34:10.214747 wt-status.c:632         performance: 0.198604837 s: wt_status_collect_untracked
18:34:10.216102 builtin/commit.c:1421   performance: 0.001244166 s: cmd_status:update_index
18:34:10.216817 trace.c:415             performance: 0.347670735 s: git command: 'git' 'status'
Populating status
-----------------
18:34:16.595102 builtin/commit.c:1394   performance: 0.000000456 s: cmd_status:setup
18:34:16.666600 read-cache.c:1407       performance: 0.070992413 s: read_index
18:34:16.667012 read-cache.c:1407       performance: 0.000000606 s: read_index
18:34:16.729375 preload-index.c:131     performance: 0.062362492 s: read_index_preload
18:34:16.732565 read-cache.c:1254       performance: 0.003075517 s: refresh_index
18:34:16.736148 wt-status.c:623         performance: 0.002422201 s: wt_status_collect_changes_worktree
18:34:16.739990 wt-status.c:629         performance: 0.003746618 s: wt_status_collect_changes_index
18:34:16.948505 wt-status.c:632         performance: 0.208426710 s: wt_status_collect_untracked
18:34:16.961744 builtin/commit.c:1421   performance: 0.013151887 s: cmd_status:update_index
18:34:16.962233 trace.c:415             performance: 0.368537535 s: git command: 'git' 'status'
Populated status
----------------
18:34:16.970026 builtin/commit.c:1394   performance: 0.000000631 s: cmd_status:setup
18:34:17.046235 read-cache.c:1407       performance: 0.075904673 s: read_index
18:34:17.046644 read-cache.c:1407       performance: 0.000000681 s: read_index
18:34:17.113564 preload-index.c:131     performance: 0.066920253 s: read_index_preload
18:34:17.117281 read-cache.c:1254       performance: 0.003604055 s: refresh_index
18:34:17.121115 wt-status.c:623         performance: 0.002508345 s: wt_status_collect_changes_worktree
18:34:17.125089 wt-status.c:629         performance: 0.003871636 s: wt_status_collect_changes_index
18:34:17.156089 wt-status.c:632         performance: 0.030895703 s: wt_status_collect_untracked
18:34:17.169861 builtin/commit.c:1421   performance: 0.013686404 s: cmd_status:update_index
18:34:17.170391 trace.c:415             performance: 0.201474531 s: git command: 'git' 'status'

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-update-index.txt |  8 ++++++++
 builtin/update-index.c             | 16 ++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index 82eca6f..16f2686 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -172,6 +172,14 @@ may not support it yet.
 	the shared index file. This mode is designed for very large
 	indexes that take a signficant amount of time to read or write.
 
+--untracked-cache::
+--no-untracked-cache::
+	Enable or disable untracked cache extension. This could speed
+	up commands that involve determining untracked files such
+	as `git status`. The underlying operating system and file
+	system must change `st_mtime` field of a directory if files
+	are added or deleted in that directory.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index b0e3dc9..e57e2d7 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -741,6 +741,7 @@ static int reupdate_callback(struct parse_opt_ctx_t *ctx,
 int cmd_update_index(int argc, const char **argv, const char *prefix)
 {
 	int newfd, entries, has_errors = 0, line_termination = '\n';
+	int untracked_cache = -1;
 	int read_from_stdin = 0;
 	int prefix_length = prefix ? strlen(prefix) : 0;
 	int preferred_index_format = 0;
@@ -832,6 +833,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			N_("write index in this format")),
 		OPT_BOOL(0, "split-index", &split_index,
 			N_("enable or disable split index")),
+		OPT_BOOL(0, "untracked-cache", &untracked_cache,
+			N_("enable/disable untracked cache")),
 		OPT_END()
 	};
 
@@ -938,6 +941,19 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		the_index.split_index = NULL;
 		the_index.cache_changed |= SOMETHING_CHANGED;
 	}
+	if (untracked_cache > 0 && !the_index.untracked) {
+		struct untracked_cache *uc;
+
+		uc = xcalloc(1, sizeof(*uc));
+		uc->exclude_per_dir = ".gitignore";
+		/* should be the same flags used by git-status */
+		uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
+		the_index.untracked = uc;
+		the_index.cache_changed |= SOMETHING_CHANGED;
+	} else if (!untracked_cache && the_index.untracked) {
+		the_index.untracked = NULL;
+		the_index.cache_changed |= SOMETHING_CHANGED;
+	}
 
 	if (active_cache_changed) {
 		if (newfd < 0) {
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 18/19] update-index: test the system before enabling untracked cache
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (16 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 17/19] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-10-28 17:37   ` Torsten Bögershausen
  2014-10-28 23:25   ` Eric Sunshine
  2014-10-27 12:10 ` [PATCH 19/19] t7063: tests for " Nguyễn Thái Ngọc Duy
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
  19 siblings, 2 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-update-index.txt |   6 ++
 builtin/update-index.c             | 146 +++++++++++++++++++++++++++++++++++++
 2 files changed, 152 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index 16f2686..fab1fea 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -180,6 +180,12 @@ may not support it yet.
 	system must change `st_mtime` field of a directory if files
 	are added or deleted in that directory.
 
+--force-untracked-cache::
+	For safety, `--untracked-cache` performs tests on the working
+	directory to make sure untracked cache can be used. These
+	tests can take a few seconds. `--force-untracked-cache` can be
+	used to skip the tests.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index e57e2d7..471c0b4 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -48,6 +48,145 @@ static void report(const char *fmt, ...)
 	va_end(vp);
 }
 
+static void remove_test_directory(void)
+{
+	struct strbuf sb = STRBUF_INIT;
+	strbuf_addstr(&sb, "dir-mtime-test");
+	remove_dir_recursively(&sb, 0);
+	strbuf_release(&sb);
+}
+
+static void xmkdir(const char *path)
+{
+	if (mkdir(path, 0700))
+		die_errno(_("failed to create directory %s"), path);
+}
+
+static int xstat(const char *path, struct stat *st)
+{
+	if (stat(path, st))
+		die_errno(_("failed to stat %s"), path);
+	return 0;
+}
+
+static int create_file(const char *path)
+{
+	int fd = open(path, O_CREAT | O_RDWR, 0644);
+	if (fd < 0)
+		die_errno(_("failed to create file %s"), path);
+	return fd;
+}
+
+static void xunlink(const char *path)
+{
+	if (unlink(path))
+		die_errno(_("failed to delete file %s"), path);
+}
+
+static void xrmdir(const char *path)
+{
+	if (rmdir(path))
+		die_errno(_("failed to delete directory %s"), path);
+}
+
+static void avoid_racy(void)
+{
+	/*
+	 * not sure if we could usleep(10) if USE_NSEC is defined. The
+	 * field nsec could be there, but the OS could choose to
+	 * ignore it?
+	 */
+	sleep(1);
+}
+
+static int test_if_untracked_cache_is_supported(void)
+{
+	struct stat st;
+	struct stat_data base;
+	int fd;
+
+	fprintf(stderr, _("Testing "));
+	xmkdir("dir-mtime-test");
+	atexit(remove_test_directory);
+	xstat("dir-mtime-test", &st);
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	fd = create_file("dir-mtime-test/newfile");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr,_("directory stat info does not "
+				    "change after adding a new file"));
+		return 0;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	xmkdir("dir-mtime-test/new-dir");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not change "
+				     "after adding a new directory"));
+		return 0;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	write_or_die(fd, "data", 4);
+	close(fd);
+	xstat("dir-mtime-test", &st);
+	if (match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info changes "
+				     "after updating a file"));
+		return 0;
+	}
+	fputc('.', stderr);
+
+	avoid_racy();
+	close(create_file("dir-mtime-test/new-dir/new"));
+	xstat("dir-mtime-test", &st);
+	if (match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info changes after "
+				     "adding a file inside subdirectory"));
+		return 0;
+	}
+	fputc('.', stderr);
+
+	avoid_racy();
+	xunlink("dir-mtime-test/newfile");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not "
+				     "change after deleting a file"));
+		return 0;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	xunlink("dir-mtime-test/new-dir/new");
+	xrmdir("dir-mtime-test/new-dir");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not "
+				     "change after deleting a directory"));
+		return 0;
+	}
+
+	xrmdir("dir-mtime-test");
+	fprintf_ln(stderr, _(" OK"));
+	return 1;
+}
+
 static int mark_ce_flags(const char *path, int flag, int mark)
 {
 	int namelen = strlen(path);
@@ -835,6 +974,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			N_("enable or disable split index")),
 		OPT_BOOL(0, "untracked-cache", &untracked_cache,
 			N_("enable/disable untracked cache")),
+		OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
+			    N_("enable untracked cache without testing the filesystem"), 2),
 		OPT_END()
 	};
 
@@ -944,6 +1085,11 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 	if (untracked_cache > 0 && !the_index.untracked) {
 		struct untracked_cache *uc;
 
+		if (untracked_cache < 2) {
+			setup_work_tree();
+			if (!test_if_untracked_cache_is_supported())
+				return 1;
+		}
 		uc = xcalloc(1, sizeof(*uc));
 		uc->exclude_per_dir = ".gitignore";
 		/* should be the same flags used by git-status */
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH 19/19] t7063: tests for untracked cache
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (17 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 18/19] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
@ 2014-10-27 12:10 ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
  19 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-10-27 12:10 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 .gitignore                                 |   1 +
 Makefile                                   |   1 +
 t/t7063-status-untracked-cache.sh (new +x) | 353 +++++++++++++++++++++++++++++
 test-dump-untracked-cache.c (new)          |  61 +++++
 4 files changed, 416 insertions(+)
 create mode 100755 t/t7063-status-untracked-cache.sh
 create mode 100644 test-dump-untracked-cache.c

diff --git a/.gitignore b/.gitignore
index 9ec40fa..fde1143 100644
--- a/.gitignore
+++ b/.gitignore
@@ -183,6 +183,7 @@
 /test-delta
 /test-dump-cache-tree
 /test-dump-split-index
+/test-dump-untracked-cache
 /test-scrap-cache-tree
 /test-genrandom
 /test-hashmap
diff --git a/Makefile b/Makefile
index 356feb5..8070e56 100644
--- a/Makefile
+++ b/Makefile
@@ -552,6 +552,7 @@ TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
+TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-hashmap
 TEST_PROGRAMS_NEED_X += test-index-version
diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh
new file mode 100755
index 0000000..2b2ffd7
--- /dev/null
+++ b/t/t7063-status-untracked-cache.sh
@@ -0,0 +1,353 @@
+#!/bin/sh
+
+test_description='test untracked cache'
+
+. ./test-lib.sh
+
+avoid_racy() {
+	sleep 1
+}
+
+git update-index --untracked-cache
+# It's fine if git update-index returns an error code other than one,
+# it'll be caught in the first test.
+if test $? -eq 1; then
+	skip_all='This system does not support untracked cache'
+	test_done
+fi
+
+test_expect_success 'setup' '
+	git init worktree &&
+	cd worktree &&
+	mkdir done dtwo dthree &&
+	touch one two three done/one dtwo/two dthree/three &&
+	git add one two done/one &&
+	: >.git/info/exclude &&
+	git update-index --untracked-cache
+'
+
+test_expect_success 'untracked cache is empty' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 0000000000000000000000000000000000000000
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+EOF
+	test_cmp ../expect ../actual
+'
+
+cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? dthree/
+?? dtwo/
+?? three
+EOF
+
+cat >../dump.expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ 0000000000000000000000000000000000000000 recurse valid
+dthree/
+dtwo/
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+
+test_expect_success 'status first time (empty cache)' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 3
+gitignore invalidation: 1
+directory invalidation: 0
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'untracked cache after first status' '
+	test-dump-untracked-cache >../actual &&
+	test_cmp ../dump.expect ../actual
+'
+
+test_expect_success 'status second time (fully populated cache)' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 0
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'untracked cache after second status' '
+	test-dump-untracked-cache >../actual &&
+	test_cmp ../dump.expect ../actual
+'
+
+test_expect_success 'modify in root directory, one dir invalidation' '
+	avoid_racy &&
+	: >four &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? dthree/
+?? dtwo/
+?? four
+?? three
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 1
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ 0000000000000000000000000000000000000000 recurse valid
+dthree/
+dtwo/
+four
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'new .gitignore invalidates recursively' '
+	avoid_racy &&
+	echo four >.gitignore &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dthree/
+?? dtwo/
+?? three
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 1
+directory invalidation: 1
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dthree/
+dtwo/
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'new info/exclude invalidates everything' '
+	avoid_racy &&
+	echo three >>.git/info/exclude &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 1
+directory invalidation: 0
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'move two from tracked to untracked' '
+	git rm --cached two &&
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'status after the move' '
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+?? .gitignore
+?? dtwo/
+?? two
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+two
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'move two from untracked to tracked' '
+	git add two &&
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'status after the move' '
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_done
diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
new file mode 100644
index 0000000..710441e
--- /dev/null
+++ b/test-dump-untracked-cache.c
@@ -0,0 +1,61 @@
+#include "cache.h"
+#include "dir.h"
+
+static int compare_untracked(const void *a_, const void *b_)
+{
+	const char *const *a = a_;
+	const char *const *b = b_;
+	return strcmp(*a, *b);
+}
+
+static int compare_dir(const void *a_, const void *b_)
+{
+	const struct untracked_cache_dir *const *a = a_;
+	const struct untracked_cache_dir *const *b = b_;
+	return strcmp((*a)->name, (*b)->name);
+}
+
+static void dump(struct untracked_cache_dir *ucd, struct strbuf *base)
+{
+	int i, len;
+	qsort(ucd->untracked, ucd->untracked_nr, sizeof(*ucd->untracked),
+	      compare_untracked);
+	qsort(ucd->dirs, ucd->dirs_nr, sizeof(*ucd->dirs),
+	      compare_dir);
+	len = base->len;
+	strbuf_addf(base, "%s/", ucd->name);
+	printf("%s %s", base->buf,
+	       sha1_to_hex(ucd->exclude_sha1));
+	if (ucd->recurse)
+		fputs(" recurse", stdout);
+	if (ucd->check_only)
+		fputs(" check_only", stdout);
+	if (ucd->valid)
+		fputs(" valid", stdout);
+	printf("\n");
+	for (i = 0; i < ucd->untracked_nr; i++)
+		printf("%s\n", ucd->untracked[i]);
+	for (i = 0; i < ucd->dirs_nr; i++)
+		dump(ucd->dirs[i], base);
+	strbuf_setlen(base, len);
+}
+
+int main(int ac, char **av)
+{
+	struct untracked_cache *uc;
+	struct strbuf base = STRBUF_INIT;
+	if (read_cache() < 0)
+		die("unable to read index file");
+	uc = the_index.untracked;
+	if (!uc) {
+		printf("no untracked cache\n");
+		return 0;
+	}
+	printf("info/exclude %s\n", sha1_to_hex(uc->ss_info_exclude.sha1));
+	printf("core.excludesfile %s\n", sha1_to_hex(uc->ss_excludes_file.sha1));
+	printf("exclude_per_dir %s\n", uc->exclude_per_dir);
+	printf("flags %08x\n", uc->dir_flags);
+	if (uc->root)
+		dump(uc->root, &base);
+	return 0;
+}
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file
  2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
@ 2014-10-27 22:46   ` Junio C Hamano
  2014-10-28  0:12     ` Duy Nguyen
  2014-10-28 17:37   ` Torsten Bögershausen
  1 sibling, 1 reply; 65+ messages in thread
From: Junio C Hamano @ 2014-10-27 22:46 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> This is not used anywhere yet. But the goal is to compare quickly if a
> .gitignore file has changed when we have the SHA-1 of both old (cached
> somewhere) and new (from index or a tree) versions.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  dir.c | 50 +++++++++++++++++++++++++++++++++++++++++++-------
>  dir.h |  5 +++++
>  2 files changed, 48 insertions(+), 7 deletions(-)
>
> diff --git a/dir.c b/dir.c
> index bd274a7..33a35c1 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -466,7 +466,8 @@ void add_exclude(const char *string, const char *base,
>  	x->el = el;
>  }
>  
> -static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
> +static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
> +						struct sha1_stat *ss)
>  {
>  	int pos, len;
>  	unsigned long sz;
> @@ -485,6 +486,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
>  		return NULL;
>  	}
>  	*size = xsize_t(sz);
> +	if (ss) {
> +		memset(&ss->stat, 0, sizeof(ss->stat));
> +		hashcpy(ss->sha1, active_cache[pos]->sha1);
> +	}
>  	return data;
>  }
>  
> @@ -529,11 +534,18 @@ static void trim_trailing_spaces(char *buf)
>  		*last_space = '\0';
>  }
>  
> -int add_excludes_from_file_to_list(const char *fname,
> -				   const char *base,
> -				   int baselen,
> -				   struct exclude_list *el,
> -				   int check_index)
> +/*
> + * Given a file with name "fname", read it (either from disk, or from
> + * the index if "check_index" is non-zero), parse it and store the
> + * exclude rules in "el".
> + *
> + * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
> + * stat data from disk (only valid if add_excludes returns zero). If
> + * ss_valid is non-zero, "ss" must contain good value as input.
> + */

Hmm, do we want a separate parameter for that?  Wouldn't it be
sufficient and cleaner to pass is_null_sha1(ss->sha1[]), or
alternatively have an element ss->valid that is initialized to
false?

That is...

> @@ -571,6 +587,19 @@ int add_excludes_from_file_to_list(const char *fname,
>  		}
>  		buf[size++] = '\n';
>  		close(fd);
> +		if (ss) {
> +			int pos;
> +			if (ss_valid && !match_stat_data(&ss->stat, &st))

s/ss_valid/ss->valid/;

> +				; /* no content change, ss->sha1 still good */
> +			else if (check_index &&
> +				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
> +				 !ce_stage(active_cache[pos]) &&
> +				 ce_uptodate(active_cache[pos]))
> +				hashcpy(ss->sha1, active_cache[pos]->sha1);
> +			else
> +				hash_sha1_file(buf, size, "blob", ss->sha1);
> +			fill_stat_data(&ss->stat, &st);

And toggle "ss->valid = 1"; here...

> @@ -589,6 +618,13 @@ int add_excludes_from_file_to_list(const char *fname,
>  	return 0;
>  }
>  
> +int add_excludes_from_file_to_list(const char *fname, const char *base,
> +				   int baselen, struct exclude_list *el,
> +				   int check_index)
> +{
> +	return add_excludes(fname, base, baselen, el, check_index, NULL, 0);
> +}
> +
>  struct exclude_list *add_exclude_list(struct dir_struct *dir,
>  				      int group_type, const char *src)
>  {
> diff --git a/dir.h b/dir.h
> index 6c45e9d..032d197 100644
> --- a/dir.h
> +++ b/dir.h
> @@ -73,6 +73,11 @@ struct exclude_list_group {
>  	struct exclude_list *el;
>  };
>  
> +struct sha1_stat {
> +	struct stat_data stat;
> +	unsigned char sha1[20];
> +};
> +
>  struct dir_struct {
>  	int nr, alloc;
>  	int ignored_nr, ignored_alloc;

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file
  2014-10-27 22:46   ` Junio C Hamano
@ 2014-10-28  0:12     ` Duy Nguyen
  0 siblings, 0 replies; 65+ messages in thread
From: Duy Nguyen @ 2014-10-28  0:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Git Mailing List

On Tue, Oct 28, 2014 at 5:46 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>> This is not used anywhere yet. But the goal is to compare quickly if a
>> .gitignore file has changed when we have the SHA-1 of both old (cached
>> somewhere) and new (from index or a tree) versions.
>>
>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>> ---
>>  dir.c | 50 +++++++++++++++++++++++++++++++++++++++++++-------
>>  dir.h |  5 +++++
>>  2 files changed, 48 insertions(+), 7 deletions(-)
>>
>> diff --git a/dir.c b/dir.c
>> index bd274a7..33a35c1 100644
>> --- a/dir.c
>> +++ b/dir.c
>> @@ -466,7 +466,8 @@ void add_exclude(const char *string, const char *base,
>>       x->el = el;
>>  }
>>
>> -static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
>> +static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
>> +                                             struct sha1_stat *ss)
>>  {
>>       int pos, len;
>>       unsigned long sz;
>> @@ -485,6 +486,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
>>               return NULL;
>>       }
>>       *size = xsize_t(sz);
>> +     if (ss) {
>> +             memset(&ss->stat, 0, sizeof(ss->stat));
>> +             hashcpy(ss->sha1, active_cache[pos]->sha1);
>> +     }
>>       return data;
>>  }
>>
>> @@ -529,11 +534,18 @@ static void trim_trailing_spaces(char *buf)
>>               *last_space = '\0';
>>  }
>>
>> -int add_excludes_from_file_to_list(const char *fname,
>> -                                const char *base,
>> -                                int baselen,
>> -                                struct exclude_list *el,
>> -                                int check_index)
>> +/*
>> + * Given a file with name "fname", read it (either from disk, or from
>> + * the index if "check_index" is non-zero), parse it and store the
>> + * exclude rules in "el".
>> + *
>> + * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
>> + * stat data from disk (only valid if add_excludes returns zero). If
>> + * ss_valid is non-zero, "ss" must contain good value as input.
>> + */
>
> Hmm, do we want a separate parameter for that?  Wouldn't it be
> sufficient and cleaner to pass is_null_sha1(ss->sha1[]),

Hm.. no. If both ss->sha1 and ss->stat are valid, then we could try to
match stat on disk and reuse ss->sha1, so we can't use a special value
of ss->sha1[] to mark the validity.

> or alternatively have an element ss->valid that is initialized to false?

Yeah..
-- 
Duy

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file
  2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
  2014-10-27 22:46   ` Junio C Hamano
@ 2014-10-28 17:37   ` Torsten Bögershausen
  2014-11-02  1:25     ` Duy Nguyen
  1 sibling, 1 reply; 65+ messages in thread
From: Torsten Bögershausen @ 2014-10-28 17:37 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy, git


On 2014-10-27 13.10, Nguyễn Thái Ngọc Duy wrote:
[]
Nice series, I can imagine testing & benchmarking it (so I assume there is a branch 
on github or so ?)
Another thing:
Can we switch the feature off?

It could be nice to benchmark with and without the cache on the command line,
and besides that we may want to switch it on or off, depending on the file system.
I think this can be easily done when reading and writing the index file.
(But may cost a config variable, core.dirmtime ??)

To my knowledge there is support for the mtime in SAMBA (and probably NFS),
but I can help to find out more.



> diff --git a/dir.c b/dir.c
> +static int add_excludes(const char *fname, const char *base, int baselen,
> +			struct exclude_list *el, int check_index,
> +			struct sha1_stat *ss, int ss_valid)
Cosmetic question: does it make sense to write

struct sha1_stat *sha1_stat
or 
struct sha1_stat *s_stat

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy
  2014-10-27 12:10 ` [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
@ 2014-10-28 17:37   ` Torsten Bögershausen
  0 siblings, 0 replies; 65+ messages in thread
From: Torsten Bögershausen @ 2014-10-28 17:37 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy, git

On 2014-10-27 13.10, Nguyễn Thái Ngọc Duy wrote:

>  dir.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++----------
>  dir.h |  60 +++++++++++++++++++++++++++

>  2 files changed, 189 insertions(+), 21 deletions(-)

[]

> +struct untracked_cache_dir {
> +	struct untracked_cache_dir **dirs;
> +	char **untracked;
> +	/* null SHA-1 means this directory does not have .gitignore */
> +	unsigned char exclude_sha1[20];
> +	struct stat_data stat_data;
> +	unsigned int check_only : 1;
> +	unsigned int untracked_nr : 29;
> +	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
> +	char name[1];
> +};
Are we utilizing the CPU and the L2 cache in a good way ?
I would consider to re-arrange, according to the following rule:
- 64 bit ints first (we do not have any)
- pointers (may be 64 or 32 bits)
- structs
- ints
- chars

And then we have the question why untracked_nr gets 29 bits,
and check_only one bit, which means we have 2 bits spare ?

From what I know from CPUs and compilers it could be slightly better to give
32 bits to untracked_nr and either 1 bit to check_only,
or simply make check_only a char.
Or am I missing something?

> +	struct untracked_cache_dir **dirs;
> +	char **untracked;
> +	/* null SHA-1 means this directory does not have .gitignore */
> +	struct stat_data stat_data;
> +	unsigned char exclude_sha1[20];
> +	unsigned int check_only : 1;
> +	unsigned int untracked_nr : 29;
> +	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
> +	char name[1];

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 18/19] update-index: test the system before enabling untracked cache
  2014-10-27 12:10 ` [PATCH 18/19] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
@ 2014-10-28 17:37   ` Torsten Bögershausen
  2014-11-03 12:16     ` Duy Nguyen
  2014-11-03 18:09     ` Junio C Hamano
  2014-10-28 23:25   ` Eric Sunshine
  1 sibling, 2 replies; 65+ messages in thread
From: Torsten Bögershausen @ 2014-10-28 17:37 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy, git

On 2014-10-27 13.10, Nguyễn Thái Ngọc Duy wrote:
[]

> +static void xmkdir(const char *path)
> +{
> +	if (mkdir(path, 0700))
> +		die_errno(_("failed to create directory %s"), path);
> +}

Does it make sense to ignore EINTR and do a "retry" ?
Another question is if the function could be called mkdir_or_die() instead?
 
I realized that there are 2 families of xfunc() in wrapper.c, some die, some retry.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 18/19] update-index: test the system before enabling untracked cache
  2014-10-27 12:10 ` [PATCH 18/19] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
  2014-10-28 17:37   ` Torsten Bögershausen
@ 2014-10-28 23:25   ` Eric Sunshine
  1 sibling, 0 replies; 65+ messages in thread
From: Eric Sunshine @ 2014-10-28 23:25 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: Git List

On Mon, Oct 27, 2014 at 8:10 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
> diff --git a/builtin/update-index.c b/builtin/update-index.c
> index e57e2d7..471c0b4 100644
> --- a/builtin/update-index.c
> +++ b/builtin/update-index.c
> @@ -48,6 +48,145 @@ static void report(const char *fmt, ...)
>         va_end(vp);
>  }
>
> +static int test_if_untracked_cache_is_supported(void)
> +{
> +       struct stat st;
> +       struct stat_data base;
> +       int fd;
> +
> +       fprintf(stderr, _("Testing "));
> +       xmkdir("dir-mtime-test");
> +       atexit(remove_test_directory);
> +       xstat("dir-mtime-test", &st);
> +       fill_stat_data(&base, &st);
> +       fputc('.', stderr);
> +
> +       avoid_racy();
> +       fd = create_file("dir-mtime-test/newfile");
> +       xstat("dir-mtime-test", &st);
> +       if (!match_stat_data(&base, &st)) {

close(fd);

> +               fputc('\n', stderr);
> +               fprintf_ln(stderr,_("directory stat info does not "
> +                                   "change after adding a new file"));
> +               return 0;
> +       }
> +       fill_stat_data(&base, &st);
> +       fputc('.', stderr);
> +
> +       avoid_racy();
> +       xmkdir("dir-mtime-test/new-dir");
> +       xstat("dir-mtime-test", &st);
> +       if (!match_stat_data(&base, &st)) {

close(fd);

> +               fputc('\n', stderr);
> +               fprintf_ln(stderr, _("directory stat info does not change "
> +                                    "after adding a new directory"));
> +               return 0;
> +       }
> +       fill_stat_data(&base, &st);
> +       fputc('.', stderr);
> +
> +       avoid_racy();
> +       write_or_die(fd, "data", 4);
> +       close(fd);
> +       xstat("dir-mtime-test", &st);
> +       if (match_stat_data(&base, &st)) {
> +               fputc('\n', stderr);
> +               fprintf_ln(stderr, _("directory stat info changes "
> +                                    "after updating a file"));
> +               return 0;
> +       }
> +       fputc('.', stderr);
> +
> +       avoid_racy();
> +       close(create_file("dir-mtime-test/new-dir/new"));
> +       xstat("dir-mtime-test", &st);
> +       if (match_stat_data(&base, &st)) {
> +               fputc('\n', stderr);
> +               fprintf_ln(stderr, _("directory stat info changes after "
> +                                    "adding a file inside subdirectory"));
> +               return 0;
> +       }
> +       fputc('.', stderr);
> +
> +       avoid_racy();
> +       xunlink("dir-mtime-test/newfile");
> +       xstat("dir-mtime-test", &st);
> +       if (!match_stat_data(&base, &st)) {
> +               fputc('\n', stderr);
> +               fprintf_ln(stderr, _("directory stat info does not "
> +                                    "change after deleting a file"));
> +               return 0;
> +       }
> +       fill_stat_data(&base, &st);
> +       fputc('.', stderr);
> +
> +       avoid_racy();
> +       xunlink("dir-mtime-test/new-dir/new");
> +       xrmdir("dir-mtime-test/new-dir");
> +       xstat("dir-mtime-test", &st);
> +       if (!match_stat_data(&base, &st)) {
> +               fputc('\n', stderr);
> +               fprintf_ln(stderr, _("directory stat info does not "
> +                                    "change after deleting a directory"));
> +               return 0;
> +       }
> +
> +       xrmdir("dir-mtime-test");
> +       fprintf_ln(stderr, _(" OK"));
> +       return 1;
> +}

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output
  2014-10-27 12:10 ` [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
@ 2014-10-30 16:19   ` Eric Sunshine
  0 siblings, 0 replies; 65+ messages in thread
From: Eric Sunshine @ 2014-10-30 16:19 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: Git List

On Mon, Oct 27, 2014 at 8:10 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
> diff --git a/dir.c b/dir.c
> index 2793e57..55780a7 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -37,7 +37,12 @@ enum path_treatment {
> +static enum path_treatment treat_path_fast(struct dir_struct *dir,
> +                                          struct untracked_cache_dir *untracked,
> +                                          struct cached_dir *cdir,
> +                                          struct strbuf *path,
> +                                          int baselen,
> +                                          const struct path_simplify *simplify)
> +{
> +       if (!cdir->ucd) {
> +               strbuf_setlen(path, baselen);
> +               strbuf_addstr(path, cdir->file);
> +               return path_untracked;
> +       }
> +       strbuf_setlen(path, baselen);

Would it make sense to move the strbuf_setlen(path, baselen) above the
conditional since it is common to both cases, or are they conceptually
distinct enough that it is clearer to duplicate the function call for
each case?

> +       strbuf_addstr(path, cdir->ucd->name);
> +       /* treat_one_path() does this before it calls treat_directory() */
> +       if (path->buf[path->len - 1] != '/')
> +               strbuf_addch(path, '/');
> +       if (cdir->ucd->check_only)
> +               /*
> +                * check_only is set as a result of treat_directory() getting
> +                * to its bottom. Verify again the same set of directories
> +                * with check_only set.
> +                */
> +               return read_directory_recursive(dir, path->buf, path->len,
> +
> +                                               cdir->ucd, 1, simplify);

Unusual blank line placement.

> +       /*
> +        * We get path_recurse in the first run when
> +        * directory_exists_in_index() returns index_nonexistent. We
> +        * are sure that new changes in the index does not impact the
> +        * outcome. Return now.
> +        */
> +       return path_recurse;
> +}
> +
> @@ -1477,6 +1590,12 @@ static void close_cached_dir(struct cached_dir *cdir)
>  {
>         if (cdir->fdir)
>                 closedir(cdir->fdir);
> +       /*
> +        * We have gone through this directory and found no untracked
> +        * entries. Mark it valid.
> +        */
> +       if (cdir->untracked && !cdir->untracked->valid)
> +               cdir->untracked->valid = 1;

Or, stated more simply:

   if (cdir->untracked)
        cdir->untracked->valid = 1;

>  }
>
>  /*

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file
  2014-10-28 17:37   ` Torsten Bögershausen
@ 2014-11-02  1:25     ` Duy Nguyen
  0 siblings, 0 replies; 65+ messages in thread
From: Duy Nguyen @ 2014-11-02  1:25 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Git Mailing List

On Wed, Oct 29, 2014 at 12:37 AM, Torsten Bögershausen <tboegi@web.de> wrote:
>
> On 2014-10-27 13.10, Nguyễn Thái Ngọc Duy wrote:
> []
> Nice series, I can imagine testing & benchmarking it (so I assume there is a branch
> on github or so?)

It's on 'pu' now. There's a branch on my github repo, but it has some
extra debugging on top, so 'pu' is probably the best option.

> Another thing:
> Can we switch the feature off?
>
> It could be nice to benchmark with and without the cache on the command line,
> and besides that we may want to switch it on or off, depending on the file system.
> I think this can be easily done when reading and writing the index file.
> (But may cost a config variable, core.dirmtime ??)

You can permanently switch it off with "git update-index
--no-untracked-cache". An option to temporarily disable it is not
available. I'll add an environment variable for that.

For a normal case, "update-index --untracked-cache" would test if the
OS/FS supports this before enabling it. If the repo is moved to
another fs, or being used by a different OS, then the user has to
manually disable it first. I don't know what we can do here, maybe
record uname and filesystem in the index as well..

>> diff --git a/dir.c b/dir.c
>> +static int add_excludes(const char *fname, const char *base, int baselen,
>> +                     struct exclude_list *el, int check_index,
>> +                     struct sha1_stat *ss, int ss_valid)
> Cosmetic question: does it make sense to write
>
> struct sha1_stat *sha1_stat
> or
> struct sha1_stat *s_stat

Noted.


-- 
Duy

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 18/19] update-index: test the system before enabling untracked cache
  2014-10-28 17:37   ` Torsten Bögershausen
@ 2014-11-03 12:16     ` Duy Nguyen
  2014-11-03 18:09     ` Junio C Hamano
  1 sibling, 0 replies; 65+ messages in thread
From: Duy Nguyen @ 2014-11-03 12:16 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Git Mailing List

On Wed, Oct 29, 2014 at 12:37 AM, Torsten Bögershausen <tboegi@web.de> wrote:
> On 2014-10-27 13.10, Nguyễn Thái Ngọc Duy wrote:
> []
>
>> +static void xmkdir(const char *path)
>> +{
>> +     if (mkdir(path, 0700))
>> +             die_errno(_("failed to create directory %s"), path);
>> +}
>
> Does it make sense to ignore EINTR and do a "retry"?
> Another question is if the function could be called mkdir_or_die() instead?
>
> I realized that there are 2 families of xfunc() in wrapper.c, some die, some retry.

This is only used interactively, so I think it's ok to ignore EINTR as
long as we report the case clearly (and hope the user re-enters the
command).
-- 
Duy

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH 18/19] update-index: test the system before enabling untracked cache
  2014-10-28 17:37   ` Torsten Bögershausen
  2014-11-03 12:16     ` Duy Nguyen
@ 2014-11-03 18:09     ` Junio C Hamano
  1 sibling, 0 replies; 65+ messages in thread
From: Junio C Hamano @ 2014-11-03 18:09 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Nguyễn Thái Ngọc Duy, git

Torsten Bögershausen <tboegi@web.de> writes:

> On 2014-10-27 13.10, Nguyễn Thái Ngọc Duy wrote:
> []
>
>> +static void xmkdir(const char *path)
>> +{
>> +	if (mkdir(path, 0700))
>> +		die_errno(_("failed to create directory %s"), path);
>> +}
>
> Does it make sense to ignore EINTR and do a "retry"?
> Another question is if the function could be called mkdir_or_die() instead?

Probably.  It is in the same league as xread() and xwrite().

> I realized that there are 2 families of xfunc() in wrapper.c, some die, some retry.

The general idea of wrapper.c is to free callers from coding
something they have only one sensible choice for.  The ones that
retry such as xread() and xwrite() are to free callers from
diagnosing and retrying upon EAGAIN/EINTR because they want to read
or write and there is no other thing they want to do.  The ones that
die such as xmkstemp() are for the failure modes that are likely
without any other choice.  xcalloc() and other allocators started as
(and still are) the latter, but instead of immediately dying they
know a last-ditch effort that is common to all Git operations.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [PATCH v2 00/22] untracked cache updates
  2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
                   ` (18 preceding siblings ...)
  2014-10-27 12:10 ` [PATCH 19/19] t7063: tests for " Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39 ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
                     ` (21 more replies)
  19 siblings, 22 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Updated based on comments from the list, including three new patches:

 - 16/22 allows ignoring the untracked cache without destroying it (for
   comparison and verification)
 - 21/22 and 22/22 add some protection against filesystem or operating
   system changes

Also fix 'update-index --untracked-cache' essentially merging the
split index back because I set the wrong update flag.

This series is also available on github [1] but you will have to
ignore the few top debugging patches first. Diff against the version
on 'pu' below.

[1] https://github.com/pclouds/git.git untracked-cache

diff --git a/builtin/update-index.c b/builtin/update-index.c
index c1c18db..f23ec83 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -115,6 +115,7 @@ static int test_if_untracked_cache_is_supported(void)
 	fd = create_file("dir-mtime-test/newfile");
 	xstat("dir-mtime-test", &st);
 	if (!match_stat_data(&base, &st)) {
+		close(fd);
 		fputc('\n', stderr);
 		fprintf_ln(stderr,_("directory stat info does not "
 				    "change after adding a new file"));
@@ -127,6 +128,7 @@ static int test_if_untracked_cache_is_supported(void)
 	xmkdir("dir-mtime-test/new-dir");
 	xstat("dir-mtime-test", &st);
 	if (!match_stat_data(&base, &st)) {
+		close(fd);
 		fputc('\n', stderr);
 		fprintf_ln(stderr, _("directory stat info does not change "
 				     "after adding a new directory"));
@@ -1094,10 +1096,10 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		/* should be the same flags used by git-status */
 		uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
 		the_index.untracked = uc;
-		the_index.cache_changed |= SOMETHING_CHANGED;
+		the_index.cache_changed |= UNTRACKED_CHANGED;
 	} else if (!untracked_cache && the_index.untracked) {
 		the_index.untracked = NULL;
-		the_index.cache_changed |= SOMETHING_CHANGED;
+		the_index.cache_changed |= UNTRACKED_CHANGED;
 	}
 
 	if (active_cache_changed) {
diff --git a/compat/mingw.c b/compat/mingw.c
index c5c37e5..b817678 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -2128,3 +2128,14 @@ void mingw_startup()
 	/* initialize Unicode console */
 	winansi_init();
 }
+
+int uname(struct utsname *buf)
+{
+	DWORD v = GetVersion();
+	memset(buf, 0, sizeof(*buf));
+	sprintf(buf->sysname, "Windows");
+	sprintf(buf->release, "%u.%u", v & 0xff, (v >> 8) & 0xff);
+	/* assuming NT variants only.. */
+	sprintf(buf->version, "%u", (v >> 16) & 0x7fff);
+	return 0;
+}
diff --git a/compat/mingw.h b/compat/mingw.h
index df0e320..d00ba7a 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -77,6 +77,14 @@ struct itimerval {
 };
 #define ITIMER_REAL 0
 
+struct utsname {
+	char sysname[16];
+	char nodename[1];
+	char release[16];
+	char version[16];
+	char machine[1];
+};
+
 /*
  * sanitize preprocessor namespace polluted by Windows headers defining
  * macros which collide with git local versions
@@ -166,6 +174,7 @@ struct passwd *getpwuid(uid_t uid);
 int setitimer(int type, struct itimerval *in, struct itimerval *out);
 int sigaction(int sig, struct sigaction *in, struct sigaction *out);
 int link(const char *oldpath, const char *newpath);
+int uname(struct utsname *buf);
 
 /*
  * replacements of existing functions
diff --git a/dir.c b/dir.c
index 0ae2188..2324c52 100644
--- a/dir.c
+++ b/dir.c
@@ -482,7 +482,7 @@ void add_exclude(const char *string, const char *base,
 }
 
 static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
-						struct sha1_stat *ss)
+						struct sha1_stat *sha1_stat)
 {
 	int pos, len;
 	unsigned long sz;
@@ -501,9 +501,9 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
 		return NULL;
 	}
 	*size = xsize_t(sz);
-	if (ss) {
-		memset(&ss->stat, 0, sizeof(ss->stat));
-		hashcpy(ss->sha1, active_cache[pos]->sha1);
+	if (sha1_stat) {
+		memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
+		hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
 	}
 	return data;
 }
@@ -634,7 +634,7 @@ static void invalidate_directory(struct untracked_cache *uc,
  */
 static int add_excludes(const char *fname, const char *base, int baselen,
 			struct exclude_list *el, int check_index,
-			struct sha1_stat *ss, int ss_valid)
+			struct sha1_stat *sha1_stat)
 {
 	struct stat st;
 	int fd, i, lineno = 1;
@@ -648,7 +648,7 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 		if (0 <= fd)
 			close(fd);
 		if (!check_index ||
-		    (buf = read_skip_worktree_file_from_index(fname, &size, ss)) == NULL)
+		    (buf = read_skip_worktree_file_from_index(fname, &size, sha1_stat)) == NULL)
 			return -1;
 		if (size == 0) {
 			free(buf);
@@ -661,9 +661,10 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 	} else {
 		size = xsize_t(st.st_size);
 		if (size == 0) {
-			if (ss) {
-				fill_stat_data(&ss->stat, &st);
-				hashcpy(ss->sha1, EMPTY_BLOB_SHA1_BIN);
+			if (sha1_stat) {
+				fill_stat_data(&sha1_stat->stat, &st);
+				hashcpy(sha1_stat->sha1, EMPTY_BLOB_SHA1_BIN);
+				sha1_stat->valid = 1;
 			}
 			close(fd);
 			return 0;
@@ -676,19 +677,20 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 		}
 		buf[size++] = '\n';
 		close(fd);
-		if (ss) {
+		if (sha1_stat) {
 			int pos;
-			if (ss_valid &&
-			    !match_stat_data_racy(&the_index, &ss->stat, &st))
+			if (sha1_stat->valid &&
+			    !match_stat_data_racy(&the_index, &sha1_stat->stat, &st))
 				; /* no content change, ss->sha1 still good */
 			else if (check_index &&
 				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
 				 !ce_stage(active_cache[pos]) &&
 				 ce_uptodate(active_cache[pos]))
-				hashcpy(ss->sha1, active_cache[pos]->sha1);
+				hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
 			else
-				hash_sha1_file(buf, size, "blob", ss->sha1);
-			fill_stat_data(&ss->stat, &st);
+				hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
+			fill_stat_data(&sha1_stat->stat, &st);
+			sha1_stat->valid = 1;
 		}
 	}
 
@@ -712,7 +714,7 @@ int add_excludes_from_file_to_list(const char *fname, const char *base,
 				   int baselen, struct exclude_list *el,
 				   int check_index)
 {
-	return add_excludes(fname, base, baselen, el, check_index, NULL, 0);
+	return add_excludes(fname, base, baselen, el, check_index, NULL);
 }
 
 struct exclude_list *add_exclude_list(struct dir_struct *dir,
@@ -733,7 +735,7 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
  * Used to set up core.excludesfile and .git/info/exclude lists.
  */
 static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
-				     struct sha1_stat *ss, int ss_valid)
+				     struct sha1_stat *sha1_stat)
 {
 	struct exclude_list *el;
 	/*
@@ -744,14 +746,14 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 	if (!dir->untracked)
 		dir->unmanaged_exclude_files++;
 	el = add_exclude_list(dir, EXC_FILE, fname);
-	if (add_excludes(fname, "", 0, el, 0, ss, ss_valid) < 0)
+	if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
 		die("cannot use %s as an exclude file", fname);
 }
 
 void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 {
 	dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
-	add_excludes_from_file_1(dir, fname, NULL, 0);
+	add_excludes_from_file_1(dir, fname, NULL);
 }
 
 int match_basename(const char *basename, int basenamelen,
@@ -974,7 +976,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	while (current < baselen) {
 		struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 		const char *cp;
-		struct sha1_stat ss;
+		struct sha1_stat sha1_stat;
 
 		if (current < 0) {
 			cp = base;
@@ -1015,7 +1017,8 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		}
 
 		/* Try to read per-directory file */
-		hashclr(ss.sha1);
+		hashclr(sha1_stat.sha1);
+		sha1_stat.valid = 0;
 		if (dir->exclude_per_dir &&
 		    /*
 		     * If we know that no files have been added in
@@ -1044,7 +1047,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			strbuf_addstr(&sb, dir->exclude_per_dir);
 			el->src = strbuf_detach(&sb, NULL);
 			add_excludes(el->src, el->src, stk->baselen, el, 1,
-				     untracked ? &ss : NULL, 0);
+				     untracked ? &sha1_stat : NULL);
 		}
 		/*
 		 * NEEDSWORK: when untracked cache is enabled, prep_exclude()
@@ -1060,9 +1063,10 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		 * last_exclude_matching(). Be careful about ignore rule
 		 * order, though, if you do that.
 		 */
-		if (untracked && hashcmp(ss.sha1, untracked->exclude_sha1)) {
+		if (untracked &&
+		    hashcmp(sha1_stat.sha1, untracked->exclude_sha1)) {
 			invalidate_gitignore(dir->untracked, untracked);
-			hashcpy(untracked->exclude_sha1, ss.sha1);
+			hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
 		}
 		dir->exclude_stack = stk;
 		current = stk->baselen;
@@ -1457,12 +1461,11 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
 					   int baselen,
 					   const struct path_simplify *simplify)
 {
+	strbuf_setlen(path, baselen);
 	if (!cdir->ucd) {
-		strbuf_setlen(path, baselen);
 		strbuf_addstr(path, cdir->file);
 		return path_untracked;
 	}
-	strbuf_setlen(path, baselen);
 	strbuf_addstr(path, cdir->ucd->name);
 	/* treat_one_path() does this before it calls treat_directory() */
 	if (path->buf[path->len - 1] != '/')
@@ -1474,7 +1477,6 @@ static enum path_treatment treat_path_fast(struct dir_struct *dir,
 		 * with check_only set.
 		 */
 		return read_directory_recursive(dir, path->buf, path->len,
-
 						cdir->ucd, 1, simplify);
 	/*
 	 * We get path_recurse in the first run when
@@ -1582,7 +1584,7 @@ static int open_cached_dir(struct cached_dir *cdir,
 	return 0;
 }
 
-int read_cached_dir(struct cached_dir *cdir)
+static int read_cached_dir(struct cached_dir *cdir)
 {
 	if (cdir->fdir) {
 		cdir->de = readdir(cdir->fdir);
@@ -1796,7 +1798,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	struct untracked_cache_dir *root;
 	int i;
 
-	if (!dir->untracked)
+	if (!dir->untracked || getenv("GIT_DISABLE_UNTRACKED_CACHE"))
 		return NULL;
 
 	/*
@@ -2101,30 +2103,12 @@ void setup_standard_excludes(struct dir_struct *dir)
 		home_config_paths(NULL, &xdg_path, "ignore");
 		excludes_file = xdg_path;
 	}
-	if (!access_or_warn(path, R_OK, 0)) {
-		struct sha1_stat *ss = NULL;
-		int ss_valid = 0;
-		if (dir->untracked) {
-			ss = &dir->ss_info_exclude;
-			if (dir->untracked->loaded) {
-				*ss = dir->untracked->ss_info_exclude;
-				ss_valid = 1;
-			}
-		}
-		add_excludes_from_file_1(dir, path, ss, ss_valid);
-	}
-	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0)) {
-		struct sha1_stat *ss = NULL;
-		int ss_valid = 0;
-		if (dir->untracked) {
-			ss = &dir->ss_excludes_file;
-			if (dir->untracked->loaded) {
-				*ss = dir->untracked->ss_excludes_file;
-				ss_valid = 1;
-			}
-		}
-		add_excludes_from_file_1(dir, excludes_file, ss, ss_valid);
-	}
+	if (!access_or_warn(path, R_OK, 0))
+		add_excludes_from_file_1(dir, path,
+					 dir->untracked ? &dir->ss_info_exclude : NULL);
+	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
+		add_excludes_from_file_1(dir, excludes_file,
+					 dir->untracked ? &dir->ss_excludes_file : NULL);
 }
 
 int remove_path(const char *name)
@@ -2243,10 +2227,22 @@ static void write_one_dir(struct strbuf *out, struct untracked_cache_dir *untrac
 			write_one_dir(out, untracked->dirs[i]);
 }
 
+static void get_ident_string(struct strbuf *sb)
+{
+	struct utsname uts;
+
+	if (uname(&uts))
+		die_errno(_("failed to get kernel name and information"));
+	strbuf_addf(sb, "Location %s, system %s %s %s", get_git_work_tree(),
+		    uts.sysname, uts.release, uts.version);
+}
+
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
 {
 	struct ondisk_untracked_cache *ouc;
-	int len = 0;
+	struct strbuf sb = STRBUF_INIT;
+	unsigned char varbuf[16];
+	int len = 0, varint_len;
 	if (untracked->exclude_per_dir)
 		len = strlen(untracked->exclude_per_dir);
 	ouc = xmalloc(sizeof(*ouc) + len);
@@ -2256,6 +2252,13 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
 	ouc->dir_flags = htonl(untracked->dir_flags);
 	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+
+	get_ident_string(&sb);
+	varint_len = encode_varint(sb.len + 1, varbuf);
+	strbuf_add(out, varbuf, varint_len);
+	strbuf_add(out, sb.buf, sb.len + 1);
+	strbuf_release(&sb);
+
 	strbuf_add(out, ouc, sizeof(*ouc) + len);
 	if (untracked->root)
 		write_one_dir(out, untracked->root);
@@ -2360,28 +2363,55 @@ static int read_one_dir(struct untracked_cache_dir **untracked_,
 	return data - data_;
 }
 
+static void load_sha1_stat(struct sha1_stat *sha1_stat,
+			   const struct stat_data *stat,
+			   const unsigned char *sha1)
+{
+	stat_data_from_disk(&sha1_stat->stat, stat);
+	hashcpy(sha1_stat->sha1, sha1);
+	sha1_stat->valid = 1;
+}
+
 struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
 {
-	const struct ondisk_untracked_cache *ouc = data;
+	const struct ondisk_untracked_cache *ouc;
 	struct untracked_cache *uc;
+	const unsigned char *next = data;
+	struct strbuf sb = STRBUF_INIT;
 	int len;
 
+	len = decode_varint(&next);
+	if (sz <= (next - (const unsigned char *)data) + len ||
+	    next[len - 1] != '\0')
+		return NULL;
+
+	get_ident_string(&sb);
+	if (strcmp(sb.buf, (const char *)next)) {
+		warning(_("system identification does not match, untracked cache disabled.\n"
+			  "Stored: %s\nCurrent: %s\n"),
+			next, sb.buf);
+		strbuf_release(&sb);
+		return NULL;
+	}
+	strbuf_release(&sb);
+	ouc = (const struct ondisk_untracked_cache *)(next + len);
+	sz -= (const char *)ouc - (const char *)data;
+
 	if (sz < sizeof(*ouc))
 		return NULL;
 
 	uc = xcalloc(1, sizeof(*uc));
-	stat_data_from_disk(&uc->ss_info_exclude.stat, &ouc->info_exclude_stat);
-	stat_data_from_disk(&uc->ss_excludes_file.stat, &ouc->excludes_file_stat);
-	hashcpy(uc->ss_info_exclude.sha1, ouc->info_exclude_sha1);
-	hashcpy(uc->ss_excludes_file.sha1, ouc->excludes_file_sha1);
+	load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
+		       ouc->info_exclude_sha1);
+	load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
+		       ouc->excludes_file_sha1);
 	uc->dir_flags = get_be32(&ouc->dir_flags);
 	uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
-	uc->loaded = 1;
 	len = sizeof(*ouc) + strlen(ouc->exclude_per_dir);
 	if (sz == len)
 		return uc;
 	if (sz > len &&
-	    read_one_dir(&uc->root, (const unsigned char *)data + len,
+	    read_one_dir(&uc->root, (const unsigned char *)ouc + len,
 			 sz - len) == sz - len)
 		return uc;
 	free_untracked_cache(uc);
diff --git a/dir.h b/dir.h
index 8c29324..708cdd5 100644
--- a/dir.h
+++ b/dir.h
@@ -77,6 +77,7 @@ struct exclude_list_group {
 struct sha1_stat {
 	struct stat_data stat;
 	unsigned char sha1[20];
+	int valid;
 };
 
 /*
@@ -110,15 +111,15 @@ struct sha1_stat {
 struct untracked_cache_dir {
 	struct untracked_cache_dir **dirs;
 	char **untracked;
-	/* null SHA-1 means this directory does not have .gitignore */
-	unsigned char exclude_sha1[20];
 	struct stat_data stat_data;
-	unsigned int recurse : 1;
+	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
+	unsigned int untracked_nr;
 	unsigned int check_only : 1;
 	/* all data except 'dirs' in this struct are good */
 	unsigned int valid : 1;
-	unsigned int untracked_nr : 29;
-	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
+	unsigned int recurse : 1;
+	/* null SHA-1 means this directory does not have .gitignore */
+	unsigned char exclude_sha1[20];
 	char name[1];
 };
 
@@ -137,7 +138,6 @@ struct untracked_cache {
 	int gitignore_invalidated;
 	int dir_invalidated;
 	int dir_opened;
-	int loaded;
 };
 
 struct dir_struct {
diff --git a/git-compat-util.h b/git-compat-util.h
index f587749..6b1f259 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -125,6 +125,7 @@
 #else
 #include <poll.h>
 #endif
+#include <sys/utsname.h>
 
 #if defined(__MINGW32__)
 /* pull in Windows compatibility stuff */
diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
index 710441e..25d855d 100644
--- a/test-dump-untracked-cache.c
+++ b/test-dump-untracked-cache.c
@@ -44,6 +44,7 @@ int main(int ac, char **av)
 {
 	struct untracked_cache *uc;
 	struct strbuf base = STRBUF_INIT;
+	setup_git_directory();
 	if (read_cache() < 0)
 		die("unable to read index file");
 	uc = the_index.untracked;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-17 19:31     ` David Turner
  2014-11-08  9:39   ` [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
                     ` (20 subsequent siblings)
  21 siblings, 1 reply; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This is not used anywhere yet. But the goal is to compare quickly if a
.gitignore file has changed when we have the SHA-1 of both old (cached
somewhere) and new (from index or a tree) versions.

Helped-by: Junio C Hamano <gitster@pobox.com>
Helped-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++-------
 dir.h |  6 ++++++
 2 files changed, 52 insertions(+), 7 deletions(-)

diff --git a/dir.c b/dir.c
index fcb6872..4cc936b 100644
--- a/dir.c
+++ b/dir.c
@@ -466,7 +466,8 @@ void add_exclude(const char *string, const char *base,
 	x->el = el;
 }
 
-static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
+static void *read_skip_worktree_file_from_index(const char *path, size_t *size,
+						struct sha1_stat *sha1_stat)
 {
 	int pos, len;
 	unsigned long sz;
@@ -485,6 +486,10 @@ static void *read_skip_worktree_file_from_index(const char *path, size_t *size)
 		return NULL;
 	}
 	*size = xsize_t(sz);
+	if (sha1_stat) {
+		memset(&sha1_stat->stat, 0, sizeof(sha1_stat->stat));
+		hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
+	}
 	return data;
 }
 
@@ -529,11 +534,18 @@ static void trim_trailing_spaces(char *buf)
 		*last_space = '\0';
 }
 
-int add_excludes_from_file_to_list(const char *fname,
-				   const char *base,
-				   int baselen,
-				   struct exclude_list *el,
-				   int check_index)
+/*
+ * Given a file with name "fname", read it (either from disk, or from
+ * the index if "check_index" is non-zero), parse it and store the
+ * exclude rules in "el".
+ *
+ * If "sha1_stat" is not NULL, compute SHA-1 of the exclude file and
+ * fill stat data from disk (only valid if add_excludes returns zero).
+ * If sha1_stat->valid is non-zero, it must contain good values as input.
+ */
+static int add_excludes(const char *fname, const char *base, int baselen,
+			struct exclude_list *el, int check_index,
+			struct sha1_stat *sha1_stat)
 {
 	struct stat st;
 	int fd, i, lineno = 1;
@@ -547,7 +559,7 @@ int add_excludes_from_file_to_list(const char *fname,
 		if (0 <= fd)
 			close(fd);
 		if (!check_index ||
-		    (buf = read_skip_worktree_file_from_index(fname, &size)) == NULL)
+		    (buf = read_skip_worktree_file_from_index(fname, &size, sha1_stat)) == NULL)
 			return -1;
 		if (size == 0) {
 			free(buf);
@@ -560,6 +572,11 @@ int add_excludes_from_file_to_list(const char *fname,
 	} else {
 		size = xsize_t(st.st_size);
 		if (size == 0) {
+			if (sha1_stat) {
+				fill_stat_data(&sha1_stat->stat, &st);
+				hashcpy(sha1_stat->sha1, EMPTY_BLOB_SHA1_BIN);
+				sha1_stat->valid = 1;
+			}
 			close(fd);
 			return 0;
 		}
@@ -571,6 +588,21 @@ int add_excludes_from_file_to_list(const char *fname,
 		}
 		buf[size++] = '\n';
 		close(fd);
+		if (sha1_stat) {
+			int pos;
+			if (sha1_stat->valid &&
+			    !match_stat_data(&sha1_stat->stat, &st))
+				; /* no content change, ss->sha1 still good */
+			else if (check_index &&
+				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
+				 !ce_stage(active_cache[pos]) &&
+				 ce_uptodate(active_cache[pos]))
+				hashcpy(sha1_stat->sha1, active_cache[pos]->sha1);
+			else
+				hash_sha1_file(buf, size, "blob", sha1_stat->sha1);
+			fill_stat_data(&sha1_stat->stat, &st);
+			sha1_stat->valid = 1;
+		}
 	}
 
 	el->filebuf = buf;
@@ -589,6 +621,13 @@ int add_excludes_from_file_to_list(const char *fname,
 	return 0;
 }
 
+int add_excludes_from_file_to_list(const char *fname, const char *base,
+				   int baselen, struct exclude_list *el,
+				   int check_index)
+{
+	return add_excludes(fname, base, baselen, el, check_index, NULL);
+}
+
 struct exclude_list *add_exclude_list(struct dir_struct *dir,
 				      int group_type, const char *src)
 {
diff --git a/dir.h b/dir.h
index 6c45e9d..cdca71b 100644
--- a/dir.h
+++ b/dir.h
@@ -73,6 +73,12 @@ struct exclude_list_group {
 	struct exclude_list *el;
 };
 
+struct sha1_stat {
+	struct stat_data stat;
+	unsigned char sha1[20];
+	int valid;
+};
+
 struct dir_struct {
 	int nr, alloc;
 	int ignored_nr, ignored_alloc;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08 17:08     ` brian m. carlson
  2014-11-17 20:35     ` David Turner
  2014-11-08  9:39   ` [PATCH v2 03/22] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
                     ` (19 subsequent siblings)
  21 siblings, 2 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

The idea is if we can capture all input and (non-recursive) output of
read_directory_recursive(), and can verify later that all the input is
the same, then the second r_d_r() should produce the same output as in
the first run.

The requirement for this to work is that stat info of a directory MUST
change if an entry is added to or removed from that directory (and
should not change often otherwise). If your OS and filesystem do not
meet this requirement, untracked cache is not for you. Most file
systems on *nix should be fine. On Windows, NTFS is fine while FAT may
not be [1] even though FAT on Linux seems to be fine.

The list of input of r_d_r() is in the big comment block in dir.h. In
short, the output of a directory (not counting subdirs) mainly depends
on stat info of the directory in question, all .gitignore leading to
it and the check_only flag when r_d_r() is called recursively. This
patch records all this info (and the output) as r_d_r() runs.

Two hash_sha1_file() are required for $GIT_DIR/info/exclude and
core.excludesfile unless their stat data matches. hash_sha1_file() is
only needed when .gitignore files in the worktree are modified,
otherwise their SHA-1 in index is used (see the previous patch).

We could store stat data for .gitignore files so we don't have to
rehash them if their content is different from index, but I think
.gitignore files are rarely modified, so not worth extra cache data
(and hashing penalty in read-cache.c:verify_hdr(), as we will be
storing this as an index extension).

The implication is, if you change .gitignore, you had better add it to
the index soon or you lose all the benefit of untracked cache because a
modified .gitignore invalidates all subdirs recursively. This is
especially bad for .gitignore at root.

This cached output is about untracked files only, not ignored files
because the number of untracked files is usually small, so small cache
overhead, while the number of ignored files could go really high
(e.g. *.o files mixing with source code).

[1] "Description of NTFS date and time stamps for files and folders"
    http://support.microsoft.com/kb/299648

Helped-by: Torsten Bögershausen <tboegi@web.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 141 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
 dir.h |  60 ++++++++++++++++++++++++++++
 2 files changed, 182 insertions(+), 19 deletions(-)

diff --git a/dir.c b/dir.c
index 4cc936b..19f4b1e 100644
--- a/dir.c
+++ b/dir.c
@@ -32,7 +32,7 @@ enum path_treatment {
 };
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
-	const char *path, int len,
+	const char *path, int len, struct untracked_cache_dir *untracked,
 	int check_only, const struct path_simplify *simplify);
 static int get_dtype(struct dirent *de, const char *path, int len);
 
@@ -535,6 +535,53 @@ static void trim_trailing_spaces(char *buf)
 }
 
 /*
+ * Given a subdirectory name and "dir" of the current directory,
+ * search the subdir in "dir" and return it, or create a new one if it
+ * does not exist in "dir".
+ *
+ * If "name" has the trailing slash, it'll be excluded in the search.
+ */
+static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
+						    struct untracked_cache_dir *dir,
+						    const char *name, int len)
+{
+	int first, last;
+	struct untracked_cache_dir *d;
+	if (!dir)
+		return NULL;
+	if (len && name[len - 1] == '/')
+		len--;
+	first = 0;
+	last = dir->dirs_nr;
+	while (last > first) {
+		int cmp, next = (last + first) >> 1;
+		d = dir->dirs[next];
+		cmp = strncmp(name, d->name, len);
+		if (!cmp && strlen(d->name) > len)
+			cmp = -1;
+		if (!cmp)
+			return d;
+		if (cmp < 0) {
+			last = next;
+			continue;
+		}
+		first = next+1;
+	}
+
+	uc->dir_created++;
+	d = xmalloc(sizeof(*d) + len);
+	memset(d, 0, sizeof(*d) + len);
+	memcpy(d->name, name, len);
+
+	ALLOC_GROW(dir->dirs, dir->dirs_nr + 1, dir->dirs_alloc);
+	memmove(dir->dirs + first + 1, dir->dirs + first,
+		(dir->dirs_nr - first) * sizeof(*dir->dirs));
+	dir->dirs_nr++;
+	dir->dirs[first] = d;
+	return d;
+}
+
+/*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
  * exclude rules in "el".
@@ -645,14 +692,20 @@ struct exclude_list *add_exclude_list(struct dir_struct *dir,
 /*
  * Used to set up core.excludesfile and .git/info/exclude lists.
  */
-void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
+				     struct sha1_stat *sha1_stat)
 {
 	struct exclude_list *el;
 	el = add_exclude_list(dir, EXC_FILE, fname);
-	if (add_excludes_from_file_to_list(fname, "", 0, el, 0) < 0)
+	if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
 		die("cannot use %s as an exclude file", fname);
 }
 
+void add_excludes_from_file(struct dir_struct *dir, const char *fname)
+{
+	add_excludes_from_file_1(dir, fname, NULL);
+}
+
 int match_basename(const char *basename, int basenamelen,
 		   const char *pattern, int prefix, int patternlen,
 		   int flags)
@@ -827,6 +880,7 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	struct exclude_list_group *group;
 	struct exclude_list *el;
 	struct exclude_stack *stk = NULL;
+	struct untracked_cache_dir *untracked;
 	int current;
 
 	group = &dir->exclude_list_group[EXC_DIRS];
@@ -864,9 +918,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 	/* Read from the parent directories and push them down. */
 	current = stk ? stk->baselen : -1;
 	strbuf_setlen(&dir->basebuf, current < 0 ? 0 : current);
+	if (dir->untracked)
+		untracked = stk ? stk->ucd : dir->untracked->root;
+	else
+		untracked = NULL;
+
 	while (current < baselen) {
 		struct exclude_stack *stk = xcalloc(1, sizeof(*stk));
 		const char *cp;
+		struct sha1_stat sha1_stat;
 
 		if (current < 0) {
 			cp = base;
@@ -876,10 +936,15 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			if (!cp)
 				die("oops in prep_exclude");
 			cp++;
+			untracked =
+				lookup_untracked(dir->untracked, untracked,
+						 base + current,
+						 cp - base - current);
 		}
 		stk->prev = dir->exclude_stack;
 		stk->baselen = cp - base;
 		stk->exclude_ix = group->nr;
+		stk->ucd = untracked;
 		el = add_exclude_list(dir, EXC_DIRS, NULL);
 		strbuf_add(&dir->basebuf, base + current, stk->baselen - current);
 		assert(stk->baselen == dir->basebuf.len);
@@ -902,6 +967,8 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		}
 
 		/* Try to read per-directory file */
+		hashclr(sha1_stat.sha1);
+		sha1_stat.valid = 0;
 		if (dir->exclude_per_dir) {
 			/*
 			 * dir->basebuf gets reused by the traversal, but we
@@ -915,8 +982,11 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			strbuf_addbuf(&sb, &dir->basebuf);
 			strbuf_addstr(&sb, dir->exclude_per_dir);
 			el->src = strbuf_detach(&sb, NULL);
-			add_excludes_from_file_to_list(el->src, el->src,
-						       stk->baselen, el, 1);
+			add_excludes(el->src, el->src, stk->baselen, el, 1,
+				     untracked ? &sha1_stat : NULL);
+		}
+		if (untracked) {
+			hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
 		}
 		dir->exclude_stack = stk;
 		current = stk->baselen;
@@ -1097,6 +1167,7 @@ static enum exist_status directory_exists_in_index(const char *dirname, int len)
  *  (c) otherwise, we recurse into it.
  */
 static enum path_treatment treat_directory(struct dir_struct *dir,
+	struct untracked_cache_dir *untracked,
 	const char *dirname, int len, int exclude,
 	const struct path_simplify *simplify)
 {
@@ -1124,7 +1195,9 @@ static enum path_treatment treat_directory(struct dir_struct *dir,
 	if (!(dir->flags & DIR_HIDE_EMPTY_DIRECTORIES))
 		return exclude ? path_excluded : path_untracked;
 
-	return read_directory_recursive(dir, dirname, len, 1, simplify);
+	untracked = lookup_untracked(dir->untracked, untracked, dirname, len);
+	return read_directory_recursive(dir, dirname, len,
+					untracked, 1, simplify);
 }
 
 /*
@@ -1240,6 +1313,7 @@ static int get_dtype(struct dirent *de, const char *path, int len)
 }
 
 static enum path_treatment treat_one_path(struct dir_struct *dir,
+					  struct untracked_cache_dir *untracked,
 					  struct strbuf *path,
 					  const struct path_simplify *simplify,
 					  int dtype, struct dirent *de)
@@ -1292,7 +1366,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 		return path_none;
 	case DT_DIR:
 		strbuf_addch(path, '/');
-		return treat_directory(dir, path->buf, path->len, exclude,
+		return treat_directory(dir, untracked, path->buf, path->len, exclude,
 			simplify);
 	case DT_REG:
 	case DT_LNK:
@@ -1301,6 +1375,7 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 }
 
 static enum path_treatment treat_path(struct dir_struct *dir,
+				      struct untracked_cache_dir *untracked,
 				      struct dirent *de,
 				      struct strbuf *path,
 				      int baselen,
@@ -1316,7 +1391,16 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 		return path_none;
 
 	dtype = DTYPE(de);
-	return treat_one_path(dir, path, simplify, dtype, de);
+	return treat_one_path(dir, untracked, path, simplify, dtype, de);
+}
+
+static void add_untracked(struct untracked_cache_dir *dir, const char *name)
+{
+	if (!dir)
+		return;
+	ALLOC_GROW(dir->untracked, dir->untracked_nr + 1,
+		   dir->untracked_alloc);
+	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
 /*
@@ -1332,7 +1416,7 @@ static enum path_treatment treat_path(struct dir_struct *dir,
  */
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    const char *base, int baselen,
-				    int check_only,
+				    struct untracked_cache_dir *untracked, int check_only,
 				    const struct path_simplify *simplify)
 {
 	DIR *fdir;
@@ -1346,24 +1430,36 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	if (!fdir)
 		goto out;
 
+	if (untracked)
+		untracked->check_only = !!check_only;
+
 	while ((de = readdir(fdir)) != NULL) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, de, &path, baselen, simplify);
+		state = treat_path(dir, untracked, de, &path, baselen, simplify);
+
 		if (state > dir_state)
 			dir_state = state;
 
 		/* recurse into subdir if instructed by treat_path */
 		if (state == path_recurse) {
-			subdir_state = read_directory_recursive(dir, path.buf,
-				path.len, check_only, simplify);
+			struct untracked_cache_dir *ud;
+			ud = lookup_untracked(dir->untracked, untracked,
+					      path.buf + baselen,
+					      path.len - baselen);
+			subdir_state =
+				read_directory_recursive(dir, path.buf, path.len,
+							 ud, check_only, simplify);
 			if (subdir_state > dir_state)
 				dir_state = subdir_state;
 		}
 
 		if (check_only) {
 			/* abort early if maximum state has been reached */
-			if (dir_state == path_untracked)
+			if (dir_state == path_untracked) {
+				if (untracked)
+					add_untracked(untracked, path.buf + baselen);
 				break;
+			}
 			/* skip the dir_add_* part */
 			continue;
 		}
@@ -1381,8 +1477,11 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			break;
 
 		case path_untracked:
-			if (!(dir->flags & DIR_SHOW_IGNORED))
-				dir_add_name(dir, path.buf, path.len);
+			if (dir->flags & DIR_SHOW_IGNORED)
+				break;
+			dir_add_name(dir, path.buf, path.len);
+			if (untracked)
+				add_untracked(untracked, path.buf + baselen);
 			break;
 
 		default:
@@ -1459,7 +1558,7 @@ static int treat_leading_path(struct dir_struct *dir,
 			break;
 		if (simplify_away(sb.buf, sb.len, simplify))
 			break;
-		if (treat_one_path(dir, &sb, simplify,
+		if (treat_one_path(dir, NULL, &sb, simplify,
 				   DT_DIR, NULL) == path_none)
 			break; /* do not recurse into it */
 		if (len <= baselen) {
@@ -1499,7 +1598,9 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	 */
 	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
 	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len, 0, simplify);
+		read_directory_recursive(dir, path, len,
+					 dir->untracked ? dir->untracked->root : NULL,
+					 0, simplify);
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
@@ -1666,9 +1767,11 @@ void setup_standard_excludes(struct dir_struct *dir)
 		excludes_file = xdg_path;
 	}
 	if (!access_or_warn(path, R_OK, 0))
-		add_excludes_from_file(dir, path);
+		add_excludes_from_file_1(dir, path,
+					 dir->untracked ? &dir->ss_info_exclude : NULL);
 	if (excludes_file && !access_or_warn(excludes_file, R_OK, 0))
-		add_excludes_from_file(dir, excludes_file);
+		add_excludes_from_file_1(dir, excludes_file,
+					 dir->untracked ? &dir->ss_excludes_file : NULL);
 }
 
 int remove_path(const char *name)
diff --git a/dir.h b/dir.h
index cdca71b..278c464 100644
--- a/dir.h
+++ b/dir.h
@@ -66,6 +66,7 @@ struct exclude_stack {
 	struct exclude_stack *prev; /* the struct exclude_stack for the parent directory */
 	int baselen;
 	int exclude_ix; /* index of exclude_list within EXC_DIRS exclude_list_group */
+	struct untracked_cache_dir *ucd;
 };
 
 struct exclude_list_group {
@@ -79,6 +80,60 @@ struct sha1_stat {
 	int valid;
 };
 
+/*
+ *  Untracked cache
+ *
+ *  The following inputs are sufficient to determine what files in a
+ *  directory are excluded:
+ *
+ *   - The list of files and directories of the directory in question
+ *   - The $GIT_DIR/index
+ *   - dir_struct flags
+ *   - The content of $GIT_DIR/info/exclude
+ *   - The content of core.excludesfile
+ *   - The content (or the lack) of .gitignore of all parent directories
+ *     from $GIT_WORK_TREE
+ *   - The check_only flag in read_directory_recursive (for
+ *     DIR_HIDE_EMPTY_DIRECTORIES)
+ *
+ *  The first input can be checked using directory mtime. In many
+ *  filesystems, directory mtime (stat_data field) is updated when its
+ *  files or direct subdirs are added or removed.
+ *
+ *  The second one can be hooked from cache_tree_invalidate_path().
+ *  Whenever a file (or a submodule) is added or removed from a
+ *  directory, we invalidate that directory.
+ *
+ *  The remaining inputs are easy, their SHA-1 could be used to verify
+ *  their contents (exclude_sha1[], ss_info_exclude and
+ *  ss_excludes_file)
+ */
+struct untracked_cache_dir {
+	struct untracked_cache_dir **dirs;
+	char **untracked;
+	struct stat_data stat_data;
+	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
+	unsigned int untracked_nr;
+	unsigned int check_only : 1;
+	/* null SHA-1 means this directory does not have .gitignore */
+	unsigned char exclude_sha1[20];
+	char name[1];
+};
+
+struct untracked_cache {
+	struct sha1_stat ss_info_exclude;
+	struct sha1_stat ss_excludes_file;
+	const char *exclude_per_dir;
+	/*
+	 * dir_struct#flags must match dir_flags or the untracked
+	 * cache is ignored.
+	 */
+	unsigned dir_flags;
+	struct untracked_cache_dir *root;
+	/* Statistics */
+	int dir_created;
+};
+
 struct dir_struct {
 	int nr, alloc;
 	int ignored_nr, ignored_alloc;
@@ -126,6 +181,11 @@ struct dir_struct {
 	struct exclude_stack *exclude_stack;
 	struct exclude *exclude;
 	struct strbuf basebuf;
+
+	/* Enable untracked file cache if set */
+	struct untracked_cache *untracked;
+	struct sha1_stat ss_info_exclude;
+	struct sha1_stat ss_excludes_file;
 };
 
 /*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 03/22] untracked cache: initial untracked cache validation
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 04/22] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
                     ` (18 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Make sure the starting conditions and all global exclude files are
good to go. If not, either disable untracked cache completely, or wipe
out the cache and start fresh.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h |   4 +++
 2 files changed, 114 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 19f4b1e..4f4f302 100644
--- a/dir.c
+++ b/dir.c
@@ -581,6 +581,22 @@ static struct untracked_cache_dir *lookup_untracked(struct untracked_cache *uc,
 	return d;
 }
 
+static void do_invalidate_gitignore(struct untracked_cache_dir *dir)
+{
+	int i;
+	dir->valid = 0;
+	dir->untracked_nr = 0;
+	for (i = 0; i < dir->dirs_nr; i++)
+		do_invalidate_gitignore(dir->dirs[i]);
+}
+
+static void invalidate_gitignore(struct untracked_cache *uc,
+				 struct untracked_cache_dir *dir)
+{
+	uc->gitignore_invalidated++;
+	do_invalidate_gitignore(dir);
+}
+
 /*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
@@ -696,6 +712,13 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 				     struct sha1_stat *sha1_stat)
 {
 	struct exclude_list *el;
+	/*
+	 * catch setup_standard_excludes() that's called before
+	 * dir->untracked is assigned. That function behaves
+	 * differently when dir->untracked is non-NULL.
+	 */
+	if (!dir->untracked)
+		dir->unmanaged_exclude_files++;
 	el = add_exclude_list(dir, EXC_FILE, fname);
 	if (add_excludes(fname, "", 0, el, 0, sha1_stat) < 0)
 		die("cannot use %s as an exclude file", fname);
@@ -703,6 +726,7 @@ static void add_excludes_from_file_1(struct dir_struct *dir, const char *fname,
 
 void add_excludes_from_file(struct dir_struct *dir, const char *fname)
 {
+	dir->unmanaged_exclude_files++; /* see validate_untracked_cache() */
 	add_excludes_from_file_1(dir, fname, NULL);
 }
 
@@ -1571,9 +1595,87 @@ static int treat_leading_path(struct dir_struct *dir,
 	return rc;
 }
 
+static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *dir,
+						      int base_len,
+						      const struct pathspec *pathspec)
+{
+	struct untracked_cache_dir *root;
+
+	if (!dir->untracked)
+		return NULL;
+
+	/*
+	 * We only support $GIT_DIR/info/exclude and core.excludesfile
+	 * as the global ignore rule files. Any other additions
+	 * (e.g. from command line) invalidate the cache. This
+	 * condition also catches running setup_standard_excludes()
+	 * before setting dir->untracked!
+	 */
+	if (dir->unmanaged_exclude_files)
+		return NULL;
+
+	/*
+	 * Optimize for the main use case only: whole-tree git
+	 * status. More work involved in treat_leading_path() if we
+	 * use cache on just a subset of the worktree. pathspec
+	 * support could make the matter even worse.
+	 */
+	if (base_len || (pathspec && pathspec->nr))
+		return NULL;
+
+	/* Different set of flags may produce different results */
+	if (dir->flags != dir->untracked->dir_flags ||
+	    /*
+	     * See treat_directory(), case index_nonexistent. Without
+	     * this flag, we may need to also cache .git file content
+	     * for the resolve_gitlink_ref() call, which we don't.
+	     */
+	    !(dir->flags & DIR_SHOW_OTHER_DIRECTORIES) ||
+	    /* We don't support collecting ignore files */
+	    (dir->flags & (DIR_SHOW_IGNORED | DIR_SHOW_IGNORED_TOO |
+			   DIR_COLLECT_IGNORED)))
+		return NULL;
+
+	/*
+	 * If we use .gitignore in the cache and now you change it to
+	 * .gitexclude, everything will go wrong.
+	 */
+	if (dir->exclude_per_dir != dir->untracked->exclude_per_dir &&
+	    strcmp(dir->exclude_per_dir, dir->untracked->exclude_per_dir))
+		return NULL;
+
+	/*
+	 * EXC_CMDL is not considered in the cache. If people set it,
+	 * skip the cache.
+	 */
+	if (dir->exclude_list_group[EXC_CMDL].nr)
+		return NULL;
+
+	if (!dir->untracked->root) {
+		const int len = sizeof(*dir->untracked->root);
+		dir->untracked->root = xmalloc(len);
+		memset(dir->untracked->root, 0, len);
+	}
+
+	/* Validate $GIT_DIR/info/exclude and core.excludesfile */
+	root = dir->untracked->root;
+	if (hashcmp(dir->ss_info_exclude.sha1,
+		    dir->untracked->ss_info_exclude.sha1)) {
+		invalidate_gitignore(dir->untracked, root);
+		dir->untracked->ss_info_exclude = dir->ss_info_exclude;
+	}
+	if (hashcmp(dir->ss_excludes_file.sha1,
+		    dir->untracked->ss_excludes_file.sha1)) {
+		invalidate_gitignore(dir->untracked, root);
+		dir->untracked->ss_excludes_file = dir->ss_excludes_file;
+	}
+	return root;
+}
+
 int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
 {
 	struct path_simplify *simplify;
+	struct untracked_cache_dir *untracked;
 
 	/*
 	 * Check out create_simplify()
@@ -1597,10 +1699,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	 * create_simplify().
 	 */
 	simplify = create_simplify(pathspec ? pathspec->_raw : NULL);
+	untracked = validate_untracked_cache(dir, len, pathspec);
+	if (!untracked)
+		/*
+		 * make sure untracked cache code path is disabled,
+		 * e.g. prep_exclude()
+		 */
+		dir->untracked = NULL;
 	if (!len || treat_leading_path(dir, path, len, simplify))
-		read_directory_recursive(dir, path, len,
-					 dir->untracked ? dir->untracked->root : NULL,
-					 0, simplify);
+		read_directory_recursive(dir, path, len, untracked, 0, simplify);
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
diff --git a/dir.h b/dir.h
index 278c464..bd51948 100644
--- a/dir.h
+++ b/dir.h
@@ -115,6 +115,8 @@ struct untracked_cache_dir {
 	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
 	unsigned int untracked_nr;
 	unsigned int check_only : 1;
+	/* all data in this struct are good */
+	unsigned int valid : 1;
 	/* null SHA-1 means this directory does not have .gitignore */
 	unsigned char exclude_sha1[20];
 	char name[1];
@@ -132,6 +134,7 @@ struct untracked_cache {
 	struct untracked_cache_dir *root;
 	/* Statistics */
 	int dir_created;
+	int gitignore_invalidated;
 };
 
 struct dir_struct {
@@ -186,6 +189,7 @@ struct dir_struct {
 	struct untracked_cache *untracked;
 	struct sha1_stat ss_info_exclude;
 	struct sha1_stat ss_excludes_file;
+	unsigned unmanaged_exclude_files;
 };
 
 /*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 04/22] untracked cache: invalidate dirs recursively if .gitignore changes
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (2 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 03/22] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 05/22] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
                     ` (17 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

It's easy to see that if an existing .gitignore changes, its SHA-1
would be different and invalidate_gitignore() is called.

If .gitignore is removed, add_excludes() will treat it like an empty
.gitignore, which again should invalidate the cached directory data.

If .gitignore is added, lookup_untracked() already fills initial
.gitignore SHA-1 as "empty file", so again invalidate_gitignore() is
called.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 4f4f302..d5e35ea 100644
--- a/dir.c
+++ b/dir.c
@@ -1009,7 +1009,23 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 			add_excludes(el->src, el->src, stk->baselen, el, 1,
 				     untracked ? &sha1_stat : NULL);
 		}
-		if (untracked) {
+		/*
+		 * NEEDSWORK: when untracked cache is enabled, prep_exclude()
+		 * will first be called in valid_cached_dir() then maybe many
+		 * times more in last_exclude_matching(). When the cache is
+		 * used, last_exclude_matching() will not be called and
+		 * reading .gitignore content will be a waste.
+		 *
+		 * So when it's called by valid_cached_dir() and we can get
+		 * .gitignore SHA-1 from the index (i.e. .gitignore is not
+		 * modified on work tree), we could delay reading the
+		 * .gitignore content until we absolutely need it in
+		 * last_exclude_matching(). Be careful about ignore rule
+		 * order, though, if you do that.
+		 */
+		if (untracked &&
+		    hashcmp(sha1_stat.sha1, untracked->exclude_sha1)) {
+			invalidate_gitignore(dir->untracked, untracked);
 			hashcpy(untracked->exclude_sha1, sha1_stat.sha1);
 		}
 		dir->exclude_stack = stk;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 05/22] untracked cache: make a wrapper around {open,read,close}dir()
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (3 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 04/22] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 06/22] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
                     ` (16 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This allows us to feed different info to read_directory_recursive()
based on untracked cache in the next patch.

Helped-by: Ramsay Jones <ramsay@ramsay1.demon.co.uk>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 47 insertions(+), 8 deletions(-)

diff --git a/dir.c b/dir.c
index d5e35ea..65ba98e 100644
--- a/dir.c
+++ b/dir.c
@@ -31,6 +31,15 @@ enum path_treatment {
 	path_untracked
 };
 
+/*
+ * Support data structure for our opendir/readdir/closedir wrappers
+ */
+struct cached_dir {
+	DIR *fdir;
+	struct untracked_cache_dir *untracked;
+	struct dirent *de;
+};
+
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 	const char *path, int len, struct untracked_cache_dir *untracked,
 	int check_only, const struct path_simplify *simplify);
@@ -1416,12 +1425,13 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct untracked_cache_dir *untracked,
-				      struct dirent *de,
+				      struct cached_dir *cdir,
 				      struct strbuf *path,
 				      int baselen,
 				      const struct path_simplify *simplify)
 {
 	int dtype;
+	struct dirent *de = cdir->de;
 
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
@@ -1443,6 +1453,37 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
 	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
+static int open_cached_dir(struct cached_dir *cdir,
+			   struct dir_struct *dir,
+			   struct untracked_cache_dir *untracked,
+			   struct strbuf *path,
+			   int check_only)
+{
+	memset(cdir, 0, sizeof(*cdir));
+	cdir->untracked = untracked;
+	cdir->fdir = opendir(path->len ? path->buf : ".");
+	if (!cdir->fdir)
+		return -1;
+	return 0;
+}
+
+static int read_cached_dir(struct cached_dir *cdir)
+{
+	if (cdir->fdir) {
+		cdir->de = readdir(cdir->fdir);
+		if (!cdir->de)
+			return -1;
+		return 0;
+	}
+	return -1;
+}
+
+static void close_cached_dir(struct cached_dir *cdir)
+{
+	if (cdir->fdir)
+		closedir(cdir->fdir);
+}
+
 /*
  * Read a directory tree. We currently ignore anything but
  * directories, regular files and symlinks. That's because git
@@ -1459,23 +1500,21 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 				    struct untracked_cache_dir *untracked, int check_only,
 				    const struct path_simplify *simplify)
 {
-	DIR *fdir;
+	struct cached_dir cdir;
 	enum path_treatment state, subdir_state, dir_state = path_none;
-	struct dirent *de;
 	struct strbuf path = STRBUF_INIT;
 
 	strbuf_add(&path, base, baselen);
 
-	fdir = opendir(path.len ? path.buf : ".");
-	if (!fdir)
+	if (open_cached_dir(&cdir, dir, untracked, &path, check_only))
 		goto out;
 
 	if (untracked)
 		untracked->check_only = !!check_only;
 
-	while ((de = readdir(fdir)) != NULL) {
+	while (!read_cached_dir(&cdir)) {
 		/* check how the file or directory should be treated */
-		state = treat_path(dir, untracked, de, &path, baselen, simplify);
+		state = treat_path(dir, untracked, &cdir, &path, baselen, simplify);
 
 		if (state > dir_state)
 			dir_state = state;
@@ -1528,7 +1567,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			break;
 		}
 	}
-	closedir(fdir);
+	close_cached_dir(&cdir);
  out:
 	strbuf_release(&path);
 
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 06/22] untracked cache: record/validate dir mtime and reuse cached output
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (4 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 05/22] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 07/22] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
                     ` (15 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

The main readdir loop in read_directory_recursive() is replaced with a
new one that checks if the cached results of a directory are still valid.

If a file is added or removed from the index, the containing directory
is invalidated (but not its subdirs). If directory's mtime is changed,
the same happens. If a .gitignore is updated, the containing directory
and all subdirs are invalidated recursively. If dir_struct#flags or
other conditions change, the cache is ignored.

If a directory is invalidated, we opendir/readdir/closedir and run the
exclude machinery on that directory listing as usual. If untracked
cache is also enabled, we'll update the cache along the way. If a
directory is validated, we simply pull the untracked listing out from
the cache. The cache also records the list of direct subdirs that we
have to recurse in. Fully excluded directories are seen as "untracked
files".

In the best case when no dirs are invalidated, read_directory()
becomes a series of

  stat(dir), open(.gitignore), fstat(), read(), close() and optionally
  hash_sha1_file()

For comparison, standard read_directory() is a sequence of

  opendir(), readdir(), open(.gitignore), fstat(), read(), close(), the
  expensive last_exclude_matching() and closedir().

We already try not to open(.gitignore) if we know it does not exist,
so open/fstat/read/close sequence does not apply to every
directory. The sequence could be reduced further, as noted in
prep_exclude() in another patch. So in theory, the entire best-case
read_directory sequence could be reduced to a series of stat() and
nothing else.

This is not a silver bullet approach. When you compile a C file, for
example, the old .o file is removed and a new one with the same name
created, effectively invalidating the containing directory's cache
(but not its subdirectories). If your build process touches every
directory, this cache adds extra overhead for nothing, so it's a good
idea to separate generated files from tracked files. Editors may use
the same strategy for saving files. And of course you're out of luck
running your repo on an unsupported filesystem and/or operating system.

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 121 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 dir.h |   2 ++
 2 files changed, 121 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index 65ba98e..f590761 100644
--- a/dir.c
+++ b/dir.c
@@ -37,7 +37,12 @@ enum path_treatment {
 struct cached_dir {
 	DIR *fdir;
 	struct untracked_cache_dir *untracked;
+	int nr_files;
+	int nr_dirs;
+
 	struct dirent *de;
+	const char *file;
+	struct untracked_cache_dir *ucd;
 };
 
 static enum path_treatment read_directory_recursive(struct dir_struct *dir,
@@ -606,6 +611,14 @@ static void invalidate_gitignore(struct untracked_cache *uc,
 	do_invalidate_gitignore(dir);
 }
 
+static void invalidate_directory(struct untracked_cache *uc,
+				 struct untracked_cache_dir *dir)
+{
+	uc->dir_invalidated++;
+	dir->valid = 0;
+	dir->untracked_nr = 0;
+}
+
 /*
  * Given a file with name "fname", read it (either from disk, or from
  * the index if "check_index" is non-zero), parse it and store the
@@ -1423,6 +1436,39 @@ static enum path_treatment treat_one_path(struct dir_struct *dir,
 	}
 }
 
+static enum path_treatment treat_path_fast(struct dir_struct *dir,
+					   struct untracked_cache_dir *untracked,
+					   struct cached_dir *cdir,
+					   struct strbuf *path,
+					   int baselen,
+					   const struct path_simplify *simplify)
+{
+	strbuf_setlen(path, baselen);
+	if (!cdir->ucd) {
+		strbuf_addstr(path, cdir->file);
+		return path_untracked;
+	}
+	strbuf_addstr(path, cdir->ucd->name);
+	/* treat_one_path() does this before it calls treat_directory() */
+	if (path->buf[path->len - 1] != '/')
+		strbuf_addch(path, '/');
+	if (cdir->ucd->check_only)
+		/*
+		 * check_only is set as a result of treat_directory() getting
+		 * to its bottom. Verify again the same set of directories
+		 * with check_only set.
+		 */
+		return read_directory_recursive(dir, path->buf, path->len,
+						cdir->ucd, 1, simplify);
+	/*
+	 * We get path_recurse in the first run when
+	 * directory_exists_in_index() returns index_nonexistent. We
+	 * are sure that new changes in the index do not impact the
+	 * outcome. Return now.
+	 */
+	return path_recurse;
+}
+
 static enum path_treatment treat_path(struct dir_struct *dir,
 				      struct untracked_cache_dir *untracked,
 				      struct cached_dir *cdir,
@@ -1433,6 +1479,9 @@ static enum path_treatment treat_path(struct dir_struct *dir,
 	int dtype;
 	struct dirent *de = cdir->de;
 
+	if (!de)
+		return treat_path_fast(dir, untracked, cdir, path,
+				       baselen, simplify);
 	if (is_dot_or_dotdot(de->d_name) || !strcmp(de->d_name, ".git"))
 		return path_none;
 	strbuf_setlen(path, baselen);
@@ -1453,6 +1502,52 @@ static void add_untracked(struct untracked_cache_dir *dir, const char *name)
 	dir->untracked[dir->untracked_nr++] = xstrdup(name);
 }
 
+static int valid_cached_dir(struct dir_struct *dir,
+			    struct untracked_cache_dir *untracked,
+			    struct strbuf *path,
+			    int check_only)
+{
+	struct stat st;
+
+	if (!untracked)
+		return 0;
+
+	if (stat(path->len ? path->buf : ".", &st)) {
+		invalidate_directory(dir->untracked, untracked);
+		memset(&untracked->stat_data, 0, sizeof(untracked->stat_data));
+		return 0;
+	}
+	if (!untracked->valid ||
+	    match_stat_data(&untracked->stat_data, &st)) {
+		if (untracked->valid)
+			invalidate_directory(dir->untracked, untracked);
+		fill_stat_data(&untracked->stat_data, &st);
+		return 0;
+	}
+
+	if (untracked->check_only != !!check_only) {
+		invalidate_directory(dir->untracked, untracked);
+		return 0;
+	}
+
+	/*
+	 * prep_exclude will be called eventually on this directory,
+	 * but it's called much later in last_exclude_matching(). We
+	 * need it now to determine the validity of the cache for this
+	 * path. The next calls will be nearly no-op, the way
+	 * prep_exclude() is designed.
+	 */
+	if (path->len && path->buf[path->len - 1] != '/') {
+		strbuf_addch(path, '/');
+		prep_exclude(dir, path->buf, path->len);
+		strbuf_setlen(path, path->len - 1);
+	} else
+		prep_exclude(dir, path->buf, path->len);
+
+	/* hopefully prep_exclude() haven't invalidated this entry... */
+	return untracked->valid;
+}
+
 static int open_cached_dir(struct cached_dir *cdir,
 			   struct dir_struct *dir,
 			   struct untracked_cache_dir *untracked,
@@ -1461,7 +1556,11 @@ static int open_cached_dir(struct cached_dir *cdir,
 {
 	memset(cdir, 0, sizeof(*cdir));
 	cdir->untracked = untracked;
+	if (valid_cached_dir(dir, untracked, path, check_only))
+		return 0;
 	cdir->fdir = opendir(path->len ? path->buf : ".");
+	if (dir->untracked)
+		dir->untracked->dir_opened++;
 	if (!cdir->fdir)
 		return -1;
 	return 0;
@@ -1475,6 +1574,18 @@ static int read_cached_dir(struct cached_dir *cdir)
 			return -1;
 		return 0;
 	}
+	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
+		struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+		cdir->ucd = d;
+		cdir->nr_dirs++;
+		return 0;
+	}
+	cdir->ucd = NULL;
+	if (cdir->nr_files < cdir->untracked->untracked_nr) {
+		struct untracked_cache_dir *d = cdir->untracked;
+		cdir->file = d->untracked[cdir->nr_files++];
+		return 0;
+	}
 	return -1;
 }
 
@@ -1482,6 +1593,12 @@ static void close_cached_dir(struct cached_dir *cdir)
 {
 	if (cdir->fdir)
 		closedir(cdir->fdir);
+	/*
+	 * We have gone through this directory and found no untracked
+	 * entries. Mark it valid.
+	 */
+	if (cdir->untracked)
+		cdir->untracked->valid = 1;
 }
 
 /*
@@ -1535,7 +1652,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 		if (check_only) {
 			/* abort early if maximum state has been reached */
 			if (dir_state == path_untracked) {
-				if (untracked)
+				if (cdir.fdir)
 					add_untracked(untracked, path.buf + baselen);
 				break;
 			}
@@ -1559,7 +1676,7 @@ static enum path_treatment read_directory_recursive(struct dir_struct *dir,
 			if (dir->flags & DIR_SHOW_IGNORED)
 				break;
 			dir_add_name(dir, path.buf, path.len);
-			if (untracked)
+			if (cdir.fdir)
 				add_untracked(untracked, path.buf + baselen);
 			break;
 
diff --git a/dir.h b/dir.h
index bd51948..a755e49 100644
--- a/dir.h
+++ b/dir.h
@@ -135,6 +135,8 @@ struct untracked_cache {
 	/* Statistics */
 	int dir_created;
 	int gitignore_invalidated;
+	int dir_invalidated;
+	int dir_opened;
 };
 
 struct dir_struct {
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 07/22] untracked cache: mark what dirs should be recursed/saved
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (5 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 06/22] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 08/22] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
                     ` (14 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

If we redo this thing in a functional style, we would have one struct
untracked_dir as input tree and another as output. The input is used
for verification. The output is a brand new tree, reflecting current
worktree.

But that means recreating a lot of dir nodes even if a lot could be
shared between input and output trees in good cases. So we go with the
messy but efficient way, combining both input and output trees into
one. We need a way to know which node in this combined tree belongs to
the output. This is the purpose of this "recurse" flag.

"valid" bit can't be used for this because it's about data of the node
except the subdirs. When we invalidate a directory, we want to keep
cached data of the subdirs intact even though we don't really know
what subdir still exists (yet). Then we check worktree to see what
actual subdir remains on disk. Those will have 'recurse' bit set
again. If cached data for those are still valid, we may be able to
avoid computing exclude files for them. Those subdirs that are deleted
will have 'recurse' remain clear and their 'valid' bits do not
matter.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 14 +++++++++++++-
 dir.h |  3 ++-
 2 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index f590761..32406bf 100644
--- a/dir.c
+++ b/dir.c
@@ -614,9 +614,12 @@ static void invalidate_gitignore(struct untracked_cache *uc,
 static void invalidate_directory(struct untracked_cache *uc,
 				 struct untracked_cache_dir *dir)
 {
+	int i;
 	uc->dir_invalidated++;
 	dir->valid = 0;
 	dir->untracked_nr = 0;
+	for (i = 0; i < dir->dirs_nr; i++)
+		dir->dirs[i]->recurse = 0;
 }
 
 /*
@@ -1576,6 +1579,10 @@ static int read_cached_dir(struct cached_dir *cdir)
 	}
 	while (cdir->nr_dirs < cdir->untracked->dirs_nr) {
 		struct untracked_cache_dir *d = cdir->untracked->dirs[cdir->nr_dirs];
+		if (!d->recurse) {
+			cdir->nr_dirs++;
+			continue;
+		}
 		cdir->ucd = d;
 		cdir->nr_dirs++;
 		return 0;
@@ -1597,8 +1604,10 @@ static void close_cached_dir(struct cached_dir *cdir)
 	 * We have gone through this directory and found no untracked
 	 * entries. Mark it valid.
 	 */
-	if (cdir->untracked)
+	if (cdir->untracked) {
 		cdir->untracked->valid = 1;
+		cdir->untracked->recurse = 1;
+	}
 }
 
 /*
@@ -1841,6 +1850,9 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 		invalidate_gitignore(dir->untracked, root);
 		dir->untracked->ss_excludes_file = dir->ss_excludes_file;
 	}
+
+	/* Make sure this directory is not dropped out at saving phase */
+	root->recurse = 1;
 	return root;
 }
 
diff --git a/dir.h b/dir.h
index a755e49..5c688da 100644
--- a/dir.h
+++ b/dir.h
@@ -115,8 +115,9 @@ struct untracked_cache_dir {
 	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
 	unsigned int untracked_nr;
 	unsigned int check_only : 1;
-	/* all data in this struct are good */
+	/* all data except 'dirs' in this struct are good */
 	unsigned int valid : 1;
+	unsigned int recurse : 1;
 	/* null SHA-1 means this directory does not have .gitignore */
 	unsigned char exclude_sha1[20];
 	char name[1];
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 08/22] untracked cache: don't open non-existent .gitignore
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (6 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 07/22] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 09/22] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
                     ` (13 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This cuts down a significant number of open(.gitignore) because most
directories usually don't have .gitignore files.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index 32406bf..5936c84 100644
--- a/dir.c
+++ b/dir.c
@@ -1018,7 +1018,21 @@ static void prep_exclude(struct dir_struct *dir, const char *base, int baselen)
 		/* Try to read per-directory file */
 		hashclr(sha1_stat.sha1);
 		sha1_stat.valid = 0;
-		if (dir->exclude_per_dir) {
+		if (dir->exclude_per_dir &&
+		    /*
+		     * If we know that no files have been added in
+		     * this directory (i.e. valid_cached_dir() has
+		     * been executed and set untracked->valid) ..
+		     */
+		    (!untracked || !untracked->valid ||
+		     /*
+		      * .. and .gitignore does not exist before
+		      * (i.e. null exclude_sha1 and skip_worktree is
+		      * not set). Then we can skip loading .gitignore,
+		      * which would result in ENOENT anyway.
+		      * skip_worktree is taken care in read_directory()
+		      */
+		     !is_null_sha1(untracked->exclude_sha1))) {
 			/*
 			 * dir->basebuf gets reused by the traversal, but we
 			 * need fname to remain unchanged to ensure the src
@@ -1781,6 +1795,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 						      const struct pathspec *pathspec)
 {
 	struct untracked_cache_dir *root;
+	int i;
 
 	if (!dir->untracked)
 		return NULL;
@@ -1832,6 +1847,15 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	if (dir->exclude_list_group[EXC_CMDL].nr)
 		return NULL;
 
+	/*
+	 * An optimization in prep_exclude() does not play well with
+	 * CE_SKIP_WORKTREE. It's a rare case anyway, if a single
+	 * entry has that bit set, disable the whole untracked cache.
+	 */
+	for (i = 0; i < active_nr; i++)
+		if (ce_skip_worktree(active_cache[i]))
+			return NULL;
+
 	if (!dir->untracked->root) {
 		const int len = sizeof(*dir->untracked->root);
 		dir->untracked->root = xmalloc(len);
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 09/22] untracked cache: save to an index extension
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (7 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 08/22] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 10/22] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
                     ` (12 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      |  3 +++
 dir.c        | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dir.h        |  1 +
 read-cache.c | 12 +++++++++
 4 files changed, 101 insertions(+)

diff --git a/cache.h b/cache.h
index dcf3a2a..b14d6e2 100644
--- a/cache.h
+++ b/cache.h
@@ -297,6 +297,8 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
 
 struct split_index;
+struct untracked_cache;
+
 struct index_state {
 	struct cache_entry **cache;
 	unsigned int version;
@@ -310,6 +312,7 @@ struct index_state {
 	struct hashmap name_hash;
 	struct hashmap dir_hash;
 	unsigned char sha1[20];
+	struct untracked_cache *untracked;
 };
 
 extern struct index_state the_index;
diff --git a/dir.c b/dir.c
index 5936c84..4e15f20 100644
--- a/dir.c
+++ b/dir.c
@@ -12,6 +12,7 @@
 #include "refs.h"
 #include "wildmatch.h"
 #include "pathspec.h"
+#include "varint.h"
 
 struct path_simplify {
 	int len;
@@ -2138,3 +2139,87 @@ void clear_directory(struct dir_struct *dir)
 	}
 	strbuf_release(&dir->basebuf);
 }
+
+struct ondisk_untracked_cache {
+	struct stat_data info_exclude_stat;
+	struct stat_data excludes_file_stat;
+	uint32_t dir_flags;
+	unsigned char info_exclude_sha1[20];
+	unsigned char excludes_file_sha1[20];
+	char exclude_per_dir[1];
+};
+
+static void stat_data_to_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = htonl(from->sd_ctime.sec);
+	to->sd_ctime.nsec = htonl(from->sd_ctime.nsec);
+	to->sd_mtime.sec  = htonl(from->sd_mtime.sec);
+	to->sd_mtime.nsec = htonl(from->sd_mtime.nsec);
+	to->sd_dev	  = htonl(from->sd_dev);
+	to->sd_ino	  = htonl(from->sd_ino);
+	to->sd_uid	  = htonl(from->sd_uid);
+	to->sd_gid	  = htonl(from->sd_gid);
+	to->sd_size	  = htonl(from->sd_size);
+}
+
+static void write_one_dir(struct strbuf *out, struct untracked_cache_dir *untracked)
+{
+	struct stat_data stat_data;
+	unsigned char intbuf[16];
+	unsigned int intlen, value;
+	int i;
+
+	stat_data_to_disk(&stat_data, &untracked->stat_data);
+	strbuf_add(out, &stat_data, sizeof(stat_data));
+	strbuf_add(out, untracked->exclude_sha1, 20);
+
+	/*
+	 * untracked_nr should be reset whenever valid is clear, but
+	 * for safety..
+	 */
+	if (!untracked->valid) {
+		untracked->untracked_nr = 0;
+		untracked->check_only = 0;
+	}
+
+	value  = untracked->valid;
+	value |= untracked->check_only   << 1;
+	value |= untracked->untracked_nr << 2;
+	intlen = encode_varint(value, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	/* skip non-recurse directories */
+	for (i = 0, value = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			value++;
+	intlen = encode_varint(value, intbuf);
+	strbuf_add(out, intbuf, intlen);
+
+	strbuf_add(out, untracked->name, strlen(untracked->name) + 1);
+
+	for (i = 0; i < untracked->untracked_nr; i++)
+		strbuf_add(out, untracked->untracked[i],
+			   strlen(untracked->untracked[i]) + 1);
+
+	for (i = 0; i < untracked->dirs_nr; i++)
+		if (untracked->dirs[i]->recurse)
+			write_one_dir(out, untracked->dirs[i]);
+}
+
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
+{
+	struct ondisk_untracked_cache *ouc;
+	int len = 0;
+	if (untracked->exclude_per_dir)
+		len = strlen(untracked->exclude_per_dir);
+	ouc = xmalloc(sizeof(*ouc) + len);
+	stat_data_to_disk(&ouc->info_exclude_stat, &untracked->ss_info_exclude.stat);
+	stat_data_to_disk(&ouc->excludes_file_stat, &untracked->ss_excludes_file.stat);
+	hashcpy(ouc->info_exclude_sha1, untracked->ss_info_exclude.sha1);
+	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
+	ouc->dir_flags = htonl(untracked->dir_flags);
+	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+	strbuf_add(out, ouc, sizeof(*ouc) + len);
+	if (untracked->root)
+		write_one_dir(out, untracked->root);
+}
diff --git a/dir.h b/dir.h
index 5c688da..c771057 100644
--- a/dir.h
+++ b/dir.h
@@ -298,4 +298,5 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 6f0057f..26e938d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -37,6 +37,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 #define CACHE_EXT_TREE 0x54524545	/* "TREE" */
 #define CACHE_EXT_RESOLVE_UNDO 0x52455543 /* "REUC" */
 #define CACHE_EXT_LINK 0x6c696e6b	  /* "link" */
+#define CACHE_EXT_UNTRACKED 0x554E5452	  /* "UNTR" */
 
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
@@ -2016,6 +2017,17 @@ static int do_write_index(struct index_state *istate, int newfd,
 		if (err)
 			return -1;
 	}
+	if (istate->untracked) {
+		struct strbuf sb = STRBUF_INIT;
+
+		write_untracked_extension(&sb, istate->untracked);
+		err = write_index_ext_header(&c, newfd, CACHE_EXT_UNTRACKED,
+					     sb.len) < 0 ||
+			ce_write(&c, newfd, sb.buf, sb.len) < 0;
+		strbuf_release(&sb);
+		if (err)
+			return -1;
+	}
 
 	if (ce_flush(&c, newfd, istate->sha1) || fstat(newfd, &st))
 		return -1;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 10/22] untracked cache: load from UNTR index extension
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (8 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 09/22] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 11/22] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
                     ` (11 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c        | 135 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 dir.h        |   2 +
 read-cache.c |   5 +++
 3 files changed, 142 insertions(+)

diff --git a/dir.c b/dir.c
index 4e15f20..bc196b3 100644
--- a/dir.c
+++ b/dir.c
@@ -2223,3 +2223,138 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	if (untracked->root)
 		write_one_dir(out, untracked->root);
 }
+
+static void free_untracked(struct untracked_cache_dir *ucd)
+{
+	int i;
+	if (!ucd)
+		return;
+	for (i = 0; i < ucd->dirs_nr; i++)
+		free_untracked(ucd->dirs[i]);
+	for (i = 0; i < ucd->untracked_nr; i++)
+		free(ucd->untracked[i]);
+	free(ucd->untracked);
+	free(ucd->dirs);
+	free(ucd);
+}
+
+void free_untracked_cache(struct untracked_cache *uc)
+{
+	if (uc)
+		free_untracked(uc->root);
+	free(uc);
+}
+
+static void stat_data_from_disk(struct stat_data *to, const struct stat_data *from)
+{
+	to->sd_ctime.sec  = get_be32(&from->sd_ctime.sec);
+	to->sd_ctime.nsec = get_be32(&from->sd_ctime.nsec);
+	to->sd_mtime.sec  = get_be32(&from->sd_mtime.sec);
+	to->sd_mtime.nsec = get_be32(&from->sd_mtime.nsec);
+	to->sd_dev	  = get_be32(&from->sd_dev);
+	to->sd_ino	  = get_be32(&from->sd_ino);
+	to->sd_uid	  = get_be32(&from->sd_uid);
+	to->sd_gid	  = get_be32(&from->sd_gid);
+	to->sd_size	  = get_be32(&from->sd_size);
+}
+
+static int read_one_dir(struct untracked_cache_dir **untracked_,
+			const unsigned char *data_, unsigned long sz)
+{
+#define NEXT(x) \
+	next = data + (x); \
+	if (next > data_ + sz) \
+		return -1;
+
+	struct untracked_cache_dir ud, *untracked;
+	const unsigned char *next, *data = data_;
+	unsigned int value;
+	int i, len;
+
+	memset(&ud, 0, sizeof(ud));
+
+	NEXT(sizeof(struct stat_data));
+	stat_data_from_disk(&ud.stat_data, (struct stat_data *)data);
+	data = next;
+
+	NEXT(20);
+	hashcpy(ud.exclude_sha1, data);
+	data = next;
+
+	next = data;
+	value = decode_varint(&next);
+	if (next > data_ + sz)
+		return -1;
+	ud.recurse = 1;
+	ud.valid = value & 1;
+	ud.check_only = (value >> 1) & 1;
+	ud.untracked_alloc = ud.untracked_nr = value >> 2;
+	if (ud.untracked_nr)
+		ud.untracked = xmalloc(sizeof(*ud.untracked) * ud.untracked_nr);
+	data = next;
+
+	next = data;
+	ud.dirs_alloc = ud.dirs_nr = decode_varint(&next);
+	if (next > data_ + sz)
+		return -1;
+	ud.dirs = xmalloc(sizeof(*ud.dirs) * ud.dirs_nr);
+	data = next;
+
+	len = strlen((const char *)data);
+	NEXT(len + 1);
+	*untracked_ = untracked = xmalloc(sizeof(*untracked) + len);
+	memcpy(untracked, &ud, sizeof(ud));
+	memcpy(untracked->name, data, len + 1);
+	data = next;
+
+	for (i = 0; i < untracked->untracked_nr; i++) {
+		len = strlen((const char *)data);
+		NEXT(len + 1);
+		untracked->untracked[i] = xstrdup((const char*)data);
+		data = next;
+	}
+
+	for (i = 0; i < untracked->dirs_nr; i++) {
+		len = read_one_dir(untracked->dirs + i, data, sz - (data - data_));
+		if (len < 0)
+			return -1;
+		data += len;
+	}
+	return data - data_;
+}
+
+static void load_sha1_stat(struct sha1_stat *sha1_stat,
+			   const struct stat_data *stat,
+			   const unsigned char *sha1)
+{
+	stat_data_from_disk(&sha1_stat->stat, stat);
+	hashcpy(sha1_stat->sha1, sha1);
+	sha1_stat->valid = 1;
+}
+
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
+{
+	const struct ondisk_untracked_cache *ouc = data;
+	struct untracked_cache *uc;
+	int len;
+
+	if (sz < sizeof(*ouc))
+		return NULL;
+
+	uc = xcalloc(1, sizeof(*uc));
+	load_sha1_stat(&uc->ss_info_exclude, &ouc->info_exclude_stat,
+		       ouc->info_exclude_sha1);
+	load_sha1_stat(&uc->ss_excludes_file, &ouc->excludes_file_stat,
+		       ouc->excludes_file_sha1);
+	uc->dir_flags = get_be32(&ouc->dir_flags);
+	uc->exclude_per_dir = xstrdup(ouc->exclude_per_dir);
+	len = sizeof(*ouc) + strlen(ouc->exclude_per_dir);
+	if (sz == len)
+		return uc;
+	if (sz > len &&
+	    read_one_dir(&uc->root, (const unsigned char *)data + len,
+			 sz - len) == sz - len)
+		return uc;
+	free_untracked_cache(uc);
+	return NULL;
+}
diff --git a/dir.h b/dir.h
index c771057..14ee4fd 100644
--- a/dir.h
+++ b/dir.h
@@ -298,5 +298,7 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void free_untracked_cache(struct untracked_cache *);
+struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
 #endif
diff --git a/read-cache.c b/read-cache.c
index 26e938d..f5bd174 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1366,6 +1366,9 @@ static int read_index_extension(struct index_state *istate,
 		if (read_link_extension(istate, data, sz))
 			return -1;
 		break;
+	case CACHE_EXT_UNTRACKED:
+		istate->untracked = read_untracked_extension(data, sz);
+		break;
 	default:
 		if (*ext < 'A' || 'Z' < *ext)
 			return error("index uses %.4s extension, which we do not understand",
@@ -1631,6 +1634,8 @@ int discard_index(struct index_state *istate)
 	istate->cache = NULL;
 	istate->cache_alloc = 0;
 	discard_split_index(istate);
+	free_untracked_cache(istate->untracked);
+	istate->untracked = NULL;
 	return 0;
 }
 
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 11/22] untracked cache: invalidate at index addition or removal
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (9 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 10/22] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 12/22] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
                     ` (10 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Ideally we should implement untracked_cache_remove_from_index() and
untracked_cache_add_to_index() so that they update untracked cache
right away instead of invalidating it and waiting for read_directory()
next time to deal with it. But that may need some more work in
unpack-trees.c. So stay simple as the first step.

The new call in add_index_entry_with_check() may look strange because
new calls usually stay close to cache_tree_invalidate_path(). We do it
a bit later than c_t_i_p() in this function because if it's about
replacing the entry with the same name, we don't care (but cache-tree
does).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c          | 31 +++++++++++++++++++++++++++++++
 dir.h          |  4 ++++
 read-cache.c   |  4 ++++
 unpack-trees.c |  7 +++++--
 4 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/dir.c b/dir.c
index bc196b3..b3ba278 100644
--- a/dir.c
+++ b/dir.c
@@ -2358,3 +2358,34 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	free_untracked_cache(uc);
 	return NULL;
 }
+
+void untracked_cache_invalidate_path(struct index_state *istate,
+				     const char *path)
+{
+	const char *sep;
+	struct untracked_cache_dir *d;
+	if (!istate->untracked || !istate->untracked->root)
+		return;
+	sep = strrchr(path, '/');
+	if (sep)
+		d = lookup_untracked(istate->untracked,
+				     istate->untracked->root,
+				     path, sep - path);
+	else
+		d = istate->untracked->root;
+	istate->untracked->dir_invalidated++;
+	d->valid = 0;
+	d->untracked_nr = 0;
+}
+
+void untracked_cache_remove_from_index(struct index_state *istate,
+				       const char *path)
+{
+	untracked_cache_invalidate_path(istate, path);
+}
+
+void untracked_cache_add_to_index(struct index_state *istate,
+				  const char *path)
+{
+	untracked_cache_invalidate_path(istate, path);
+}
diff --git a/dir.h b/dir.h
index 14ee4fd..708cdd5 100644
--- a/dir.h
+++ b/dir.h
@@ -298,6 +298,10 @@ static inline int dir_path_match(const struct dir_entry *ent,
 			      has_trailing_dir);
 }
 
+void untracked_cache_invalidate_path(struct index_state *, const char *);
+void untracked_cache_remove_from_index(struct index_state *, const char *);
+void untracked_cache_add_to_index(struct index_state *, const char *);
+
 void free_untracked_cache(struct untracked_cache *);
 struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz);
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked);
diff --git a/read-cache.c b/read-cache.c
index f5bd174..7870d4a 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -78,6 +78,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n
 	memcpy(new->name, new_name, namelen + 1);
 
 	cache_tree_invalidate_path(istate, old->name);
+	untracked_cache_remove_from_index(istate, old->name);
 	remove_index_entry_at(istate, nr);
 	add_index_entry(istate, new, ADD_CACHE_OK_TO_ADD|ADD_CACHE_OK_TO_REPLACE);
 }
@@ -537,6 +538,7 @@ int remove_file_from_index(struct index_state *istate, const char *path)
 	if (pos < 0)
 		pos = -pos-1;
 	cache_tree_invalidate_path(istate, path);
+	untracked_cache_remove_from_index(istate, path);
 	while (pos < istate->cache_nr && !strcmp(istate->cache[pos]->name, path))
 		remove_index_entry_at(istate, pos);
 	return 0;
@@ -968,6 +970,8 @@ static int add_index_entry_with_check(struct index_state *istate, struct cache_e
 	}
 	pos = -pos-1;
 
+	untracked_cache_add_to_index(istate, ce->name);
+
 	/*
 	 * Inserting a merged entry ("stage 0") into the index
 	 * will always replace all non-merged entries..
diff --git a/unpack-trees.c b/unpack-trees.c
index 629c658..e5ddb0c 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -9,6 +9,7 @@
 #include "refs.h"
 #include "attr.h"
 #include "split-index.h"
+#include "dir.h"
 
 /*
  * Error messages expected by scripts out of plumbing commands such as
@@ -1255,8 +1256,10 @@ static int verify_uptodate_sparse(const struct cache_entry *ce,
 static void invalidate_ce_path(const struct cache_entry *ce,
 			       struct unpack_trees_options *o)
 {
-	if (ce)
-		cache_tree_invalidate_path(o->src_index, ce->name);
+	if (!ce)
+		return;
+	cache_tree_invalidate_path(o->src_index, ce->name);
+	untracked_cache_invalidate_path(o->src_index, ce->name);
 }
 
 /*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 12/22] read-cache.c: split racy stat test to a separate function
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (10 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 11/22] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 13/22] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
                     ` (9 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 read-cache.c | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 7870d4a..794176f 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -270,20 +270,26 @@ static int ce_match_stat_basic(const struct cache_entry *ce, struct stat *st)
 	return changed;
 }
 
-static int is_racy_timestamp(const struct index_state *istate,
-			     const struct cache_entry *ce)
+static int is_racy_stat(const struct index_state *istate,
+			const struct stat_data *sd)
 {
-	return (!S_ISGITLINK(ce->ce_mode) &&
-		istate->timestamp.sec &&
+	return (istate->timestamp.sec &&
 #ifdef USE_NSEC
 		 /* nanosecond timestamped files can also be racy! */
-		(istate->timestamp.sec < ce->ce_stat_data.sd_mtime.sec ||
-		 (istate->timestamp.sec == ce->ce_stat_data.sd_mtime.sec &&
-		  istate->timestamp.nsec <= ce->ce_stat_data.sd_mtime.nsec))
+		(istate->timestamp.sec < sd->sd_mtime.sec ||
+		 (istate->timestamp.sec == sd->sd_mtime.sec &&
+		  istate->timestamp.nsec <= sd->sd_mtime.nsec))
 #else
-		istate->timestamp.sec <= ce->ce_stat_data.sd_mtime.sec
+		istate->timestamp.sec <= sd->sd_mtime.sec
 #endif
-		 );
+		);
+}
+
+static int is_racy_timestamp(const struct index_state *istate,
+			     const struct cache_entry *ce)
+{
+	return (!S_ISGITLINK(ce->ce_mode) &&
+		is_racy_stat(istate, &ce->ce_stat_data));
 }
 
 int ie_match_stat(const struct index_state *istate,
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 13/22] untracked cache: avoid racy timestamps
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (11 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 12/22] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 14/22] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
                     ` (8 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

When a directory is updated within the same second that its timestamp
was last saved, we cannot tell that the directory has been updated by
checking timestamps. Assume the worst (something was updated). See
29e4d36 (Racy GIT - 2005-12-20) for more information.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      | 2 ++
 dir.c        | 4 ++--
 read-cache.c | 8 ++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index b14d6e2..f8b3dc5 100644
--- a/cache.h
+++ b/cache.h
@@ -561,6 +561,8 @@ extern void fill_stat_data(struct stat_data *sd, struct stat *st);
  * INODE_CHANGED, and DATA_CHANGED.
  */
 extern int match_stat_data(const struct stat_data *sd, struct stat *st);
+extern int match_stat_data_racy(const struct index_state *istate,
+				const struct stat_data *sd, struct stat *st);
 
 extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
 
diff --git a/dir.c b/dir.c
index b3ba278..52daaf7 100644
--- a/dir.c
+++ b/dir.c
@@ -680,7 +680,7 @@ static int add_excludes(const char *fname, const char *base, int baselen,
 		if (sha1_stat) {
 			int pos;
 			if (sha1_stat->valid &&
-			    !match_stat_data(&sha1_stat->stat, &st))
+			    !match_stat_data_racy(&the_index, &sha1_stat->stat, &st))
 				; /* no content change, ss->sha1 still good */
 			else if (check_index &&
 				 (pos = cache_name_pos(fname, strlen(fname))) >= 0 &&
@@ -1536,7 +1536,7 @@ static int valid_cached_dir(struct dir_struct *dir,
 		return 0;
 	}
 	if (!untracked->valid ||
-	    match_stat_data(&untracked->stat_data, &st)) {
+	    match_stat_data_racy(&the_index, &untracked->stat_data, &st)) {
 		if (untracked->valid)
 			invalidate_directory(dir->untracked, untracked);
 		fill_stat_data(&untracked->stat_data, &st);
diff --git a/read-cache.c b/read-cache.c
index 794176f..103c294 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -292,6 +292,14 @@ static int is_racy_timestamp(const struct index_state *istate,
 		is_racy_stat(istate, &ce->ce_stat_data));
 }
 
+int match_stat_data_racy(const struct index_state *istate,
+			 const struct stat_data *sd, struct stat *st)
+{
+	if (is_racy_stat(istate, sd))
+		return MTIME_CHANGED;
+	return match_stat_data(sd, st);
+}
+
 int ie_match_stat(const struct index_state *istate,
 		  const struct cache_entry *ce, struct stat *st,
 		  unsigned int options)
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 14/22] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (12 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 13/22] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 15/22] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
                     ` (7 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This could be used to verify correct behavior in tests.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/dir.c b/dir.c
index 52daaf7..27bd40a 100644
--- a/dir.c
+++ b/dir.c
@@ -1920,6 +1920,18 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 	free_simplify(simplify);
 	qsort(dir->entries, dir->nr, sizeof(struct dir_entry *), cmp_name);
 	qsort(dir->ignored, dir->ignored_nr, sizeof(struct dir_entry *), cmp_name);
+	if (dir->untracked) {
+		static struct trace_key trace_untracked_stats = TRACE_KEY_INIT(UNTRACKED_STATS);
+		trace_printf_key(&trace_untracked_stats,
+				 "node creation: %u\n"
+				 "gitignore invalidation: %u\n"
+				 "directory invalidation: %u\n"
+				 "opendir: %u\n",
+				 dir->untracked->dir_created,
+				 dir->untracked->gitignore_invalidated,
+				 dir->untracked->dir_invalidated,
+				 dir->untracked->dir_opened);
+	}
 	return dir->nr;
 }
 
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 15/22] untracked cache: mark index dirty if untracked cache is updated
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (13 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 14/22] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 16/22] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
                     ` (6 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 cache.h      | 1 +
 dir.c        | 9 +++++++++
 read-cache.c | 2 +-
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/cache.h b/cache.h
index f8b3dc5..201b22e 100644
--- a/cache.h
+++ b/cache.h
@@ -295,6 +295,7 @@ static inline unsigned int canon_mode(unsigned int mode)
 #define RESOLVE_UNDO_CHANGED	(1 << 4)
 #define CACHE_TREE_CHANGED	(1 << 5)
 #define SPLIT_INDEX_ORDERED	(1 << 6)
+#define UNTRACKED_CHANGED       (1 << 7)
 
 struct split_index;
 struct untracked_cache;
diff --git a/dir.c b/dir.c
index 27bd40a..b1f2be5 100644
--- a/dir.c
+++ b/dir.c
@@ -1931,6 +1931,15 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 				 dir->untracked->gitignore_invalidated,
 				 dir->untracked->dir_invalidated,
 				 dir->untracked->dir_opened);
+		if (dir->untracked == the_index.untracked &&
+		    (dir->untracked->dir_opened ||
+		     dir->untracked->gitignore_invalidated ||
+		     dir->untracked->dir_invalidated))
+			the_index.cache_changed |= UNTRACKED_CHANGED;
+		if (dir->untracked != the_index.untracked) {
+			free(dir->untracked);
+			dir->untracked = NULL;
+		}
 	}
 	return dir->nr;
 }
diff --git a/read-cache.c b/read-cache.c
index 103c294..21ae963 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -42,7 +42,7 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce,
 /* changes that can be kept in $GIT_DIR/index (basically all extensions) */
 #define EXTMASK (RESOLVE_UNDO_CHANGED | CACHE_TREE_CHANGED | \
 		 CE_ENTRY_ADDED | CE_ENTRY_REMOVED | CE_ENTRY_CHANGED | \
-		 SPLIT_INDEX_ORDERED)
+		 SPLIT_INDEX_ORDERED | UNTRACKED_CHANGED)
 
 struct index_state the_index;
 static const char *alternate_index_output;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 16/22] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (14 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 15/22] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 17/22] status: enable untracked cache Nguyễn Thái Ngọc Duy
                     ` (5 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

This can be used to double check if results with untracked cache are
correct, compared to the vanilla version. The untracked cache remains
in the index, but is not used.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/dir.c b/dir.c
index b1f2be5..a99bd63 100644
--- a/dir.c
+++ b/dir.c
@@ -1798,7 +1798,7 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
 	struct untracked_cache_dir *root;
 	int i;
 
-	if (!dir->untracked)
+	if (!dir->untracked || getenv("GIT_DISABLE_UNTRACKED_CACHE"))
 		return NULL;
 
 	/*
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 17/22] status: enable untracked cache
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (15 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 16/22] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 18/22] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
                     ` (4 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

update_index_if_able() is moved down so that the updated untracked
cache could be written out.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/commit.c | 5 +++--
 wt-status.c      | 2 ++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 5ed6036..bdcfa61 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1372,13 +1372,14 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	refresh_index(&the_index, REFRESH_QUIET|REFRESH_UNMERGED, &s.pathspec, NULL, NULL);
 
 	fd = hold_locked_index(&index_lock, 0);
-	if (0 <= fd)
-		update_index_if_able(&the_index, &index_lock);
 
 	s.is_initial = get_sha1(s.reference, sha1) ? 1 : 0;
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	wt_status_collect(&s);
 
+	if (0 <= fd)
+		update_index_if_able(&the_index, &index_lock);
+
 	if (s.relative_paths)
 		s.prefix = prefix;
 
diff --git a/wt-status.c b/wt-status.c
index 27da529..8880c3b 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -585,6 +585,8 @@ static void wt_status_collect_untracked(struct wt_status *s)
 			DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
 	if (s->show_ignored_files)
 		dir.flags |= DIR_SHOW_IGNORED_TOO;
+	else
+		dir.untracked = the_index.untracked;
 	setup_standard_excludes(&dir);
 
 	fill_directory(&dir, &s->pathspec);
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 18/22] update-index: manually enable or disable untracked cache
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (16 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 17/22] status: enable untracked cache Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 19/22] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
                     ` (3 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Overall time saving on "git status" is about 40% in the best case
scenario, removing .._collect_untracked() as the most time consuming
function. read and refresh index operations are now at the top (which
should drop when index-helper and/or watchman support is added). More
numbers and analysis below.

webkit.git
==========

169k files. 6k dirs. Lots of test data (i.e. not touched most of the
time)

Base status
-----------

Index version 4 in split index mode and cache-tree populated. No
untracked cache. It shows how time is consumed by "git status". The
same settings are used for other repos below.

18:28:10.199679 builtin/commit.c:1394   performance: 0.000000451 s: cmd_status:setup
18:28:10.474847 read-cache.c:1407       performance: 0.274873831 s: read_index
18:28:10.475295 read-cache.c:1407       performance: 0.000000656 s: read_index
18:28:10.728443 preload-index.c:131     performance: 0.253147487 s: read_index_preload
18:28:10.741422 read-cache.c:1254       performance: 0.012868340 s: refresh_index
18:28:10.752300 wt-status.c:623         performance: 0.010421357 s: wt_status_collect_changes_worktree
18:28:10.762069 wt-status.c:629         performance: 0.009644748 s: wt_status_collect_changes_index
18:28:11.601019 wt-status.c:632         performance: 0.838859547 s: wt_status_collect_untracked
18:28:11.605939 builtin/commit.c:1421   performance: 0.004835004 s: cmd_status:update_index
18:28:11.606580 trace.c:415             performance: 1.407878388 s: git command: 'git' 'status'

Populating status
-----------------

This is after enabling untracked cache and the cache is still empty.
We see a slight increase in .._collect_untracked() and update_index
(because new cache has to be written to $GIT_DIR/index).

18:28:18.915213 builtin/commit.c:1394   performance: 0.000000326 s: cmd_status:setup
18:28:19.197364 read-cache.c:1407       performance: 0.281901416 s: read_index
18:28:19.197754 read-cache.c:1407       performance: 0.000000546 s: read_index
18:28:19.451355 preload-index.c:131     performance: 0.253599607 s: read_index_preload
18:28:19.464400 read-cache.c:1254       performance: 0.012935336 s: refresh_index
18:28:19.475115 wt-status.c:623         performance: 0.010236920 s: wt_status_collect_changes_worktree
18:28:19.486022 wt-status.c:629         performance: 0.010801685 s: wt_status_collect_changes_index
18:28:20.362660 wt-status.c:632         performance: 0.876551366 s: wt_status_collect_untracked
18:28:20.396199 builtin/commit.c:1421   performance: 0.033447969 s: cmd_status:update_index
18:28:20.396939 trace.c:415             performance: 1.482695902 s: git command: 'git' 'status'

Populated status
----------------

After the cache is populated, wt_status_collect_untracked() drops 82%
from 0.838s to 0.144s. Overall time drops 45%. Top offenders are now
read_index() and read_index_preload().

18:28:20.408605 builtin/commit.c:1394   performance: 0.000000457 s: cmd_status:setup
18:28:20.692864 read-cache.c:1407       performance: 0.283980458 s: read_index
18:28:20.693273 read-cache.c:1407       performance: 0.000000661 s: read_index
18:28:20.958814 preload-index.c:131     performance: 0.265540254 s: read_index_preload
18:28:20.972375 read-cache.c:1254       performance: 0.013437429 s: refresh_index
18:28:20.983959 wt-status.c:623         performance: 0.011146646 s: wt_status_collect_changes_worktree
18:28:20.993948 wt-status.c:629         performance: 0.009879094 s: wt_status_collect_changes_index
18:28:21.138125 wt-status.c:632         performance: 0.144084737 s: wt_status_collect_untracked
18:28:21.173678 builtin/commit.c:1421   performance: 0.035463949 s: cmd_status:update_index
18:28:21.174251 trace.c:415             performance: 0.766707355 s: git command: 'git' 'status'

gentoo-x86.git
==============

This repository is a strange one with a balanced, wide and shallow
worktree (about 100k files and 23k dirs) and no .gitignore in
worktree. .._collect_untracked() time drops 88%, total time drops 56%.

Base status
-----------
18:20:40.828642 builtin/commit.c:1394   performance: 0.000000496 s: cmd_status:setup
18:20:41.027233 read-cache.c:1407       performance: 0.198130532 s: read_index
18:20:41.027670 read-cache.c:1407       performance: 0.000000581 s: read_index
18:20:41.171716 preload-index.c:131     performance: 0.144045594 s: read_index_preload
18:20:41.179171 read-cache.c:1254       performance: 0.007320424 s: refresh_index
18:20:41.185785 wt-status.c:623         performance: 0.006144638 s: wt_status_collect_changes_worktree
18:20:41.192701 wt-status.c:629         performance: 0.006780184 s: wt_status_collect_changes_index
18:20:41.991723 wt-status.c:632         performance: 0.798927029 s: wt_status_collect_untracked
18:20:41.994664 builtin/commit.c:1421   performance: 0.002852772 s: cmd_status:update_index
18:20:41.995458 trace.c:415             performance: 1.168427502 s: git command: 'git' 'status'
Populating status
-----------------
18:20:48.968848 builtin/commit.c:1394   performance: 0.000000380 s: cmd_status:setup
18:20:49.172918 read-cache.c:1407       performance: 0.203734214 s: read_index
18:20:49.173341 read-cache.c:1407       performance: 0.000000562 s: read_index
18:20:49.320013 preload-index.c:131     performance: 0.146671391 s: read_index_preload
18:20:49.328039 read-cache.c:1254       performance: 0.007921957 s: refresh_index
18:20:49.334680 wt-status.c:623         performance: 0.006172020 s: wt_status_collect_changes_worktree
18:20:49.342526 wt-status.c:629         performance: 0.007731746 s: wt_status_collect_changes_index
18:20:50.257510 wt-status.c:632         performance: 0.914864222 s: wt_status_collect_untracked
18:20:50.338371 builtin/commit.c:1421   performance: 0.080776477 s: cmd_status:update_index
18:20:50.338900 trace.c:415             performance: 1.371462446 s: git command: 'git' 'status'
Populated status
----------------
18:20:50.351160 builtin/commit.c:1394   performance: 0.000000571 s: cmd_status:setup
18:20:50.577358 read-cache.c:1407       performance: 0.225917338 s: read_index
18:20:50.577794 read-cache.c:1407       performance: 0.000000617 s: read_index
18:20:50.734140 preload-index.c:131     performance: 0.156345564 s: read_index_preload
18:20:50.745717 read-cache.c:1254       performance: 0.011463075 s: refresh_index
18:20:50.755176 wt-status.c:623         performance: 0.008877929 s: wt_status_collect_changes_worktree
18:20:50.763768 wt-status.c:629         performance: 0.008471633 s: wt_status_collect_changes_index
18:20:50.854885 wt-status.c:632         performance: 0.090988721 s: wt_status_collect_untracked
18:20:50.857765 builtin/commit.c:1421   performance: 0.002789097 s: cmd_status:update_index
18:20:50.858411 trace.c:415             performance: 0.508647673 s: git command: 'git' 'status'

linux-2.6
=========

Reference repo. Not too big. .._collect_untracked() drops 84%. Total time
drops 42%.

Base status
-----------
18:34:09.870122 builtin/commit.c:1394   performance: 0.000000385 s: cmd_status:setup
18:34:09.943218 read-cache.c:1407       performance: 0.072871177 s: read_index
18:34:09.943614 read-cache.c:1407       performance: 0.000000491 s: read_index
18:34:10.004364 preload-index.c:131     performance: 0.060748102 s: read_index_preload
18:34:10.008190 read-cache.c:1254       performance: 0.003714285 s: refresh_index
18:34:10.012087 wt-status.c:623         performance: 0.002775446 s: wt_status_collect_changes_worktree
18:34:10.016054 wt-status.c:629         performance: 0.003862140 s: wt_status_collect_changes_index
18:34:10.214747 wt-status.c:632         performance: 0.198604837 s: wt_status_collect_untracked
18:34:10.216102 builtin/commit.c:1421   performance: 0.001244166 s: cmd_status:update_index
18:34:10.216817 trace.c:415             performance: 0.347670735 s: git command: 'git' 'status'
Populating status
-----------------
18:34:16.595102 builtin/commit.c:1394   performance: 0.000000456 s: cmd_status:setup
18:34:16.666600 read-cache.c:1407       performance: 0.070992413 s: read_index
18:34:16.667012 read-cache.c:1407       performance: 0.000000606 s: read_index
18:34:16.729375 preload-index.c:131     performance: 0.062362492 s: read_index_preload
18:34:16.732565 read-cache.c:1254       performance: 0.003075517 s: refresh_index
18:34:16.736148 wt-status.c:623         performance: 0.002422201 s: wt_status_collect_changes_worktree
18:34:16.739990 wt-status.c:629         performance: 0.003746618 s: wt_status_collect_changes_index
18:34:16.948505 wt-status.c:632         performance: 0.208426710 s: wt_status_collect_untracked
18:34:16.961744 builtin/commit.c:1421   performance: 0.013151887 s: cmd_status:update_index
18:34:16.962233 trace.c:415             performance: 0.368537535 s: git command: 'git' 'status'
Populated status
----------------
18:34:16.970026 builtin/commit.c:1394   performance: 0.000000631 s: cmd_status:setup
18:34:17.046235 read-cache.c:1407       performance: 0.075904673 s: read_index
18:34:17.046644 read-cache.c:1407       performance: 0.000000681 s: read_index
18:34:17.113564 preload-index.c:131     performance: 0.066920253 s: read_index_preload
18:34:17.117281 read-cache.c:1254       performance: 0.003604055 s: refresh_index
18:34:17.121115 wt-status.c:623         performance: 0.002508345 s: wt_status_collect_changes_worktree
18:34:17.125089 wt-status.c:629         performance: 0.003871636 s: wt_status_collect_changes_index
18:34:17.156089 wt-status.c:632         performance: 0.030895703 s: wt_status_collect_untracked
18:34:17.169861 builtin/commit.c:1421   performance: 0.013686404 s: cmd_status:update_index
18:34:17.170391 trace.c:415             performance: 0.201474531 s: git command: 'git' 'status'

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-update-index.txt |  8 ++++++++
 builtin/update-index.c             | 16 ++++++++++++++++
 2 files changed, 24 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index dfc09d9..f9a35cd 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -172,6 +172,14 @@ may not support it yet.
 	the shared index file. This mode is designed for very large
 	indexes that take a signficant amount of time to read or write.
 
+--untracked-cache::
+--no-untracked-cache::
+	Enable or disable untracked cache extension. This could speed
+	up commands that involve determining untracked files such
+	as `git status`. The underlying operating system and file
+	system must change `st_mtime` field of a directory if files
+	are added or deleted in that directory.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index e8c7fd4..3d2dedd 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -740,6 +740,7 @@ static int reupdate_callback(struct parse_opt_ctx_t *ctx,
 int cmd_update_index(int argc, const char **argv, const char *prefix)
 {
 	int newfd, entries, has_errors = 0, line_termination = '\n';
+	int untracked_cache = -1;
 	int read_from_stdin = 0;
 	int prefix_length = prefix ? strlen(prefix) : 0;
 	int preferred_index_format = 0;
@@ -831,6 +832,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			N_("write index in this format")),
 		OPT_BOOL(0, "split-index", &split_index,
 			N_("enable or disable split index")),
+		OPT_BOOL(0, "untracked-cache", &untracked_cache,
+			N_("enable/disable untracked cache")),
 		OPT_END()
 	};
 
@@ -937,6 +940,19 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 		the_index.split_index = NULL;
 		the_index.cache_changed |= SOMETHING_CHANGED;
 	}
+	if (untracked_cache > 0 && !the_index.untracked) {
+		struct untracked_cache *uc;
+
+		uc = xcalloc(1, sizeof(*uc));
+		uc->exclude_per_dir = ".gitignore";
+		/* should be the same flags used by git-status */
+		uc->dir_flags = DIR_SHOW_OTHER_DIRECTORIES | DIR_HIDE_EMPTY_DIRECTORIES;
+		the_index.untracked = uc;
+		the_index.cache_changed |= UNTRACKED_CHANGED;
+	} else if (!untracked_cache && the_index.untracked) {
+		the_index.untracked = NULL;
+		the_index.cache_changed |= UNTRACKED_CHANGED;
+	}
 
 	if (active_cache_changed) {
 		if (newfd < 0) {
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 19/22] update-index: test the system before enabling untracked cache
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (17 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 18/22] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 20/22] t7063: tests for " Nguyễn Thái Ngọc Duy
                     ` (2 subsequent siblings)
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Helped-by: Eric Sunshine <sunshine@sunshineco.com>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/git-update-index.txt |   6 ++
 builtin/update-index.c             | 148 +++++++++++++++++++++++++++++++++++++
 2 files changed, 154 insertions(+)

diff --git a/Documentation/git-update-index.txt b/Documentation/git-update-index.txt
index f9a35cd..ed32bae 100644
--- a/Documentation/git-update-index.txt
+++ b/Documentation/git-update-index.txt
@@ -180,6 +180,12 @@ may not support it yet.
 	system must change `st_mtime` field of a directory if files
 	are added or deleted in that directory.
 
+--force-untracked-cache::
+	For safety, `--untracked-cache` performs tests on the working
+	directory to make sure untracked cache can be used. These
+	tests can take a few seconds. `--force-untracked-cache` can be
+	used to skip the tests.
+
 \--::
 	Do not interpret any more arguments as options.
 
diff --git a/builtin/update-index.c b/builtin/update-index.c
index 3d2dedd..f23ec83 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -47,6 +47,147 @@ static void report(const char *fmt, ...)
 	va_end(vp);
 }
 
+static void remove_test_directory(void)
+{
+	struct strbuf sb = STRBUF_INIT;
+	strbuf_addstr(&sb, "dir-mtime-test");
+	remove_dir_recursively(&sb, 0);
+	strbuf_release(&sb);
+}
+
+static void xmkdir(const char *path)
+{
+	if (mkdir(path, 0700))
+		die_errno(_("failed to create directory %s"), path);
+}
+
+static int xstat(const char *path, struct stat *st)
+{
+	if (stat(path, st))
+		die_errno(_("failed to stat %s"), path);
+	return 0;
+}
+
+static int create_file(const char *path)
+{
+	int fd = open(path, O_CREAT | O_RDWR, 0644);
+	if (fd < 0)
+		die_errno(_("failed to create file %s"), path);
+	return fd;
+}
+
+static void xunlink(const char *path)
+{
+	if (unlink(path))
+		die_errno(_("failed to delete file %s"), path);
+}
+
+static void xrmdir(const char *path)
+{
+	if (rmdir(path))
+		die_errno(_("failed to delete directory %s"), path);
+}
+
+static void avoid_racy(void)
+{
+	/*
+	 * not sure if we could usleep(10) if USE_NSEC is defined. The
+	 * field nsec could be there, but the OS could choose to
+	 * ignore it?
+	 */
+	sleep(1);
+}
+
+static int test_if_untracked_cache_is_supported(void)
+{
+	struct stat st;
+	struct stat_data base;
+	int fd;
+
+	fprintf(stderr, _("Testing "));
+	xmkdir("dir-mtime-test");
+	atexit(remove_test_directory);
+	xstat("dir-mtime-test", &st);
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	fd = create_file("dir-mtime-test/newfile");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		close(fd);
+		fputc('\n', stderr);
+		fprintf_ln(stderr,_("directory stat info does not "
+				    "change after adding a new file"));
+		return 0;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	xmkdir("dir-mtime-test/new-dir");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		close(fd);
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not change "
+				     "after adding a new directory"));
+		return 0;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	write_or_die(fd, "data", 4);
+	close(fd);
+	xstat("dir-mtime-test", &st);
+	if (match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info changes "
+				     "after updating a file"));
+		return 0;
+	}
+	fputc('.', stderr);
+
+	avoid_racy();
+	close(create_file("dir-mtime-test/new-dir/new"));
+	xstat("dir-mtime-test", &st);
+	if (match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info changes after "
+				     "adding a file inside subdirectory"));
+		return 0;
+	}
+	fputc('.', stderr);
+
+	avoid_racy();
+	xunlink("dir-mtime-test/newfile");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not "
+				     "change after deleting a file"));
+		return 0;
+	}
+	fill_stat_data(&base, &st);
+	fputc('.', stderr);
+
+	avoid_racy();
+	xunlink("dir-mtime-test/new-dir/new");
+	xrmdir("dir-mtime-test/new-dir");
+	xstat("dir-mtime-test", &st);
+	if (!match_stat_data(&base, &st)) {
+		fputc('\n', stderr);
+		fprintf_ln(stderr, _("directory stat info does not "
+				     "change after deleting a directory"));
+		return 0;
+	}
+
+	xrmdir("dir-mtime-test");
+	fprintf_ln(stderr, _(" OK"));
+	return 1;
+}
+
 static int mark_ce_flags(const char *path, int flag, int mark)
 {
 	int namelen = strlen(path);
@@ -834,6 +975,8 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 			N_("enable or disable split index")),
 		OPT_BOOL(0, "untracked-cache", &untracked_cache,
 			N_("enable/disable untracked cache")),
+		OPT_SET_INT(0, "force-untracked-cache", &untracked_cache,
+			    N_("enable untracked cache without testing the filesystem"), 2),
 		OPT_END()
 	};
 
@@ -943,6 +1086,11 @@ int cmd_update_index(int argc, const char **argv, const char *prefix)
 	if (untracked_cache > 0 && !the_index.untracked) {
 		struct untracked_cache *uc;
 
+		if (untracked_cache < 2) {
+			setup_work_tree();
+			if (!test_if_untracked_cache_is_supported())
+				return 1;
+		}
 		uc = xcalloc(1, sizeof(*uc));
 		uc->exclude_per_dir = ".gitignore";
 		/* should be the same flags used by git-status */
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 20/22] t7063: tests for untracked cache
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (18 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 19/22] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 21/22] mingw32: add uname() Nguyễn Thái Ngọc Duy
  2014-11-08  9:39   ` [PATCH v2 22/22] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
  21 siblings, 0 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 .gitignore                                 |   1 +
 Makefile                                   |   1 +
 t/t7063-status-untracked-cache.sh (new +x) | 353 +++++++++++++++++++++++++++++
 test-dump-untracked-cache.c (new)          |  61 +++++
 4 files changed, 416 insertions(+)
 create mode 100755 t/t7063-status-untracked-cache.sh
 create mode 100644 test-dump-untracked-cache.c

diff --git a/.gitignore b/.gitignore
index 81e12c0..e2bb375 100644
--- a/.gitignore
+++ b/.gitignore
@@ -182,6 +182,7 @@
 /test-delta
 /test-dump-cache-tree
 /test-dump-split-index
+/test-dump-untracked-cache
 /test-scrap-cache-tree
 /test-genrandom
 /test-hashmap
diff --git a/Makefile b/Makefile
index 9f984a9..fa58a53 100644
--- a/Makefile
+++ b/Makefile
@@ -555,6 +555,7 @@ TEST_PROGRAMS_NEED_X += test-date
 TEST_PROGRAMS_NEED_X += test-delta
 TEST_PROGRAMS_NEED_X += test-dump-cache-tree
 TEST_PROGRAMS_NEED_X += test-dump-split-index
+TEST_PROGRAMS_NEED_X += test-dump-untracked-cache
 TEST_PROGRAMS_NEED_X += test-genrandom
 TEST_PROGRAMS_NEED_X += test-hashmap
 TEST_PROGRAMS_NEED_X += test-index-version
diff --git a/t/t7063-status-untracked-cache.sh b/t/t7063-status-untracked-cache.sh
new file mode 100755
index 0000000..2b2ffd7
--- /dev/null
+++ b/t/t7063-status-untracked-cache.sh
@@ -0,0 +1,353 @@
+#!/bin/sh
+
+test_description='test untracked cache'
+
+. ./test-lib.sh
+
+avoid_racy() {
+	sleep 1
+}
+
+git update-index --untracked-cache
+# It's fine if git update-index returns an error code other than one,
+# it'll be caught in the first test.
+if test $? -eq 1; then
+	skip_all='This system does not support untracked cache'
+	test_done
+fi
+
+test_expect_success 'setup' '
+	git init worktree &&
+	cd worktree &&
+	mkdir done dtwo dthree &&
+	touch one two three done/one dtwo/two dthree/three &&
+	git add one two done/one &&
+	: >.git/info/exclude &&
+	git update-index --untracked-cache
+'
+
+test_expect_success 'untracked cache is empty' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 0000000000000000000000000000000000000000
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+EOF
+	test_cmp ../expect ../actual
+'
+
+cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? dthree/
+?? dtwo/
+?? three
+EOF
+
+cat >../dump.expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ 0000000000000000000000000000000000000000 recurse valid
+dthree/
+dtwo/
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+
+test_expect_success 'status first time (empty cache)' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 3
+gitignore invalidation: 1
+directory invalidation: 0
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'untracked cache after first status' '
+	test-dump-untracked-cache >../actual &&
+	test_cmp ../dump.expect ../actual
+'
+
+test_expect_success 'status second time (fully populated cache)' '
+	avoid_racy &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 0
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'untracked cache after second status' '
+	test-dump-untracked-cache >../actual &&
+	test_cmp ../dump.expect ../actual
+'
+
+test_expect_success 'modify in root directory, one dir invalidation' '
+	avoid_racy &&
+	: >four &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? dthree/
+?? dtwo/
+?? four
+?? three
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 1
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ 0000000000000000000000000000000000000000 recurse valid
+dthree/
+dtwo/
+four
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'new .gitignore invalidates recursively' '
+	avoid_racy &&
+	echo four >.gitignore &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dthree/
+?? dtwo/
+?? three
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 1
+directory invalidation: 1
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dthree/
+dtwo/
+three
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+three
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'new info/exclude invalidates everything' '
+	avoid_racy &&
+	echo three >>.git/info/exclude &&
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 1
+directory invalidation: 0
+opendir: 4
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'move two from tracked to untracked' '
+	git rm --cached two &&
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'status after the move' '
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+?? .gitignore
+?? dtwo/
+?? two
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+two
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'move two from untracked to tracked' '
+	git add two &&
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_expect_success 'status after the move' '
+	: >../trace &&
+	GIT_TRACE_UNTRACKED_STATS="$TRASH_DIRECTORY/trace" \
+	git status --porcelain >../actual &&
+	cat >../status.expect <<EOF &&
+A  done/one
+A  one
+A  two
+?? .gitignore
+?? dtwo/
+EOF
+	test_cmp ../status.expect ../actual &&
+	cat >../trace.expect <<EOF &&
+node creation: 0
+gitignore invalidation: 0
+directory invalidation: 0
+opendir: 1
+EOF
+	test_cmp ../trace.expect ../trace
+'
+
+test_expect_success 'verify untracked cache dump' '
+	test-dump-untracked-cache >../actual &&
+	cat >../expect <<EOF &&
+info/exclude 13263c0978fb9fad16b2d580fb800b6d811c3ff0
+core.excludesfile 0000000000000000000000000000000000000000
+exclude_per_dir .gitignore
+flags 00000006
+/ e6fcc8f2ee31bae321d66afd183fcb7237afae6e recurse valid
+.gitignore
+dtwo/
+/done/ 0000000000000000000000000000000000000000 recurse valid
+/dthree/ 0000000000000000000000000000000000000000 recurse check_only valid
+/dtwo/ 0000000000000000000000000000000000000000 recurse check_only valid
+two
+EOF
+	test_cmp ../expect ../actual
+'
+
+test_done
diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
new file mode 100644
index 0000000..710441e
--- /dev/null
+++ b/test-dump-untracked-cache.c
@@ -0,0 +1,61 @@
+#include "cache.h"
+#include "dir.h"
+
+static int compare_untracked(const void *a_, const void *b_)
+{
+	const char *const *a = a_;
+	const char *const *b = b_;
+	return strcmp(*a, *b);
+}
+
+static int compare_dir(const void *a_, const void *b_)
+{
+	const struct untracked_cache_dir *const *a = a_;
+	const struct untracked_cache_dir *const *b = b_;
+	return strcmp((*a)->name, (*b)->name);
+}
+
+static void dump(struct untracked_cache_dir *ucd, struct strbuf *base)
+{
+	int i, len;
+	qsort(ucd->untracked, ucd->untracked_nr, sizeof(*ucd->untracked),
+	      compare_untracked);
+	qsort(ucd->dirs, ucd->dirs_nr, sizeof(*ucd->dirs),
+	      compare_dir);
+	len = base->len;
+	strbuf_addf(base, "%s/", ucd->name);
+	printf("%s %s", base->buf,
+	       sha1_to_hex(ucd->exclude_sha1));
+	if (ucd->recurse)
+		fputs(" recurse", stdout);
+	if (ucd->check_only)
+		fputs(" check_only", stdout);
+	if (ucd->valid)
+		fputs(" valid", stdout);
+	printf("\n");
+	for (i = 0; i < ucd->untracked_nr; i++)
+		printf("%s\n", ucd->untracked[i]);
+	for (i = 0; i < ucd->dirs_nr; i++)
+		dump(ucd->dirs[i], base);
+	strbuf_setlen(base, len);
+}
+
+int main(int ac, char **av)
+{
+	struct untracked_cache *uc;
+	struct strbuf base = STRBUF_INIT;
+	if (read_cache() < 0)
+		die("unable to read index file");
+	uc = the_index.untracked;
+	if (!uc) {
+		printf("no untracked cache\n");
+		return 0;
+	}
+	printf("info/exclude %s\n", sha1_to_hex(uc->ss_info_exclude.sha1));
+	printf("core.excludesfile %s\n", sha1_to_hex(uc->ss_excludes_file.sha1));
+	printf("exclude_per_dir %s\n", uc->exclude_per_dir);
+	printf("flags %08x\n", uc->dir_flags);
+	if (uc->root)
+		dump(uc->root, &base);
+	return 0;
+}
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 21/22] mingw32: add uname()
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (19 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 20/22] t7063: tests for " Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-09  3:32     ` Eric Sunshine
  2014-11-08  9:39   ` [PATCH v2 22/22] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
  21 siblings, 1 reply; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 compat/mingw.c | 11 +++++++++++
 compat/mingw.h |  9 +++++++++
 2 files changed, 20 insertions(+)

diff --git a/compat/mingw.c b/compat/mingw.c
index c5c37e5..b817678 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -2128,3 +2128,14 @@ void mingw_startup()
 	/* initialize Unicode console */
 	winansi_init();
 }
+
+int uname(struct utsname *buf)
+{
+	DWORD v = GetVersion();
+	memset(buf, 0, sizeof(*buf));
+	sprintf(buf->sysname, "Windows");
+	sprintf(buf->release, "%u.%u", v & 0xff, (v >> 8) & 0xff);
+	/* assuming NT variants only.. */
+	sprintf(buf->version, "%u", (v >> 16) & 0x7fff);
+	return 0;
+}
diff --git a/compat/mingw.h b/compat/mingw.h
index df0e320..d00ba7a 100644
--- a/compat/mingw.h
+++ b/compat/mingw.h
@@ -77,6 +77,14 @@ struct itimerval {
 };
 #define ITIMER_REAL 0
 
+struct utsname {
+	char sysname[16];
+	char nodename[1];
+	char release[16];
+	char version[16];
+	char machine[1];
+};
+
 /*
  * sanitize preprocessor namespace polluted by Windows headers defining
  * macros which collide with git local versions
@@ -166,6 +174,7 @@ struct passwd *getpwuid(uid_t uid);
 int setitimer(int type, struct itimerval *in, struct itimerval *out);
 int sigaction(int sig, struct sigaction *in, struct sigaction *out);
 int link(const char *oldpath, const char *newpath);
+int uname(struct utsname *buf);
 
 /*
  * replacements of existing functions
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* [PATCH v2 22/22] untracked cache: guard and disable on system changes
  2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
                     ` (20 preceding siblings ...)
  2014-11-08  9:39   ` [PATCH v2 21/22] mingw32: add uname() Nguyễn Thái Ngọc Duy
@ 2014-11-08  9:39   ` Nguyễn Thái Ngọc Duy
  2014-11-09  3:39     ` Eric Sunshine
  2014-11-09 21:39     ` Torsten Bögershausen
  21 siblings, 2 replies; 65+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2014-11-08  9:39 UTC (permalink / raw)
  To: git; +Cc: Nguyễn Thái Ngọc Duy

If the user enables untracked cache, then

 - moves the worktree to an unsupported filesystem
 - or simply upgrades the OS
 - or moves the whole (portable) disk from one machine to another
 - or accesses a shared fs from another machine

there's no guarantee that untracked cache can still function properly.
Record the worktree location and OS footprint in the cache. If it
changes, err on the safe side and disable the cache. The user can
'update-index --untracked-cache' again to make sure all conditions are
met.

This change does not cover all bases; you can fool it if you try
hard. The point is to stop accidents.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c                       | 44 +++++++++++++++++++++++++++++++++++++++++---
 git-compat-util.h           |  1 +
 test-dump-untracked-cache.c |  1 +
 3 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index a99bd63..2324c52 100644
--- a/dir.c
+++ b/dir.c
@@ -2227,10 +2227,22 @@ static void write_one_dir(struct strbuf *out, struct untracked_cache_dir *untrac
 			write_one_dir(out, untracked->dirs[i]);
 }
 
+static void get_ident_string(struct strbuf *sb)
+{
+	struct utsname uts;
+
+	if (uname(&uts))
+		die_errno(_("failed to get kernel name and information"));
+	strbuf_addf(sb, "Location %s, system %s %s %s", get_git_work_tree(),
+		    uts.sysname, uts.release, uts.version);
+}
+
 void write_untracked_extension(struct strbuf *out, struct untracked_cache *untracked)
 {
 	struct ondisk_untracked_cache *ouc;
-	int len = 0;
+	struct strbuf sb = STRBUF_INIT;
+	unsigned char varbuf[16];
+	int len = 0, varint_len;
 	if (untracked->exclude_per_dir)
 		len = strlen(untracked->exclude_per_dir);
 	ouc = xmalloc(sizeof(*ouc) + len);
@@ -2240,6 +2252,13 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra
 	hashcpy(ouc->excludes_file_sha1, untracked->ss_excludes_file.sha1);
 	ouc->dir_flags = htonl(untracked->dir_flags);
 	memcpy(ouc->exclude_per_dir, untracked->exclude_per_dir, len + 1);
+
+	get_ident_string(&sb);
+	varint_len = encode_varint(sb.len + 1, varbuf);
+	strbuf_add(out, varbuf, varint_len);
+	strbuf_add(out, sb.buf, sb.len + 1);
+	strbuf_release(&sb);
+
 	strbuf_add(out, ouc, sizeof(*ouc) + len);
 	if (untracked->root)
 		write_one_dir(out, untracked->root);
@@ -2355,10 +2374,29 @@ static void load_sha1_stat(struct sha1_stat *sha1_stat,
 
 struct untracked_cache *read_untracked_extension(const void *data, unsigned long sz)
 {
-	const struct ondisk_untracked_cache *ouc = data;
+	const struct ondisk_untracked_cache *ouc;
 	struct untracked_cache *uc;
+	const unsigned char *next = data;
+	struct strbuf sb = STRBUF_INIT;
 	int len;
 
+	len = decode_varint(&next);
+	if (sz <= (next - (const unsigned char *)data) + len ||
+	    next[len - 1] != '\0')
+		return NULL;
+
+	get_ident_string(&sb);
+	if (strcmp(sb.buf, (const char *)next)) {
+		warning(_("system identification does not match, untracked cache disabled.\n"
+			  "Stored: %s\nCurrent: %s\n"),
+			next, sb.buf);
+		strbuf_release(&sb);
+		return NULL;
+	}
+	strbuf_release(&sb);
+	ouc = (const struct ondisk_untracked_cache *)(next + len);
+	sz -= (const char *)ouc - (const char *)data;
+
 	if (sz < sizeof(*ouc))
 		return NULL;
 
@@ -2373,7 +2411,7 @@ struct untracked_cache *read_untracked_extension(const void *data, unsigned long
 	if (sz == len)
 		return uc;
 	if (sz > len &&
-	    read_one_dir(&uc->root, (const unsigned char *)data + len,
+	    read_one_dir(&uc->root, (const unsigned char *)ouc + len,
 			 sz - len) == sz - len)
 		return uc;
 	free_untracked_cache(uc);
diff --git a/git-compat-util.h b/git-compat-util.h
index f587749..6b1f259 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -125,6 +125,7 @@
 #else
 #include <poll.h>
 #endif
+#include <sys/utsname.h>
 
 #if defined(__MINGW32__)
 /* pull in Windows compatibility stuff */
diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
index 710441e..25d855d 100644
--- a/test-dump-untracked-cache.c
+++ b/test-dump-untracked-cache.c
@@ -44,6 +44,7 @@ int main(int ac, char **av)
 {
 	struct untracked_cache *uc;
 	struct strbuf base = STRBUF_INIT;
+	setup_git_directory();
 	if (read_cache() < 0)
 		die("unable to read index file");
 	uc = the_index.untracked;
-- 
2.1.0.rc0.78.gc0d8480

^ permalink raw reply related	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy
  2014-11-08  9:39   ` [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
@ 2014-11-08 17:08     ` brian m. carlson
  2014-11-17 20:35     ` David Turner
  1 sibling, 0 replies; 65+ messages in thread
From: brian m. carlson @ 2014-11-08 17:08 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 804 bytes --]

On Sat, Nov 08, 2014 at 04:39:35PM +0700, Nguyễn Thái Ngọc Duy wrote:
> The requirement for this to work is stat info of a directory MUST
> change if an entry is added to or removed from that directory (and
> should not change often otherwise). If your OS and filesytem do not

Should be "filesystem" (or "file system").

> meet this requirement, untracked cache is not for you. Most file
> systems on *nix should be fine. On Windows, NTFS is fine while FAT may
> be not [1] even though FAT on Linux seems to be fine.

Tiny nit: "may be not" should probably be "may not be".
-- 
brian m. carlson / brian with sandals: Houston, Texas, US
+1 832 623 2791 | http://www.crustytoothpaste.net/~bmc | My opinion only
OpenPGP: RSA v4 4096b: 88AC E9B2 9196 305B A994 7552 F1BA 225C 0223 B187

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 819 bytes --]

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 21/22] mingw32: add uname()
  2014-11-08  9:39   ` [PATCH v2 21/22] mingw32: add uname() Nguyễn Thái Ngọc Duy
@ 2014-11-09  3:32     ` Eric Sunshine
  2014-11-09  8:36       ` Duy Nguyen
  0 siblings, 1 reply; 65+ messages in thread
From: Eric Sunshine @ 2014-11-09  3:32 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: Git List

On Sat, Nov 8, 2014 at 4:39 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
> diff --git a/compat/mingw.c b/compat/mingw.c
> index c5c37e5..b817678 100644
> --- a/compat/mingw.c
> +++ b/compat/mingw.c
> @@ -2128,3 +2128,14 @@ void mingw_startup()
>         /* initialize Unicode console */
>         winansi_init();
>  }
> +
> +int uname(struct utsname *buf)
> +{
> +       DWORD v = GetVersion();
> +       memset(buf, 0, sizeof(*buf));
> +       sprintf(buf->sysname, "Windows");

strcpy() maybe?

> +       sprintf(buf->release, "%u.%u", v & 0xff, (v >> 8) & 0xff);
> +       /* assuming NT variants only.. */
> +       sprintf(buf->version, "%u", (v >> 16) & 0x7fff);
> +       return 0;
> +}
> diff --git a/compat/mingw.h b/compat/mingw.h
> index df0e320..d00ba7a 100644
> --- a/compat/mingw.h
> +++ b/compat/mingw.h
> @@ -77,6 +77,14 @@ struct itimerval {
>  };
>  #define ITIMER_REAL 0
>
> +struct utsname {
> +       char sysname[16];
> +       char nodename[1];
> +       char release[16];
> +       char version[16];
> +       char machine[1];
> +};
> +
>  /*
>   * sanitize preprocessor namespace polluted by Windows headers defining
>   * macros which collide with git local versions
> @@ -166,6 +174,7 @@ struct passwd *getpwuid(uid_t uid);
>  int setitimer(int type, struct itimerval *in, struct itimerval *out);
>  int sigaction(int sig, struct sigaction *in, struct sigaction *out);
>  int link(const char *oldpath, const char *newpath);
> +int uname(struct utsname *buf);
>
>  /*
>   * replacements of existing functions
> --
> 2.1.0.rc0.78.gc0d8480

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 22/22] untracked cache: guard and disable on system changes
  2014-11-08  9:39   ` [PATCH v2 22/22] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
@ 2014-11-09  3:39     ` Eric Sunshine
  2014-11-09  8:34       ` Duy Nguyen
  2014-11-09 21:39     ` Torsten Bögershausen
  1 sibling, 1 reply; 65+ messages in thread
From: Eric Sunshine @ 2014-11-09  3:39 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: Git List

On Sat, Nov 8, 2014 at 4:39 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
> If the user enables untracked cache, then
>
>  - move worktree to an unsupported filesystem
>  - or simply upgrade OS
>  - or move the whole (portable) disk from one machine to another
>  - or access a shared fs from another machine
>
> there's no guarantee that untracked cache can still function properly.
> Record the worktree location and OS footprint in the cache. If it
> changes, err on the safe side and disable the cache. The user can
> 'update-index --untracked-cache' again to make sure all conditions are
> met.
>
> This change does not cover all bases, you can fool it if you try
> hard. The point is to stop accidents.
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
> diff --git a/test-dump-untracked-cache.c b/test-dump-untracked-cache.c
> index 710441e..25d855d 100644
> --- a/test-dump-untracked-cache.c
> +++ b/test-dump-untracked-cache.c
> @@ -44,6 +44,7 @@ int main(int ac, char **av)
>  {
>         struct untracked_cache *uc;
>         struct strbuf base = STRBUF_INIT;
> +       setup_git_directory();

What is this change about? Is it related to the rest of this patch?

>         if (read_cache() < 0)
>                 die("unable to read index file");
>         uc = the_index.untracked;
> --
> 2.1.0.rc0.78.gc0d8480

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 22/22] untracked cache: guard and disable on system changes
  2014-11-09  3:39     ` Eric Sunshine
@ 2014-11-09  8:34       ` Duy Nguyen
  0 siblings, 0 replies; 65+ messages in thread
From: Duy Nguyen @ 2014-11-09  8:34 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List

On Sun, Nov 9, 2014 at 10:39 AM, Eric Sunshine <sunshine@sunshineco.com> wrote:
>> --- a/test-dump-untracked-cache.c
>> +++ b/test-dump-untracked-cache.c
>> @@ -44,6 +44,7 @@ int main(int ac, char **av)
>>  {
>>         struct untracked_cache *uc;
>>         struct strbuf base = STRBUF_INIT;
>> +       setup_git_directory();
>
> What is this change about? Is it related to the rest of this patch?

Yes. This patch makes use of get_git_work_tree() from read_cache()
below. Without setup_git_directory(), the worktree is not set up, so
read_untracked_extension() thinks the repo has moved and returns no
cache. I'll make a note about this.

>
>>         if (read_cache() < 0)
>>                 die("unable to read index file");
>>         uc = the_index.untracked;




-- 
Duy

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 21/22] mingw32: add uname()
  2014-11-09  3:32     ` Eric Sunshine
@ 2014-11-09  8:36       ` Duy Nguyen
  2014-11-09 11:46         ` Torsten Bögershausen
  0 siblings, 1 reply; 65+ messages in thread
From: Duy Nguyen @ 2014-11-09  8:36 UTC (permalink / raw)
  To: Eric Sunshine; +Cc: Git List

On Sun, Nov 9, 2014 at 10:32 AM, Eric Sunshine <sunshine@sunshineco.com> wrote:
> On Sat, Nov 8, 2014 at 4:39 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>> ---
>> diff --git a/compat/mingw.c b/compat/mingw.c
>> index c5c37e5..b817678 100644
>> --- a/compat/mingw.c
>> +++ b/compat/mingw.c
>> @@ -2128,3 +2128,14 @@ void mingw_startup()
>>         /* initialize Unicode console */
>>         winansi_init();
>>  }
>> +
>> +int uname(struct utsname *buf)
>> +{
>> +       DWORD v = GetVersion();
>> +       memset(buf, 0, sizeof(*buf));
>> +       sprintf(buf->sysname, "Windows");
>
> strcpy() maybe?

It was originally strcpy, then I wanted to get fancy and show Win3.1,
Win95... but it got complicated (couldn't just do it based on the last
bit of 'v'). Will revert.
-- 
Duy

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 21/22] mingw32: add uname()
  2014-11-09  8:36       ` Duy Nguyen
@ 2014-11-09 11:46         ` Torsten Bögershausen
  2014-11-09 18:47           ` Junio C Hamano
  0 siblings, 1 reply; 65+ messages in thread
From: Torsten Bögershausen @ 2014-11-09 11:46 UTC (permalink / raw)
  To: Duy Nguyen, Eric Sunshine; +Cc: Git List

On 2014-11-09 09.36, Duy Nguyen wrote:
> On Sun, Nov 9, 2014 at 10:32 AM, Eric Sunshine <sunshine@sunshineco.com> wrote:
>> On Sat, Nov 8, 2014 at 4:39 AM, Nguyễn Thái Ngọc Duy <pclouds@gmail.com> wrote:
>>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>>> ---
>>> diff --git a/compat/mingw.c b/compat/mingw.c
>>> index c5c37e5..b817678 100644
>>> --- a/compat/mingw.c
>>> +++ b/compat/mingw.c
>>> @@ -2128,3 +2128,14 @@ void mingw_startup()
>>>         /* initialize Unicode console */
>>>         winansi_init();
>>>  }
>>> +
>>> +int uname(struct utsname *buf)
>>> +{
>>> +       DWORD v = GetVersion();
>>> +       memset(buf, 0, sizeof(*buf));
>>> +       sprintf(buf->sysname, "Windows");
>>
>> strcpy() maybe?
> 
> It was originally strcpy, then I wanted to get fancy and show Win3.1,
> Win95... but it got complicated (couldn't just do it based on the last
> bit of 'v'). Will revert.
> 
Why not use strlcpy()?
(This feels a little like overkill, but on the other hand it is safe to use regardless
of how long the buf is, and it is a good example of how to avoid "overrunning" code)

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 21/22] mingw32: add uname()
  2014-11-09 11:46         ` Torsten Bögershausen
@ 2014-11-09 18:47           ` Junio C Hamano
  0 siblings, 0 replies; 65+ messages in thread
From: Junio C Hamano @ 2014-11-09 18:47 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Duy Nguyen, Eric Sunshine, Git List

Torsten Bögershausen <tboegi@web.de> writes:

>>> strcpy() maybe?
>> 
>> It was originally strcpy, then I wanted to get fancy and show Win3.1,
>> Win95... but it got complicated (couldn't just do it based on the last
>> bit of 'v'). Will revert.
>> 
> Why not use strlcpy() ?
> (This feels little like an overkill, but on the other hand it is safe to use regardless
> how long the buf is, and it is a good example how to avoid "overrunning" code)

I actually think such a blind belief in strlcpy() is a disease.

Surely it will help you avoid overwriting other unrelated data after
the string buffer you are writing into, but it does not help you
very much to make sure what you wanted to place in the string buffer
all got recorded in that buffer, unless you remember to check its
return value to make sure it did not get truncated.  Otherwise, you
may be valuing other people's data, but you are not valuing your own
data sufficiently enough.

And if you (1) know how big your own buffer is, and (2) are checking
if your call to strlcpy() copied everything you wanted to copy, it
would not be more work to do it without strlcpy()---you need to know
how big a string you are copying into anyway if you care about your
own data to the same degree you care about other people's data ;-)

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 22/22] untracked cache: guard and disable on system changes
  2014-11-08  9:39   ` [PATCH v2 22/22] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
  2014-11-09  3:39     ` Eric Sunshine
@ 2014-11-09 21:39     ` Torsten Bögershausen
  2014-11-09 23:47       ` Duy Nguyen
  1 sibling, 1 reply; 65+ messages in thread
From: Torsten Bögershausen @ 2014-11-09 21:39 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy, git

On 2014-11-08 10.39, Nguyễn Thái Ngọc Duy wrote:
> If the user enables untracked cache, then
> 
>  - move worktree to an unsupported filesystem

How do we detect this move?
Shouldn't we be able to detect an unsupported file system
(by probing if stat(root_dir_of_repo) == stat(what_we_have_in_index_file))?
>  - or simply upgrade OS
>  - or move the whole (portable) disk from one machine to another
How does this affect Git?
I would rather expect an update of Git to be an issue,
but knowing that Git strongly tends to be backward compatible, there
shouldn't be an issue.

>  - or access a shared fs from another machine
This is interesting.
I have done some basic test on git.git using a medium fast laptop
talking to a medium fast server using a medium normal WLAN.
git status was in a range of 2-3 seconds; with your patch, 1-1.5 seconds.

(That all depends on the network load, some caching here or there)

But roughly twice the speed, very nice!

I will do some tests with networking file systems, like 
Linux+ext4 -- SMB -- Windows (cygwin/Git for Windows)
Linux+ext4 -- SMB -- Mac OS X
Linux+ext4 -- NFS -- Linux
Windows    -- SMB -- Linux
Windows    -- SMB -- Mac OS X
(and then we have some cases where a virtual machine runs a "shared" FS with a host file system,
where the untracked cache looks promising)

I am not really sure when we need this protection.
What I understand is that stat(dir).mtime must be reliable.
Another problem may be mixing old Git with new Git, but the old Git
should write an index file without UNTR, and we should be safe ?
The new Git will write an index file with UNTR, which the old Git will ignore.

What do I miss ?

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 22/22] untracked cache: guard and disable on system changes
  2014-11-09 21:39     ` Torsten Bögershausen
@ 2014-11-09 23:47       ` Duy Nguyen
  2014-11-10 20:48         ` Torsten Bögershausen
  0 siblings, 1 reply; 65+ messages in thread
From: Duy Nguyen @ 2014-11-09 23:47 UTC (permalink / raw)
  To: Torsten Bögershausen; +Cc: Git Mailing List

On Mon, Nov 10, 2014 at 4:39 AM, Torsten Bögershausen <tboegi@web.de> wrote:
> On 2014-11-08 10.39, Nguyễn Thái Ngọc Duy wrote:
>> If the user enables untracked cache, then
>>
>>  - move worktree to an unsupported filesystem
>
> How do we detect this move ?
> Shouldn't we be able to detect an unsupported file system
> (by probing if stat(root_dir_of_repo) == stat(what_we_have_in_index_file))

I don't see any generic way of detecting this. So I just save $(cwd)
and check if the repo is moved. False positive if you move your repo
within the same filesystem. If you move your stuff to a new filesystem
and mount it to the same place as before, my test fails.

>>  - or simply upgrade OS
>>  - or move the whole (portable) disk from one machine to another
> How does this effect Git ?
> I would rather expect an update of Git to be an issue,
> but knowing that Git strongly tends to be backward compatible, there
> shouldn't be a issue.

If this link [1] is true and you use vfat on Linux, then we should
disable the cache when moving it to Windows.

[1] http://support.microsoft.com/kb/299648

>>  - or access a shared fs from another machine
> This is interesting.
> I have done some basic test on git.git using a medium fast laptop
> talking to a medium fast server using a medium normal WLAN.
> git status was is in a range of 2-3 seconds, with your patch 1-1.5 seconds.
>
> (That all depends on the network load, some caching here or there)
>
> But roughly twice the speed, very nice!

For network fs, that's probably about it. For local fs, we still have
watchman option to speed it up a little more. Still not sure if I can
beat ".. made Mercurial's status command more than 5x faster than
Git's status command."

> I am not really sure when we need this protection.
> What I understand is that stat(dir).mtime must be reliable.

Yes and [1] shows that mtime is not reliable, at least on Windows+vfat.

> Another problem may be mixing old Git with new Git, but the old Git
> should write an index file without UNTR, and we should be safe ?
> The new Git will write an index file with UNTR, which the old Git will ignore.

Old git should ignore (and discard) untr extension and go with the slow old way.
-- 
Duy

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 22/22] untracked cache: guard and disable on system changes
  2014-11-09 23:47       ` Duy Nguyen
@ 2014-11-10 20:48         ` Torsten Bögershausen
  0 siblings, 0 replies; 65+ messages in thread
From: Torsten Bögershausen @ 2014-11-10 20:48 UTC (permalink / raw)
  To: Duy Nguyen, Torsten Bögershausen; +Cc: Git Mailing List

On 11/10/2014 12:47 AM, Duy Nguyen wrote:

Some updates from the test lab, Windows 7
"Working" means git update-index --untracked-cache reports Testing...OK
"Rejected" means "..does not change.."
cygwin + NTFS: Working
cygwin + VFAT: Rejected

The same good news for Msysgit, running your github branch
(I needed to create an empty compat/win32/sys/utsname.h to get it compiled)

And I'm still not sure if we need to store the OS in the index, or if it
is enough to store the $pwd.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file
  2014-11-08  9:39   ` [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
@ 2014-11-17 19:31     ` David Turner
  0 siblings, 0 replies; 65+ messages in thread
From: David Turner @ 2014-11-17 19:31 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

On Sat, 2014-11-08 at 16:39 +0700, Nguyễn Thái Ngọc Duy wrote:
> + * If "ss" is not NULL, compute SHA-1 of the exclude file and fill
> + * stat data from disk (only valid if add_excludes returns zero). If
> + * ss_valid is non-zero, "ss" must contain good value as input.

ss and ss_valid should be sha1_stat and sha1_stat.valid

> +struct sha1_stat {
> +	struct stat_data stat;
> +	unsigned char sha1[20];
> +	int valid;
> +};

It might be good to document what "valid" means here e.g. "a sha1_stat
is valid if both sha1 and stat_data match the working tree's version of
the file" or whatever.

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy
  2014-11-08  9:39   ` [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
  2014-11-08 17:08     ` brian m. carlson
@ 2014-11-17 20:35     ` David Turner
  1 sibling, 0 replies; 65+ messages in thread
From: David Turner @ 2014-11-17 20:35 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

On Sat, 2014-11-08 at 16:39 +0700, Nguyễn Thái Ngọc Duy wrote:
> +	d = xmalloc(sizeof(*d) + len);
> +	memset(d, 0, sizeof(*d) + len);
> +	memcpy(d->name, name, len);

calloc instead of malloc+memset?  But do we really need this memset to
include name if we're about to use a memcpy? Couldn't we just add a
trailing zero?

> + *   - The list of files and directories of the direction in question

s/direction/directory/

> +struct untracked_cache_dir {
> +	struct untracked_cache_dir **dirs;
> +	char **untracked;
> +	struct stat_data stat_data;
> +	unsigned int untracked_alloc, dirs_nr, dirs_alloc;
> +	unsigned int untracked_nr;
> +	unsigned int check_only : 1;
> +	/* null SHA-1 means this directory does not have .gitignore */
> +	unsigned char exclude_sha1[20];
> +	char name[1];

For consistency, should this be char name[FLEX_ARRAY]? (this will entail
some changes when allocating these, of course)

^ permalink raw reply	[flat|nested] 65+ messages in thread

end of thread, other threads:[~2014-11-17 20:35 UTC | newest]

Thread overview: 65+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2014-10-27 12:10 [PATCH 00/19] Untracked cache to speed up "git status" Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 01/19] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
2014-10-27 22:46   ` Junio C Hamano
2014-10-28  0:12     ` Duy Nguyen
2014-10-28 17:37   ` Torsten Bögershausen
2014-11-02  1:25     ` Duy Nguyen
2014-10-27 12:10 ` [PATCH 02/19] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
2014-10-28 17:37   ` Torsten Bögershausen
2014-10-27 12:10 ` [PATCH 03/19] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 04/19] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 05/19] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 06/19] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
2014-10-30 16:19   ` Eric Sunshine
2014-10-27 12:10 ` [PATCH 07/19] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 08/19] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 09/19] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 10/19] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 11/19] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 12/19] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 13/19] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 14/19] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 15/19] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 16/19] status: enable untracked cache Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 17/19] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
2014-10-27 12:10 ` [PATCH 18/19] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
2014-10-28 17:37   ` Torsten Bögershausen
2014-11-03 12:16     ` Duy Nguyen
2014-11-03 18:09     ` Junio C Hamano
2014-10-28 23:25   ` Eric Sunshine
2014-10-27 12:10 ` [PATCH 19/19] t7063: tests for " Nguyễn Thái Ngọc Duy
2014-11-08  9:39 ` [PATCH v2 00/22] untracked cache updates Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 01/22] dir.c: optionally compute sha-1 of a .gitignore file Nguyễn Thái Ngọc Duy
2014-11-17 19:31     ` David Turner
2014-11-08  9:39   ` [PATCH v2 02/22] untracked cache: record .gitignore information and dir hierarchy Nguyễn Thái Ngọc Duy
2014-11-08 17:08     ` brian m. carlson
2014-11-17 20:35     ` David Turner
2014-11-08  9:39   ` [PATCH v2 03/22] untracked cache: initial untracked cache validation Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 04/22] untracked cache: invalidate dirs recursively if .gitignore changes Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 05/22] untracked cache: make a wrapper around {open,read,close}dir() Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 06/22] untracked cache: record/validate dir mtime and reuse cached output Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 07/22] untracked cache: mark what dirs should be recursed/saved Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 08/22] untracked cache: don't open non-existent .gitignore Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 09/22] untracked cache: save to an index extension Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 10/22] untracked cache: load from UNTR " Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 11/22] untracked cache: invalidate at index addition or removal Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 12/22] read-cache.c: split racy stat test to a separate function Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 13/22] untracked cache: avoid racy timestamps Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 14/22] untracked cache: print stats with $GIT_TRACE_UNTRACKED_STATS Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 15/22] untracked cache: mark index dirty if untracked cache is updated Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 16/22] untracked-cache: temporarily disable with $GIT_DISABLE_UNTRACKED_CACHE Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 17/22] status: enable untracked cache Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 18/22] update-index: manually enable or disable " Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 19/22] update-index: test the system before enabling " Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 20/22] t7063: tests for " Nguyễn Thái Ngọc Duy
2014-11-08  9:39   ` [PATCH v2 21/22] mingw32: add uname() Nguyễn Thái Ngọc Duy
2014-11-09  3:32     ` Eric Sunshine
2014-11-09  8:36       ` Duy Nguyen
2014-11-09 11:46         ` Torsten Bögershausen
2014-11-09 18:47           ` Junio C Hamano
2014-11-08  9:39   ` [PATCH v2 22/22] untracked cache: guard and disable on system changes Nguyễn Thái Ngọc Duy
2014-11-09  3:39     ` Eric Sunshine
2014-11-09  8:34       ` Duy Nguyen
2014-11-09 21:39     ` Torsten Bögershausen
2014-11-09 23:47       ` Duy Nguyen
2014-11-10 20:48         ` Torsten Bögershausen

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.