All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 4/4] dir.c: don't exclude whole dir prematurely
Date: Mon, 15 Feb 2016 16:03:39 +0700	[thread overview]
Message-ID: <1455527019-7787-5-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1455527019-7787-1-git-send-email-pclouds@gmail.com>

If there is a pattern "!foo/bar", this patch makes Git not exclude
"foo" right away. This gives us a chance to examine "foo" and
re-include "foo/bar".

Helped-by: brian m. carlson <sandals@crustytoothpaste.net>
Helped-by: Micha Wiedenmann <mw-u2@gmx.de>
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/gitignore.txt                 |  17 +++-
 dir.c                                       | 109 +++++++++++++++++++-
 t/t3001-ls-files-others-exclude.sh          |   7 +-
 t/t3007-ls-files-other-negative.sh (new +x) | 153 ++++++++++++++++++++++++++++
 4 files changed, 276 insertions(+), 10 deletions(-)
 create mode 100755 t/t3007-ls-files-other-negative.sh

diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 473623d..3ded6fd 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -82,12 +82,12 @@ PATTERN FORMAT
 
  - An optional prefix "`!`" which negates the pattern; any
    matching file excluded by a previous pattern will become
-   included again. It is not possible to re-include a file if a parent
-   directory of that file is excluded. Git doesn't list excluded
-   directories for performance reasons, so any patterns on contained
-   files have no effect, no matter where they are defined.
+   included again.
    Put a backslash ("`\`") in front of the first "`!`" for patterns
    that begin with a literal "`!`", for example, "`\!important!.txt`".
+   It is possible to re-include a file whose parent directory is
+   excluded, provided certain conditions are met. See section NOTES
+   for details.
 
  - If the pattern ends with a slash, it is removed for the
    purpose of the following description, but it would only find
@@ -141,6 +141,15 @@ not tracked by Git remain untracked.
 To stop tracking a file that is currently tracked, use
 'git rm --cached'.
 
+To re-include files or directories when their parent directory is
+excluded, the following conditions must be met:
+
+ - The rules to exclude a directory and re-include a subset back must
+   be in the same .gitignore file.
+
+ - The directory part in the re-include rules must be literal (i.e. no
+   wildcards).
+
 EXAMPLES
 --------
 
diff --git a/dir.c b/dir.c
index 8a9d8c0..552af23 100644
--- a/dir.c
+++ b/dir.c
@@ -930,6 +930,75 @@ static int match_sticky(struct exclude *exc, const char *pathname, int pathlen,
 	return 0;
 }
 
+static inline int different_decisions(const struct exclude *a,
+				      const struct exclude *b)
+{
+	return (a->flags & EXC_FLAG_NEGATIVE) != (b->flags & EXC_FLAG_NEGATIVE);
+}
+
+/*
+ * Return non-zero if pathname is a directory and an ancestor of the
+ * literal path in a pattern.
+ */
+static int match_directory_part(const char *pathname, int pathlen,
+				int *dtype, struct exclude *x)
+{
+	const char	*base	    = x->base;
+	int		 baselen    = x->baselen ? x->baselen - 1 : 0;
+	const char	*pattern    = x->pattern;
+	int		 prefix	    = x->nowildcardlen;
+	int		 patternlen = x->patternlen;
+
+	if (*dtype == DT_UNKNOWN)
+		*dtype = get_dtype(NULL, pathname, pathlen);
+	if (*dtype != DT_DIR)
+		return 0;
+
+	if (*pattern == '/') {
+		pattern++;
+		patternlen--;
+		prefix--;
+	}
+
+	if (baselen) {
+		if (((pathlen < baselen && base[pathlen] == '/') ||
+		     pathlen == baselen) &&
+		    !strncmp_icase(pathname, base, pathlen))
+			return 1;
+		pathname += baselen + 1;
+		pathlen  -= baselen + 1;
+	}
+
+
+	if (prefix &&
+	    (((pathlen < prefix && pattern[pathlen] == '/') ||
+	      pathlen == prefix) &&
+	     !strncmp_icase(pathname, pattern, pathlen)))
+		return 1;
+
+	return 0;
+}
+
+static struct exclude *should_descend(const char *pathname, int pathlen,
+				      int *dtype, struct exclude_list *el,
+				      struct exclude *exc)
+{
+	int i;
+
+	for (i = el->nr - 1; 0 <= i; i--) {
+		struct exclude *x = el->excludes[i];
+
+		if (x == exc)
+			break;
+
+		if (!(x->flags & EXC_FLAG_NODIR) &&
+		    different_decisions(x, exc) &&
+		    match_directory_part(pathname, pathlen, dtype, x))
+			return x;
+	}
+	return NULL;
+}
+
 /*
  * Scan the given exclude list in reverse to see whether pathname
  * should be ignored.  The first match (i.e. the last on the list), if
@@ -943,7 +1012,7 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
 						       struct exclude_list *el)
 {
 	struct exclude *exc = NULL; /* undecided */
-	int i;
+	int i, maybe_descend = 0;
 
 	if (!el->nr)
 		return NULL;	/* undefined */
@@ -955,6 +1024,10 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
 		const char *exclude = x->pattern;
 		int prefix = x->nowildcardlen;
 
+		if (!maybe_descend && i < el->nr - 1 &&
+		    different_decisions(x, el->excludes[i+1]))
+			maybe_descend = 1;
+
 		if (x->sticky_paths.nr) {
 			if (*dtype == DT_UNKNOWN)
 				*dtype = get_dtype(NULL, pathname, pathlen);
@@ -998,6 +1071,34 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
 		return NULL;
 	}
 
+	/*
+	 * We have found a matching pattern "exc" that may exclude a whole
+	 * directory. We also found that there may be a pattern that matches
+	 * something inside the directory and re-includes it.
+	 *
+	 * Go through the patterns again, find that pattern and double check.
+	 * If it's true, return "undecided" and keep descending in. "exc" is
+	 * marked sticky so that it continues to match inside the directory.
+	 */
+	if (!(exc->flags & EXC_FLAG_NEGATIVE) && maybe_descend) {
+		struct exclude *x;
+
+		if (*dtype == DT_UNKNOWN)
+			*dtype = get_dtype(NULL, pathname, pathlen);
+
+		if (*dtype == DT_DIR &&
+		    (x = should_descend(pathname, pathlen, dtype, el, exc))) {
+			add_sticky(exc, pathname, pathlen);
+			trace_printf_key(&trace_exclude,
+					 "exclude: %.*s vs %s at line %d => %s,"
+					 " forced open by %s at line %d => n/a\n",
+					 pathlen, pathname, exc->pattern, exc->srcpos,
+					 exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes",
+					 x->pattern, x->srcpos);
+			return NULL;
+		}
+	}
+
 	trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s%s\n",
 			 pathlen, pathname, exc->pattern, exc->srcpos,
 			 exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes",
@@ -2097,6 +2198,12 @@ int read_directory(struct dir_struct *dir, const char *path, int len, const stru
 		return dir->nr;
 
 	/*
+	 * Stay on the safe side. If read_directory() has run once on
+	 * "dir", some sticky flags may have been left behind. Clear them all.
+	 */
+	clear_sticky(dir);
+
+	/*
 	 * exclude patterns are treated like positive ones in
 	 * create_simplify. Usually exclude patterns should be a
 	 * subset of positive ones, which has no impacts on
diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh
index 3fc484e..d043078 100755
--- a/t/t3001-ls-files-others-exclude.sh
+++ b/t/t3001-ls-files-others-exclude.sh
@@ -175,13 +175,10 @@ test_expect_success 'negated exclude matches can override previous ones' '
 	grep "^a.1" output
 '
 
-test_expect_success 'excluded directory overrides content patterns' '
+test_expect_success 'excluded directory does not override content patterns' '
 
 	git ls-files --others --exclude="one" --exclude="!one/a.1" >output &&
-	if grep "^one/a.1" output
-	then
-		false
-	fi
+	grep "^one/a.1" output
 '
 
 test_expect_success 'negated directory doesn'\''t affect content patterns' '
diff --git a/t/t3007-ls-files-other-negative.sh b/t/t3007-ls-files-other-negative.sh
new file mode 100755
index 0000000..0797b86
--- /dev/null
+++ b/t/t3007-ls-files-other-negative.sh
@@ -0,0 +1,153 @@
+#!/bin/sh
+
+test_description='test re-include patterns'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	mkdir -p fooo foo/bar tmp &&
+	touch abc foo/def foo/bar/ghi foo/bar/bar
+'
+
+test_expect_success 'no match, do not enter subdir and waste cycles' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/foo
+	!fooo/bar/bar
+	EOF
+	GIT_TRACE_EXCLUDE="$(pwd)/tmp/trace" git ls-files -o --exclude-standard >tmp/actual &&
+	! grep "enter .foo/.\$" tmp/trace &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'match, excluded by literal pathname pattern' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/fooo
+	/foo
+	!foo/bar/bar
+	EOF
+	cat >fooo/.gitignore <<-\EOF &&
+	!/*
+	EOF	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	foo/bar/bar
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'match, excluded by wildcard pathname pattern' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/fooo
+	/fo?
+	!foo/bar/bar
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	foo/bar/bar
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'match, excluded by literal basename pattern' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/fooo
+	foo
+	!foo/bar/bar
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	foo/bar/bar
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'match, excluded by wildcard basename pattern' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/fooo
+	fo?
+	!foo/bar/bar
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	foo/bar/bar
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'match, excluded by literal mustbedir, basename pattern' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/fooo
+	foo/
+	!foo/bar/bar
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	foo/bar/bar
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'match, excluded by literal mustbedir, pathname pattern' '
+	cat >.gitignore <<-\EOF &&
+	/tmp
+	/fooo
+	/foo/
+	!foo/bar/bar
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	.gitignore
+	abc
+	foo/bar/bar
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_expect_success 'prepare for nested negatives' '
+	cat >.git/info/exclude <<-\EOF &&
+	/.gitignore
+	/tmp
+	/foo
+	/abc
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	test_must_be_empty tmp/actual &&
+	mkdir -p 1/2/3/4 &&
+	touch 1/f 1/2/f 1/2/3/f 1/2/3/4/f
+'
+
+test_expect_success 'match, literal pathname, nested negatives' '
+	cat >.gitignore <<-\EOF &&
+	/1
+	!1/2
+	1/2/3
+	!1/2/3/4
+	EOF
+	git ls-files -o --exclude-standard >tmp/actual &&
+	cat >tmp/expected <<-\EOF &&
+	1/2/3/4/f
+	1/2/f
+	EOF
+	test_cmp tmp/expected tmp/actual
+'
+
+test_done
-- 
2.7.0.377.g4cd97dd

  parent reply	other threads:[~2016-02-15  9:04 UTC|newest]

Thread overview: 10+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-15  9:03 [PATCH 0/4] .gitignore, reinclude rules, take 2 Nguyễn Thái Ngọc Duy
2016-02-15  9:03 ` [PATCH 1/4] dir.c: fix match_pathname() Nguyễn Thái Ngọc Duy
2016-02-15 23:29   ` Junio C Hamano
2016-02-16  1:17     ` Duy Nguyen
2016-02-15  9:03 ` [PATCH 2/4] dir.c: support tracing exclude Nguyễn Thái Ngọc Duy
2016-02-15  9:03 ` [PATCH 3/4] dir.c: support marking some patterns already matched Nguyễn Thái Ngọc Duy
2016-02-15 23:47   ` Junio C Hamano
2016-02-16  1:36     ` Duy Nguyen
2016-02-15  9:03 ` Nguyễn Thái Ngọc Duy [this message]
2016-02-15 23:49 ` [PATCH 0/4] .gitignore, reinclude rules, take 2 Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1455527019-7787-5-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.