git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] Unindent excluded_from_list()
@ 2012-06-07  7:53 Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 2/4] dir.c: get rid of the wildcard symbol set in no_wildcard() Nguyễn Thái Ngọc Duy
                   ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-06-07  7:53 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

Return early if el->nr == 0. Unindent one more level for FNM_PATHNAME
code block as this block is getting complex and may need more
indentation.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 nd/exclude-workaround-top-heavy updates. no changes on this patch.

 dir.c | 96 +++++++++++++++++++++++++++++++++----------------------------------
 1 file changed, 48 insertions(+), 48 deletions(-)

diff --git a/dir.c b/dir.c
index ed1510f..e756de1 100644
--- a/dir.c
+++ b/dir.c
@@ -498,56 +498,56 @@ int excluded_from_list(const char *pathname,
 {
 	int i;
 
-	if (el->nr) {
-		for (i = el->nr - 1; 0 <= i; i--) {
-			struct exclude *x = el->excludes[i];
-			const char *exclude = x->pattern;
-			int to_exclude = x->to_exclude;
-
-			if (x->flags & EXC_FLAG_MUSTBEDIR) {
-				if (*dtype == DT_UNKNOWN)
-					*dtype = get_dtype(NULL, pathname, pathlen);
-				if (*dtype != DT_DIR)
-					continue;
-			}
+	if (!el->nr)
+		return -1;	/* undefined */
+
+	for (i = el->nr - 1; 0 <= i; i--) {
+		struct exclude *x = el->excludes[i];
+		const char *exclude = x->pattern;
+		int to_exclude = x->to_exclude;
+
+		if (x->flags & EXC_FLAG_MUSTBEDIR) {
+			if (*dtype == DT_UNKNOWN)
+				*dtype = get_dtype(NULL, pathname, pathlen);
+			if (*dtype != DT_DIR)
+				continue;
+		}
 
-			if (x->flags & EXC_FLAG_NODIR) {
-				/* match basename */
-				if (x->flags & EXC_FLAG_NOWILDCARD) {
-					if (!strcmp_icase(exclude, basename))
-						return to_exclude;
-				} else if (x->flags & EXC_FLAG_ENDSWITH) {
-					if (x->patternlen - 1 <= pathlen &&
-					    !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
-						return to_exclude;
-				} else {
-					if (fnmatch_icase(exclude, basename, 0) == 0)
-						return to_exclude;
-				}
-			}
-			else {
-				/* match with FNM_PATHNAME:
-				 * exclude has base (baselen long) implicitly
-				 * in front of it.
-				 */
-				int baselen = x->baselen;
-				if (*exclude == '/')
-					exclude++;
-
-				if (pathlen < baselen ||
-				    (baselen && pathname[baselen-1] != '/') ||
-				    strncmp_icase(pathname, x->base, baselen))
-				    continue;
-
-				if (x->flags & EXC_FLAG_NOWILDCARD) {
-					if (!strcmp_icase(exclude, pathname + baselen))
-						return to_exclude;
-				} else {
-					if (fnmatch_icase(exclude, pathname+baselen,
-						    FNM_PATHNAME) == 0)
-					    return to_exclude;
-				}
+		if (x->flags & EXC_FLAG_NODIR) {
+			/* match basename */
+			if (x->flags & EXC_FLAG_NOWILDCARD) {
+				if (!strcmp_icase(exclude, basename))
+					return to_exclude;
+			} else if (x->flags & EXC_FLAG_ENDSWITH) {
+				if (x->patternlen - 1 <= pathlen &&
+				    !strcmp_icase(exclude + 1, pathname + pathlen - x->patternlen + 1))
+					return to_exclude;
+			} else {
+				if (fnmatch_icase(exclude, basename, 0) == 0)
+					return to_exclude;
 			}
+			continue;
+		}
+
+
+		/* match with FNM_PATHNAME:
+		 * exclude has base (baselen long) implicitly in front of it.
+		 */
+		if (*exclude == '/')
+			exclude++;
+
+		if (pathlen < x->baselen ||
+		    (x->baselen && pathname[x->baselen-1] != '/') ||
+		    strncmp_icase(pathname, x->base, x->baselen))
+			continue;
+
+		if (x->flags & EXC_FLAG_NOWILDCARD) {
+			if (!strcmp_icase(exclude, pathname + x->baselen))
+				return to_exclude;
+		} else {
+			if (fnmatch_icase(exclude, pathname+x->baselen,
+					  FNM_PATHNAME) == 0)
+				return to_exclude;
 		}
 	}
 	return -1; /* undecided */
-- 
1.7.11.rc1.185.g281ad67

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 2/4] dir.c: get rid of the wildcard symbol set in no_wildcard()
  2012-06-07  7:53 [PATCH 1/4] Unindent excluded_from_list() Nguyễn Thái Ngọc Duy
@ 2012-06-07  7:53 ` Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 3/4] exclude: do strcmp as much as possible before fnmatch Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 4/4] exclude: reuse last basename comparison Nguyễn Thái Ngọc Duy
  2 siblings, 0 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-06-07  7:53 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

Elsewhere in this file is_glob_special() is also used to check for
wildcards, which is defined in ctype. Make no_wildcard() also use this
function (indirectly via simple_length())

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 dir.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/dir.c b/dir.c
index e756de1..bbf958c 100644
--- a/dir.c
+++ b/dir.c
@@ -288,9 +288,24 @@ int match_pathspec_depth(const struct pathspec *ps,
 	return retval;
 }
 
+/*
+ * Return the length of the "simple" part of a path match limiter.
+ */
+static int simple_length(const char *match)
+{
+	int len = -1;
+
+	for (;;) {
+		unsigned char c = *match++;
+		len++;
+		if (c == '\0' || is_glob_special(c))
+			return len;
+	}
+}
+
 static int no_wildcard(const char *string)
 {
-	return string[strcspn(string, "*?[{\\")] == '\0';
+	return string[simple_length(string)] == '\0';
 }
 
 void add_exclude(const char *string, const char *base,
@@ -997,21 +1012,6 @@ static int cmp_name(const void *p1, const void *p2)
 				  e2->name, e2->len);
 }
 
-/*
- * Return the length of the "simple" part of a path match limiter.
- */
-static int simple_length(const char *match)
-{
-	int len = -1;
-
-	for (;;) {
-		unsigned char c = *match++;
-		len++;
-		if (c == '\0' || is_glob_special(c))
-			return len;
-	}
-}
-
 static struct path_simplify *create_simplify(const char **pathspec)
 {
 	int nr, alloc = 0;
-- 
1.7.11.rc1.185.g281ad67

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 3/4] exclude: do strcmp as much as possible before fnmatch
  2012-06-07  7:53 [PATCH 1/4] Unindent excluded_from_list() Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 2/4] dir.c: get rid of the wildcard symbol set in no_wildcard() Nguyễn Thái Ngọc Duy
@ 2012-06-07  7:53 ` Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 4/4] exclude: reuse last basename comparison Nguyễn Thái Ngọc Duy
  2 siblings, 0 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-06-07  7:53 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

this also avoids calling fnmatch() if the non-wildcard prefix is
longer than basename

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Fixed the global "wildcards" string thing by using simple_length().

 dir.c | 37 ++++++++++++++++++++++++-------------
 dir.h |  2 +-
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/dir.c b/dir.c
index bbf958c..94fe9f8 100644
--- a/dir.c
+++ b/dir.c
@@ -341,8 +341,7 @@ void add_exclude(const char *string, const char *base,
 	x->flags = flags;
 	if (!strchr(string, '/'))
 		x->flags |= EXC_FLAG_NODIR;
-	if (no_wildcard(string))
-		x->flags |= EXC_FLAG_NOWILDCARD;
+	x->nowildcardlen = simple_length(string);
 	if (*string == '*' && no_wildcard(string+1))
 		x->flags |= EXC_FLAG_ENDSWITH;
 	ALLOC_GROW(which->excludes, which->nr + 1, which->alloc);
@@ -518,8 +517,9 @@ int excluded_from_list(const char *pathname,
 
 	for (i = el->nr - 1; 0 <= i; i--) {
 		struct exclude *x = el->excludes[i];
-		const char *exclude = x->pattern;
+		const char *name, *exclude = x->pattern;
 		int to_exclude = x->to_exclude;
+		int namelen, prefix = x->nowildcardlen;
 
 		if (x->flags & EXC_FLAG_MUSTBEDIR) {
 			if (*dtype == DT_UNKNOWN)
@@ -530,7 +530,7 @@ int excluded_from_list(const char *pathname,
 
 		if (x->flags & EXC_FLAG_NODIR) {
 			/* match basename */
-			if (x->flags & EXC_FLAG_NOWILDCARD) {
+			if (prefix == x->patternlen) {
 				if (!strcmp_icase(exclude, basename))
 					return to_exclude;
 			} else if (x->flags & EXC_FLAG_ENDSWITH) {
@@ -544,26 +544,37 @@ int excluded_from_list(const char *pathname,
 			continue;
 		}
 
-
 		/* match with FNM_PATHNAME:
 		 * exclude has base (baselen long) implicitly in front of it.
 		 */
-		if (*exclude == '/')
+		if (*exclude == '/') {
 			exclude++;
+			prefix--;
+		}
 
 		if (pathlen < x->baselen ||
 		    (x->baselen && pathname[x->baselen-1] != '/') ||
 		    strncmp_icase(pathname, x->base, x->baselen))
 			continue;
 
-		if (x->flags & EXC_FLAG_NOWILDCARD) {
-			if (!strcmp_icase(exclude, pathname + x->baselen))
-				return to_exclude;
-		} else {
-			if (fnmatch_icase(exclude, pathname+x->baselen,
-					  FNM_PATHNAME) == 0)
-				return to_exclude;
+		namelen = x->baselen ? pathlen - x->baselen : pathlen;
+		name = pathname + pathlen  - namelen;
+
+		/* if the non-wildcard part is longer than the
+		   remaining pathname, surely it cannot match */
+		if (prefix > namelen)
+			continue;
+
+		if (prefix) {
+			if (strncmp_icase(exclude, name, prefix))
+				continue;
+			exclude += prefix;
+			name    += prefix;
+			namelen -= prefix;
 		}
+
+		if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
+			return to_exclude;
 	}
 	return -1; /* undecided */
 }
diff --git a/dir.h b/dir.h
index 58b6fc7..39fc145 100644
--- a/dir.h
+++ b/dir.h
@@ -7,7 +7,6 @@ struct dir_entry {
 };
 
 #define EXC_FLAG_NODIR 1
-#define EXC_FLAG_NOWILDCARD 2
 #define EXC_FLAG_ENDSWITH 4
 #define EXC_FLAG_MUSTBEDIR 8
 
@@ -17,6 +16,7 @@ struct exclude_list {
 	struct exclude {
 		const char *pattern;
 		int patternlen;
+		int nowildcardlen;
 		const char *base;
 		int baselen;
 		int to_exclude;
-- 
1.7.11.rc1.185.g281ad67

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH 4/4] exclude: reuse last basename comparison
  2012-06-07  7:53 [PATCH 1/4] Unindent excluded_from_list() Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 2/4] dir.c: get rid of the wildcard symbol set in no_wildcard() Nguyễn Thái Ngọc Duy
  2012-06-07  7:53 ` [PATCH 3/4] exclude: do strcmp as much as possible before fnmatch Nguyễn Thái Ngọc Duy
@ 2012-06-07  7:53 ` Nguyễn Thái Ngọc Duy
  2012-06-07 18:28   ` Junio C Hamano
  2012-06-07 18:40   ` Junio C Hamano
  2 siblings, 2 replies; 7+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-06-07  7:53 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

If two consecutive patterns share the same "base" and we just compared
base against pathname last time, we can just reuse the last comparison
result.

This optmization is made with read_directory() in mind. Notice that
all exclude patterns share the same "base" pointer, which is basebuf[]
from "struct dir" (given indirectly by prep_exclude()) and patterns
from the same .gitignore will stay in the same order. This opens an
opportunity for this optimization when there are a lot of patterns in
subdirectories-with-long-path-name/.gitignore.

Other users of excluded_from_list() unlikely take advantage of this,
unless add_excludes() learns to pre-compare two consecutive bases and
save the result, so excluded_from_list() can perform a cheap "are
these two bases the same" check.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 I haven't put more thought/work on optimizing the big top .gitignore
 case yet.  But something like this is probably worth doing anyway.
 Pathspec might use the same optimization. If a user does (notice no
 quotes)
 
 git something long/path/to/here/*.[ch]

 we would need to compare "long/path/to/here" all over again (I
 haven't checked the code though).

 dir.c | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 94fe9f8..2964076 100644
--- a/dir.c
+++ b/dir.c
@@ -511,6 +511,7 @@ int excluded_from_list(const char *pathname,
 		       struct exclude_list *el)
 {
 	int i;
+	int last_basecmp = el->nr, basecmp_result;
 
 	if (!el->nr)
 		return -1;	/* undefined */
@@ -552,9 +553,25 @@ int excluded_from_list(const char *pathname,
 			prefix--;
 		}
 
-		if (pathlen < x->baselen ||
-		    (x->baselen && pathname[x->baselen-1] != '/') ||
-		    strncmp_icase(pathname, x->base, x->baselen))
+		if (i < el->nr - 1 &&
+		    last_basecmp == i + 1 &&
+		    x->base    == el->excludes[last_basecmp]->base &&
+		    x->baselen == el->excludes[last_basecmp]->baselen)
+			/*
+			 * we have the same "base" as last time and
+			 * last time we came here too (i.e. no break
+			 * or continue from the above code), reuse
+			 * basecmp_result
+			 */
+			;
+		else
+			/* anything other than zero is ok, we don't
+			   really care about the sorting order */
+			basecmp_result = pathlen < x->baselen ||
+				(x->baselen && pathname[x->baselen - 1] != '/') ||
+				strncmp_icase(pathname, x->base, x->baselen);
+		last_basecmp = i;
+		if (basecmp_result)
 			continue;
 
 		namelen = x->baselen ? pathlen - x->baselen : pathlen;
-- 
1.7.11.rc1.185.g281ad67

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/4] exclude: reuse last basename comparison
  2012-06-07  7:53 ` [PATCH 4/4] exclude: reuse last basename comparison Nguyễn Thái Ngọc Duy
@ 2012-06-07 18:28   ` Junio C Hamano
  2012-06-07 18:40   ` Junio C Hamano
  1 sibling, 0 replies; 7+ messages in thread
From: Junio C Hamano @ 2012-06-07 18:28 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> If two consecutive patterns share the same "base" and we just compared
> base against pathname last time, we can just reuse the last comparison
> result.
>
> This optmization is made with read_directory() in mind. Notice that

s/optm/optim/; just in case you need to reroll this one later.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/4] exclude: reuse last basename comparison
  2012-06-07  7:53 ` [PATCH 4/4] exclude: reuse last basename comparison Nguyễn Thái Ngọc Duy
  2012-06-07 18:28   ` Junio C Hamano
@ 2012-06-07 18:40   ` Junio C Hamano
  2012-06-08  2:37     ` Nguyen Thai Ngoc Duy
  1 sibling, 1 reply; 7+ messages in thread
From: Junio C Hamano @ 2012-06-07 18:40 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:

> diff --git a/dir.c b/dir.c
> index 94fe9f8..2964076 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -511,6 +511,7 @@ int excluded_from_list(const char *pathname,
>  		       struct exclude_list *el)
>  {
>  	int i;
> +	int last_basecmp = el->nr, basecmp_result;

Gcc complains that basecmp_result may be used without initialized in
this function.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 4/4] exclude: reuse last basename comparison
  2012-06-07 18:40   ` Junio C Hamano
@ 2012-06-08  2:37     ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 7+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-06-08  2:37 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Fri, Jun 8, 2012 at 1:40 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy <pclouds@gmail.com> writes:
>
>> diff --git a/dir.c b/dir.c
>> index 94fe9f8..2964076 100644
>> --- a/dir.c
>> +++ b/dir.c
>> @@ -511,6 +511,7 @@ int excluded_from_list(const char *pathname,
>>                      struct exclude_list *el)
>>  {
>>       int i;
>> +     int last_basecmp = el->nr, basecmp_result;
>
> Gcc complains that basecmp_result may be used without initialized in
> this function.

I'm pretty sure it's fault positive.
-- 
Duy

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2012-06-08  2:38 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-07  7:53 [PATCH 1/4] Unindent excluded_from_list() Nguyễn Thái Ngọc Duy
2012-06-07  7:53 ` [PATCH 2/4] dir.c: get rid of the wildcard symbol set in no_wildcard() Nguyễn Thái Ngọc Duy
2012-06-07  7:53 ` [PATCH 3/4] exclude: do strcmp as much as possible before fnmatch Nguyễn Thái Ngọc Duy
2012-06-07  7:53 ` [PATCH 4/4] exclude: reuse last basename comparison Nguyễn Thái Ngọc Duy
2012-06-07 18:28   ` Junio C Hamano
2012-06-07 18:40   ` Junio C Hamano
2012-06-08  2:37     ` Nguyen Thai Ngoc Duy

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).