All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 00/12] nd/wildmatch
@ 2012-10-14  2:34 Nguyễn Thái Ngọc Duy
  2012-10-14  2:34 ` [PATCH v5 01/12] ctype: make sane_ctype[] const array Nguyễn Thái Ngọc Duy
                   ` (11 more replies)
  0 siblings, 12 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:34 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

This version separates the fnmatch and wildmatch tests in t3070 and
disables many more fnmatch tests. It also fixes the "cd .." in the t0003
test and the comment in front of dowild(). No functional changes.

Nguyễn Thái Ngọc Duy (12):
  ctype: make sane_ctype[] const array
  ctype: support iscntrl, ispunct, isxdigit and isprint
  Import wildmatch from rsync
  wildmatch: remove unnecessary functions
  Integrate wildmatch to git
  t3070: disable unreliable fnmatch tests
  wildmatch: make wildmatch's return value compatible with fnmatch
  wildmatch: remove static variable force_lower_case
  wildmatch: fix case-insensitive matching
  wildmatch: adjust "**" behavior
  wildmatch: make /**/ match zero or more directories
  Support "**" wildcard in .gitignore and .gitattributes

 .gitignore                         |   1 +
 Documentation/gitignore.txt        |  19 +++
 Makefile                           |   3 +
 attr.c                             |   4 +-
 ctype.c                            |  20 +++-
 dir.c                              |   4 +-
 git-compat-util.h                  |  15 ++-
 t/t0003-attributes.sh              |  37 ++++++
 t/t3001-ls-files-others-exclude.sh |  19 +++
 t/t3070-wildmatch.sh               | 195 ++++++++++++++++++++++++++++++
 test-wildmatch.c                   |  14 +++
 wildmatch.c                        | 239 +++++++++++++++++++++++++++++++++++++
 wildmatch.h                        |   9 ++
 13 files changed, 575 insertions(+), 4 deletions(-)
 create mode 100755 t/t3070-wildmatch.sh
 create mode 100644 test-wildmatch.c
 create mode 100644 wildmatch.c
 create mode 100644 wildmatch.h

-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH v5 01/12] ctype: make sane_ctype[] const array
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:34 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint Nguyễn Thái Ngọc Duy
                   ` (10 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:34 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 ctype.c           | 2 +-
 git-compat-util.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ctype.c b/ctype.c
index 9353271..faeaf34 100644
--- a/ctype.c
+++ b/ctype.c
@@ -14,7 +14,7 @@ enum {
 	P = GIT_PATHSPEC_MAGIC  /* other non-alnum, except for ] and } */
 };
 
-unsigned char sane_ctype[256] = {
+const unsigned char sane_ctype[256] = {
 	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
 	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
diff --git a/git-compat-util.h b/git-compat-util.h
index 2fbf1fd..f8b859c 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -510,7 +510,7 @@ extern const char tolower_trans_tbl[256];
 #undef isupper
 #undef tolower
 #undef toupper
-extern unsigned char sane_ctype[256];
+extern const unsigned char sane_ctype[256];
 #define GIT_SPACE 0x01
 #define GIT_DIGIT 0x02
 #define GIT_ALPHA 0x04
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
  2012-10-14  2:34 ` [PATCH v5 01/12] ctype: make sane_ctype[] const array Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  5:02   ` Junio C Hamano
  2012-10-14 12:59   ` René Scharfe
  2012-10-14  2:35 ` [PATCH v5 03/12] Import wildmatch from rsync Nguyễn Thái Ngọc Duy
                   ` (9 subsequent siblings)
  11 siblings, 2 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 ctype.c           | 18 ++++++++++++++++++
 git-compat-util.h | 13 +++++++++++++
 2 files changed, 31 insertions(+)

diff --git a/ctype.c b/ctype.c
index faeaf34..b4bf48a 100644
--- a/ctype.c
+++ b/ctype.c
@@ -26,6 +26,24 @@ const unsigned char sane_ctype[256] = {
 	/* Nothing in the 128.. range */
 };
 
+enum {
+	CN = GIT_CNTRL,
+	PU = GIT_PUNCT,
+	XD = GIT_XDIGIT,
+};
+
+const unsigned char sane_ctype2[256] = {
+	CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*    0..15 */
+	CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*   16..31 */
+	0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*   32..47 */
+	XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*   48..63 */
+	PU, 0,	XD, 0,	XD, 0,	XD, 0,	0,  0,	0,  0,	0,  0,	0,  0,	/*   64..79 */
+	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*   80..95 */
+	PU, 0,	XD, 0,	XD, 0,	XD, 0,	0,  0,	0,  0,	0,  0,	0,  0,	/*  96..111 */
+	0,  0,	0,  0,	0,  0,	0,  0,	0,  0,	0,  PU, PU, PU, PU, CN, /* 112..127 */
+	/* Nothing in the 128.. range */
+};
+
 /* For case-insensitive kwset */
 const char tolower_trans_tbl[256] = {
 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
diff --git a/git-compat-util.h b/git-compat-util.h
index f8b859c..ea11694 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -510,14 +510,23 @@ extern const char tolower_trans_tbl[256];
 #undef isupper
 #undef tolower
 #undef toupper
+#undef iscntrl
+#undef ispunct
+#undef isxdigit
+#undef isprint
 extern const unsigned char sane_ctype[256];
+extern const unsigned char sane_ctype2[256];
 #define GIT_SPACE 0x01
 #define GIT_DIGIT 0x02
 #define GIT_ALPHA 0x04
 #define GIT_GLOB_SPECIAL 0x08
 #define GIT_REGEX_SPECIAL 0x10
 #define GIT_PATHSPEC_MAGIC 0x20
+#define GIT_CNTRL 0x01
+#define GIT_PUNCT 0x02
+#define GIT_XDIGIT 0x04
 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define sane_istest2(x,mask) ((sane_ctype2[(unsigned char)(x)] & (mask)) != 0)
 #define isascii(x) (((x) & ~0x7f) == 0)
 #define isspace(x) sane_istest(x,GIT_SPACE)
 #define isdigit(x) sane_istest(x,GIT_DIGIT)
@@ -527,6 +536,10 @@ extern const unsigned char sane_ctype[256];
 #define isupper(x) sane_iscase(x, 0)
 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
 #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define iscntrl(x) sane_istest2(x, GIT_CNTRL)
+#define ispunct(x) sane_istest2(x, GIT_PUNCT)
+#define isxdigit(x) sane_istest2(x, GIT_XDIGIT)
+#define isprint(x) (isalnum(x) || isspace(x) || ispunct(x))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 03/12] Import wildmatch from rsync
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
  2012-10-14  2:34 ` [PATCH v5 01/12] ctype: make sane_ctype[] const array Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 04/12] wildmatch: remove unnecessary functions Nguyễn Thái Ngọc Duy
                   ` (8 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 16393 bytes --]

These files are from rsync.git commit
f92f5b166e3019db42bc7fe1aa2f1a9178cd215d, which was the last commit
before rsync switched to GPLv3. All files are imported as-is and are not
yet built or used, so this patch is a no-op. Adaptation is done in a
separate patch.

rsync.git           ->  git.git
lib/wildmatch.[ch]      wildmatch.[ch]
wildtest.txt            t/t3070/wildtest.txt

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 t/t3070/wildtest.txt | 165 +++++++++++++++++++++++
 wildmatch.c          | 368 +++++++++++++++++++++++++++++++++++++++++++++++++++
 wildmatch.h          |   6 +
 3 files changed, 539 insertions(+)
 create mode 100644 t/t3070/wildtest.txt
 create mode 100644 wildmatch.c
 create mode 100644 wildmatch.h

diff --git a/t/t3070/wildtest.txt b/t/t3070/wildtest.txt
new file mode 100644
index 0000000..42c1678
--- /dev/null
+++ b/t/t3070/wildtest.txt
@@ -0,0 +1,165 @@
+# Input is in the following format (all items white-space separated):
+#
+# The first two items are 1 or 0 indicating if the wildmat call is expected to
+# succeed and if fnmatch works the same way as wildmat, respectively.  After
+# that is a text string for the match, and a pattern string.  Strings can be
+# quoted (if desired) in either double or single quotes, as well as backticks.
+#
+# MATCH FNMATCH_SAME "text to match" 'pattern to use'
+
+# Basic wildmat features
+1 1 foo			foo
+0 1 foo			bar
+1 1 ''			""
+1 1 foo			???
+0 1 foo			??
+1 1 foo			*
+1 1 foo			f*
+0 1 foo			*f
+1 1 foo			*foo*
+1 1 foobar		*ob*a*r*
+1 1 aaaaaaabababab	*ab
+1 1 foo*		foo\*
+0 1 foobar		foo\*bar
+1 1 f\oo		f\\oo
+1 1 ball		*[al]?
+0 1 ten			[ten]
+1 1 ten			**[!te]
+0 1 ten			**[!ten]
+1 1 ten			t[a-g]n
+0 1 ten			t[!a-g]n
+1 1 ton			t[!a-g]n
+1 1 ton			t[^a-g]n
+1 1 a]b			a[]]b
+1 1 a-b			a[]-]b
+1 1 a]b			a[]-]b
+0 1 aab			a[]-]b
+1 1 aab			a[]a-]b
+1 1 ]			]
+
+# Extended slash-matching features
+0 1 foo/baz/bar		foo*bar
+1 1 foo/baz/bar		foo**bar
+0 1 foo/bar		foo?bar
+0 1 foo/bar		foo[/]bar
+0 1 foo/bar		f[^eiu][^eiu][^eiu][^eiu][^eiu]r
+1 1 foo-bar		f[^eiu][^eiu][^eiu][^eiu][^eiu]r
+0 1 foo			**/foo
+1 1 /foo		**/foo
+1 1 bar/baz/foo		**/foo
+0 1 bar/baz/foo		*/foo
+0 0 foo/bar/baz		**/bar*
+1 1 deep/foo/bar/baz	**/bar/*
+0 1 deep/foo/bar/baz/	**/bar/*
+1 1 deep/foo/bar/baz/	**/bar/**
+0 1 deep/foo/bar	**/bar/*
+1 1 deep/foo/bar/	**/bar/**
+1 1 foo/bar/baz		**/bar**
+1 1 foo/bar/baz/x	*/bar/**
+0 0 deep/foo/bar/baz/x	*/bar/**
+1 1 deep/foo/bar/baz/x	**/bar/*/*
+
+# Various additional tests
+0 1 acrt		a[c-c]st
+1 1 acrt		a[c-c]rt
+0 1 ]			[!]-]
+1 1 a			[!]-]
+0 1 ''			\
+0 1 \			\
+0 1 /\			*/\
+1 1 /\			*/\\
+1 1 foo			foo
+1 1 @foo		@foo
+0 1 foo			@foo
+1 1 [ab]		\[ab]
+1 1 [ab]		[[]ab]
+1 1 [ab]		[[:]ab]
+0 1 [ab]		[[::]ab]
+1 1 [ab]		[[:digit]ab]
+1 1 [ab]		[\[:]ab]
+1 1 ?a?b		\??\?b
+1 1 abc			\a\b\c
+0 1 foo			''
+1 1 foo/bar/baz/to	**/t[o]
+
+# Character class tests
+1 1 a1B		[[:alpha:]][[:digit:]][[:upper:]]
+0 1 a		[[:digit:][:upper:][:space:]]
+1 1 A		[[:digit:][:upper:][:space:]]
+1 1 1		[[:digit:][:upper:][:space:]]
+0 1 1		[[:digit:][:upper:][:spaci:]]
+1 1 ' '		[[:digit:][:upper:][:space:]]
+0 1 .		[[:digit:][:upper:][:space:]]
+1 1 .		[[:digit:][:punct:][:space:]]
+1 1 5		[[:xdigit:]]
+1 1 f		[[:xdigit:]]
+1 1 D		[[:xdigit:]]
+1 1 _		[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]
+#1 1 …		[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]
+1 1 \x7f		[^[:alnum:][:alpha:][:blank:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]
+1 1 .		[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]
+1 1 5		[a-c[:digit:]x-z]
+1 1 b		[a-c[:digit:]x-z]
+1 1 y		[a-c[:digit:]x-z]
+0 1 q		[a-c[:digit:]x-z]
+
+# Additional tests, including some malformed wildmats
+1 1 ]		[\\-^]
+0 1 [		[\\-^]
+1 1 -		[\-_]
+1 1 ]		[\]]
+0 1 \]		[\]]
+0 1 \		[\]]
+0 1 ab		a[]b
+0 1 a[]b	a[]b
+0 1 ab[		ab[
+0 1 ab		[!
+0 1 ab		[-
+1 1 -		[-]
+0 1 -		[a-
+0 1 -		[!a-
+1 1 -		[--A]
+1 1 5		[--A]
+1 1 ' '		'[ --]'
+1 1 $		'[ --]'
+1 1 -		'[ --]'
+0 1 0		'[ --]'
+1 1 -		[---]
+1 1 -		[------]
+0 1 j		[a-e-n]
+1 1 -		[a-e-n]
+1 1 a		[!------]
+0 1 [		[]-a]
+1 1 ^		[]-a]
+0 1 ^		[!]-a]
+1 1 [		[!]-a]
+1 1 ^		[a^bc]
+1 1 -b]		[a-]b]
+0 1 \		[\]
+1 1 \		[\\]
+0 1 \		[!\\]
+1 1 G		[A-\\]
+0 1 aaabbb	b*a
+0 1 aabcaa	*ba*
+1 1 ,		[,]
+1 1 ,		[\\,]
+1 1 \		[\\,]
+1 1 -		[,-.]
+0 1 +		[,-.]
+0 1 -.]		[,-.]
+1 1 2		[\1-\3]
+1 1 3		[\1-\3]
+0 1 4		[\1-\3]
+1 1 \		[[-\]]
+1 1 [		[[-\]]
+1 1 ]		[[-\]]
+0 1 -		[[-\]]
+
+# Test recursion and the abort code (use "wildtest -i" to see iteration counts)
+1 1 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1	-*-*-*-*-*-*-12-*-*-*-m-*-*-*
+0 1 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1	-*-*-*-*-*-*-12-*-*-*-m-*-*-*
+0 1 -adobe-courier-bold-o-normal--12-120-75-75-/-70-iso8859-1	-*-*-*-*-*-*-12-*-*-*-m-*-*-*
+1 1 /adobe/courier/bold/o/normal//12/120/75/75/m/70/iso8859/1	/*/*/*/*/*/*/12/*/*/*/m/*/*/*
+0 1 /adobe/courier/bold/o/normal//12/120/75/75/X/70/iso8859/1	/*/*/*/*/*/*/12/*/*/*/m/*/*/*
+1 1 abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt		**/*a*b*g*n*t
+0 1 abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz		**/*a*b*g*n*t
diff --git a/wildmatch.c b/wildmatch.c
new file mode 100644
index 0000000..f3a1731
--- /dev/null
+++ b/wildmatch.c
@@ -0,0 +1,368 @@
+/*
+**  Do shell-style pattern matching for ?, \, [], and * characters.
+**  It is 8bit clean.
+**
+**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
+**  Rich $alz is now <rsalz@bbn.com>.
+**
+**  Modified by Wayne Davison to special-case '/' matching, to make '**'
+**  work differently than '*', and to fix the character-class code.
+*/
+
+#include "rsync.h"
+
+/* What character marks an inverted character class? */
+#define NEGATE_CLASS	'!'
+#define NEGATE_CLASS2	'^'
+
+#define FALSE 0
+#define TRUE 1
+#define ABORT_ALL -1
+#define ABORT_TO_STARSTAR -2
+
+#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
+				    && *(class) == *(litmatch) \
+				    && strncmp((char*)class, litmatch, len) == 0)
+
+#if defined STDC_HEADERS || !defined isascii
+# define ISASCII(c) 1
+#else
+# define ISASCII(c) isascii(c)
+#endif
+
+#ifdef isblank
+# define ISBLANK(c) (ISASCII(c) && isblank(c))
+#else
+# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
+#endif
+
+#ifdef isgraph
+# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
+#else
+# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
+#endif
+
+#define ISPRINT(c) (ISASCII(c) && isprint(c))
+#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
+#define ISALNUM(c) (ISASCII(c) && isalnum(c))
+#define ISALPHA(c) (ISASCII(c) && isalpha(c))
+#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
+#define ISLOWER(c) (ISASCII(c) && islower(c))
+#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
+#define ISSPACE(c) (ISASCII(c) && isspace(c))
+#define ISUPPER(c) (ISASCII(c) && isupper(c))
+#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
+
+#ifdef WILD_TEST_ITERATIONS
+int wildmatch_iteration_count;
+#endif
+
+static int force_lower_case = 0;
+
+/* Match pattern "p" against the a virtually-joined string consisting
+ * of "text" and any strings in array "a". */
+static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
+{
+    uchar p_ch;
+
+#ifdef WILD_TEST_ITERATIONS
+    wildmatch_iteration_count++;
+#endif
+
+    for ( ; (p_ch = *p) != '\0'; text++, p++) {
+	int matched, special;
+	uchar t_ch, prev_ch;
+	while ((t_ch = *text) == '\0') {
+	    if (*a == NULL) {
+		if (p_ch != '*')
+		    return ABORT_ALL;
+		break;
+	    }
+	    text = *a++;
+	}
+	if (force_lower_case && ISUPPER(t_ch))
+	    t_ch = tolower(t_ch);
+	switch (p_ch) {
+	  case '\\':
+	    /* Literal match with following character.  Note that the test
+	     * in "default" handles the p[1] == '\0' failure case. */
+	    p_ch = *++p;
+	    /* FALLTHROUGH */
+	  default:
+	    if (t_ch != p_ch)
+		return FALSE;
+	    continue;
+	  case '?':
+	    /* Match anything but '/'. */
+	    if (t_ch == '/')
+		return FALSE;
+	    continue;
+	  case '*':
+	    if (*++p == '*') {
+		while (*++p == '*') {}
+		special = TRUE;
+	    } else
+		special = FALSE;
+	    if (*p == '\0') {
+		/* Trailing "**" matches everything.  Trailing "*" matches
+		 * only if there are no more slash characters. */
+		if (!special) {
+		    do {
+			if (strchr((char*)text, '/') != NULL)
+			    return FALSE;
+		    } while ((text = *a++) != NULL);
+		}
+		return TRUE;
+	    }
+	    while (1) {
+		if (t_ch == '\0') {
+		    if ((text = *a++) == NULL)
+			break;
+		    t_ch = *text;
+		    continue;
+		}
+		if ((matched = dowild(p, text, a)) != FALSE) {
+		    if (!special || matched != ABORT_TO_STARSTAR)
+			return matched;
+		} else if (!special && t_ch == '/')
+		    return ABORT_TO_STARSTAR;
+		t_ch = *++text;
+	    }
+	    return ABORT_ALL;
+	  case '[':
+	    p_ch = *++p;
+#ifdef NEGATE_CLASS2
+	    if (p_ch == NEGATE_CLASS2)
+		p_ch = NEGATE_CLASS;
+#endif
+	    /* Assign literal TRUE/FALSE because of "matched" comparison. */
+	    special = p_ch == NEGATE_CLASS? TRUE : FALSE;
+	    if (special) {
+		/* Inverted character class. */
+		p_ch = *++p;
+	    }
+	    prev_ch = 0;
+	    matched = FALSE;
+	    do {
+		if (!p_ch)
+		    return ABORT_ALL;
+		if (p_ch == '\\') {
+		    p_ch = *++p;
+		    if (!p_ch)
+			return ABORT_ALL;
+		    if (t_ch == p_ch)
+			matched = TRUE;
+		} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
+		    p_ch = *++p;
+		    if (p_ch == '\\') {
+			p_ch = *++p;
+			if (!p_ch)
+			    return ABORT_ALL;
+		    }
+		    if (t_ch <= p_ch && t_ch >= prev_ch)
+			matched = TRUE;
+		    p_ch = 0; /* This makes "prev_ch" get set to 0. */
+		} else if (p_ch == '[' && p[1] == ':') {
+		    const uchar *s;
+		    int i;
+		    for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
+		    if (!p_ch)
+			return ABORT_ALL;
+		    i = p - s - 1;
+		    if (i < 0 || p[-1] != ':') {
+			/* Didn't find ":]", so treat like a normal set. */
+			p = s - 2;
+			p_ch = '[';
+			if (t_ch == p_ch)
+			    matched = TRUE;
+			continue;
+		    }
+		    if (CC_EQ(s,i, "alnum")) {
+			if (ISALNUM(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "alpha")) {
+			if (ISALPHA(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "blank")) {
+			if (ISBLANK(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "cntrl")) {
+			if (ISCNTRL(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "digit")) {
+			if (ISDIGIT(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "graph")) {
+			if (ISGRAPH(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "lower")) {
+			if (ISLOWER(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "print")) {
+			if (ISPRINT(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "punct")) {
+			if (ISPUNCT(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "space")) {
+			if (ISSPACE(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "upper")) {
+			if (ISUPPER(t_ch))
+			    matched = TRUE;
+		    } else if (CC_EQ(s,i, "xdigit")) {
+			if (ISXDIGIT(t_ch))
+			    matched = TRUE;
+		    } else /* malformed [:class:] string */
+			return ABORT_ALL;
+		    p_ch = 0; /* This makes "prev_ch" get set to 0. */
+		} else if (t_ch == p_ch)
+		    matched = TRUE;
+	    } while (prev_ch = p_ch, (p_ch = *++p) != ']');
+	    if (matched == special || t_ch == '/')
+		return FALSE;
+	    continue;
+	}
+    }
+
+    do {
+	if (*text)
+	    return FALSE;
+    } while ((text = *a++) != NULL);
+
+    return TRUE;
+}
+
+/* Match literal string "s" against the a virtually-joined string consisting
+ * of "text" and any strings in array "a". */
+static int doliteral(const uchar *s, const uchar *text, const uchar*const *a)
+{
+    for ( ; *s != '\0'; text++, s++) {
+	while (*text == '\0') {
+	    if ((text = *a++) == NULL)
+		return FALSE;
+	}
+	if (*text != *s)
+	    return FALSE;
+    }
+
+    do {
+	if (*text)
+	    return FALSE;
+    } while ((text = *a++) != NULL);
+
+    return TRUE;
+}
+
+/* Return the last "count" path elements from the concatenated string.
+ * We return a string pointer to the start of the string, and update the
+ * array pointer-pointer to point to any remaining string elements. */
+static const uchar *trailing_N_elements(const uchar*const **a_ptr, int count)
+{
+    const uchar*const *a = *a_ptr;
+    const uchar*const *first_a = a;
+
+    while (*a)
+	    a++;
+
+    while (a != first_a) {
+	const uchar *s = *--a;
+	s += strlen((char*)s);
+	while (--s >= *a) {
+	    if (*s == '/' && !--count) {
+		*a_ptr = a+1;
+		return s+1;
+	    }
+	}
+    }
+
+    if (count == 1) {
+	*a_ptr = a+1;
+	return *a;
+    }
+
+    return NULL;
+}
+
+/* Match the "pattern" against the "text" string. */
+int wildmatch(const char *pattern, const char *text)
+{
+    static const uchar *nomore[1]; /* A NULL pointer. */
+#ifdef WILD_TEST_ITERATIONS
+    wildmatch_iteration_count = 0;
+#endif
+    return dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
+}
+
+/* Match the "pattern" against the forced-to-lower-case "text" string. */
+int iwildmatch(const char *pattern, const char *text)
+{
+    static const uchar *nomore[1]; /* A NULL pointer. */
+    int ret;
+#ifdef WILD_TEST_ITERATIONS
+    wildmatch_iteration_count = 0;
+#endif
+    force_lower_case = 1;
+    ret = dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
+    force_lower_case = 0;
+    return ret;
+}
+
+/* Match pattern "p" against the a virtually-joined string consisting
+ * of all the pointers in array "texts" (which has a NULL pointer at the
+ * end).  The int "where" can be 0 (normal matching), > 0 (match only
+ * the trailing N slash-separated filename components of "texts"), or < 0
+ * (match the "pattern" at the start or after any slash in "texts"). */
+int wildmatch_array(const char *pattern, const char*const *texts, int where)
+{
+    const uchar *p = (const uchar*)pattern;
+    const uchar*const *a = (const uchar*const*)texts;
+    const uchar *text;
+    int matched;
+
+#ifdef WILD_TEST_ITERATIONS
+    wildmatch_iteration_count = 0;
+#endif
+
+    if (where > 0)
+	text = trailing_N_elements(&a, where);
+    else
+	text = *a++;
+    if (!text)
+	return FALSE;
+
+    if ((matched = dowild(p, text, a)) != TRUE && where < 0
+     && matched != ABORT_ALL) {
+	while (1) {
+	    if (*text == '\0') {
+		if ((text = (uchar*)*a++) == NULL)
+		    return FALSE;
+		continue;
+	    }
+	    if (*text++ == '/' && (matched = dowild(p, text, a)) != FALSE
+	     && matched != ABORT_TO_STARSTAR)
+		break;
+	}
+    }
+    return matched == TRUE;
+}
+
+/* Match literal string "s" against the a virtually-joined string consisting
+ * of all the pointers in array "texts" (which has a NULL pointer at the
+ * end).  The int "where" can be 0 (normal matching), or > 0 (match
+ * only the trailing N slash-separated filename components of "texts"). */
+int litmatch_array(const char *string, const char*const *texts, int where)
+{
+    const uchar *s = (const uchar*)string;
+    const uchar*const *a = (const uchar* const*)texts;
+    const uchar *text;
+
+    if (where > 0)
+	text = trailing_N_elements(&a, where);
+    else
+	text = *a++;
+    if (!text)
+	return FALSE;
+
+    return doliteral(s, text, a) == TRUE;
+}
diff --git a/wildmatch.h b/wildmatch.h
new file mode 100644
index 0000000..e7f1a35
--- /dev/null
+++ b/wildmatch.h
@@ -0,0 +1,6 @@
+/* wildmatch.h */
+
+int wildmatch(const char *pattern, const char *text);
+int iwildmatch(const char *pattern, const char *text);
+int wildmatch_array(const char *pattern, const char*const *texts, int where);
+int litmatch_array(const char *string, const char*const *texts, int where);
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 04/12] wildmatch: remove unnecessary functions
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (2 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 03/12] Import wildmatch from rsync Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  5:04   ` Junio C Hamano
  2012-10-14  2:35 ` [PATCH v5 05/12] Integrate wildmatch to git Nguyễn Thái Ngọc Duy
                   ` (7 subsequent siblings)
  11 siblings, 1 reply; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 wildmatch.c | 164 ++++--------------------------------------------------------
 wildmatch.h |   2 -
 2 files changed, 10 insertions(+), 156 deletions(-)

diff --git a/wildmatch.c b/wildmatch.c
index f3a1731..fae7397 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -53,33 +53,18 @@
 #define ISUPPER(c) (ISASCII(c) && isupper(c))
 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
 
-#ifdef WILD_TEST_ITERATIONS
-int wildmatch_iteration_count;
-#endif
-
 static int force_lower_case = 0;
 
-/* Match pattern "p" against the a virtually-joined string consisting
- * of "text" and any strings in array "a". */
-static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
+/* Match pattern "p" against "text" */
+static int dowild(const uchar *p, const uchar *text)
 {
     uchar p_ch;
 
-#ifdef WILD_TEST_ITERATIONS
-    wildmatch_iteration_count++;
-#endif
-
     for ( ; (p_ch = *p) != '\0'; text++, p++) {
 	int matched, special;
 	uchar t_ch, prev_ch;
-	while ((t_ch = *text) == '\0') {
-	    if (*a == NULL) {
-		if (p_ch != '*')
-		    return ABORT_ALL;
-		break;
-	    }
-	    text = *a++;
-	}
+	if ((t_ch = *text) == '\0' && p_ch != '*')
+		return ABORT_ALL;
 	if (force_lower_case && ISUPPER(t_ch))
 	    t_ch = tolower(t_ch);
 	switch (p_ch) {
@@ -107,21 +92,15 @@ static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
 		/* Trailing "**" matches everything.  Trailing "*" matches
 		 * only if there are no more slash characters. */
 		if (!special) {
-		    do {
 			if (strchr((char*)text, '/') != NULL)
 			    return FALSE;
-		    } while ((text = *a++) != NULL);
 		}
 		return TRUE;
 	    }
 	    while (1) {
-		if (t_ch == '\0') {
-		    if ((text = *a++) == NULL)
-			break;
-		    t_ch = *text;
-		    continue;
-		}
-		if ((matched = dowild(p, text, a)) != FALSE) {
+		if (t_ch == '\0')
+		    break;
+		if ((matched = dowild(p, text)) != FALSE) {
 		    if (!special || matched != ABORT_TO_STARSTAR)
 			return matched;
 		} else if (!special && t_ch == '/')
@@ -225,144 +204,21 @@ static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
 	}
     }
 
-    do {
-	if (*text)
-	    return FALSE;
-    } while ((text = *a++) != NULL);
-
-    return TRUE;
-}
-
-/* Match literal string "s" against the a virtually-joined string consisting
- * of "text" and any strings in array "a". */
-static int doliteral(const uchar *s, const uchar *text, const uchar*const *a)
-{
-    for ( ; *s != '\0'; text++, s++) {
-	while (*text == '\0') {
-	    if ((text = *a++) == NULL)
-		return FALSE;
-	}
-	if (*text != *s)
-	    return FALSE;
-    }
-
-    do {
-	if (*text)
-	    return FALSE;
-    } while ((text = *a++) != NULL);
-
-    return TRUE;
-}
-
-/* Return the last "count" path elements from the concatenated string.
- * We return a string pointer to the start of the string, and update the
- * array pointer-pointer to point to any remaining string elements. */
-static const uchar *trailing_N_elements(const uchar*const **a_ptr, int count)
-{
-    const uchar*const *a = *a_ptr;
-    const uchar*const *first_a = a;
-
-    while (*a)
-	    a++;
-
-    while (a != first_a) {
-	const uchar *s = *--a;
-	s += strlen((char*)s);
-	while (--s >= *a) {
-	    if (*s == '/' && !--count) {
-		*a_ptr = a+1;
-		return s+1;
-	    }
-	}
-    }
-
-    if (count == 1) {
-	*a_ptr = a+1;
-	return *a;
-    }
-
-    return NULL;
+    return *text ? FALSE : TRUE;
 }
 
 /* Match the "pattern" against the "text" string. */
 int wildmatch(const char *pattern, const char *text)
 {
-    static const uchar *nomore[1]; /* A NULL pointer. */
-#ifdef WILD_TEST_ITERATIONS
-    wildmatch_iteration_count = 0;
-#endif
-    return dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
+    return dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
 }
 
 /* Match the "pattern" against the forced-to-lower-case "text" string. */
 int iwildmatch(const char *pattern, const char *text)
 {
-    static const uchar *nomore[1]; /* A NULL pointer. */
     int ret;
-#ifdef WILD_TEST_ITERATIONS
-    wildmatch_iteration_count = 0;
-#endif
     force_lower_case = 1;
-    ret = dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
+    ret = dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
     force_lower_case = 0;
     return ret;
 }
-
-/* Match pattern "p" against the a virtually-joined string consisting
- * of all the pointers in array "texts" (which has a NULL pointer at the
- * end).  The int "where" can be 0 (normal matching), > 0 (match only
- * the trailing N slash-separated filename components of "texts"), or < 0
- * (match the "pattern" at the start or after any slash in "texts"). */
-int wildmatch_array(const char *pattern, const char*const *texts, int where)
-{
-    const uchar *p = (const uchar*)pattern;
-    const uchar*const *a = (const uchar*const*)texts;
-    const uchar *text;
-    int matched;
-
-#ifdef WILD_TEST_ITERATIONS
-    wildmatch_iteration_count = 0;
-#endif
-
-    if (where > 0)
-	text = trailing_N_elements(&a, where);
-    else
-	text = *a++;
-    if (!text)
-	return FALSE;
-
-    if ((matched = dowild(p, text, a)) != TRUE && where < 0
-     && matched != ABORT_ALL) {
-	while (1) {
-	    if (*text == '\0') {
-		if ((text = (uchar*)*a++) == NULL)
-		    return FALSE;
-		continue;
-	    }
-	    if (*text++ == '/' && (matched = dowild(p, text, a)) != FALSE
-	     && matched != ABORT_TO_STARSTAR)
-		break;
-	}
-    }
-    return matched == TRUE;
-}
-
-/* Match literal string "s" against the a virtually-joined string consisting
- * of all the pointers in array "texts" (which has a NULL pointer at the
- * end).  The int "where" can be 0 (normal matching), or > 0 (match
- * only the trailing N slash-separated filename components of "texts"). */
-int litmatch_array(const char *string, const char*const *texts, int where)
-{
-    const uchar *s = (const uchar*)string;
-    const uchar*const *a = (const uchar* const*)texts;
-    const uchar *text;
-
-    if (where > 0)
-	text = trailing_N_elements(&a, where);
-    else
-	text = *a++;
-    if (!text)
-	return FALSE;
-
-    return doliteral(s, text, a) == TRUE;
-}
diff --git a/wildmatch.h b/wildmatch.h
index e7f1a35..562faa3 100644
--- a/wildmatch.h
+++ b/wildmatch.h
@@ -2,5 +2,3 @@
 
 int wildmatch(const char *pattern, const char *text);
 int iwildmatch(const char *pattern, const char *text);
-int wildmatch_array(const char *pattern, const char*const *texts, int where);
-int litmatch_array(const char *string, const char*const *texts, int where);
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 05/12] Integrate wildmatch to git
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (3 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 04/12] wildmatch: remove unnecessary functions Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  5:06   ` Junio C Hamano
  2012-10-14 11:07   ` Torsten Bögershausen
  2012-10-14  2:35 ` [PATCH v5 06/12] t3070: disable unreliable fnmatch tests Nguyễn Thái Ngọc Duy
                   ` (6 subsequent siblings)
  11 siblings, 2 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 13383 bytes --]

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 .gitignore           |   1 +
 Makefile             |   3 +
 t/t3070-wildmatch.sh | 188 +++++++++++++++++++++++++++++++++++++++++++++++++++
 t/t3070/wildtest.txt | 165 --------------------------------------------
 test-wildmatch.c     |  14 ++++
 wildmatch.c          |   5 +-
 6 files changed, 210 insertions(+), 166 deletions(-)
 create mode 100755 t/t3070-wildmatch.sh
 delete mode 100644 t/t3070/wildtest.txt
 create mode 100644 test-wildmatch.c

diff --git a/.gitignore b/.gitignore
index a188a82..37c3507 100644
--- a/.gitignore
+++ b/.gitignore
@@ -197,6 +197,7 @@
 /test-string-list
 /test-subprocess
 /test-svn-fe
+/test-wildmatch
 /common-cmds.h
 *.tar.gz
 *.dsc
diff --git a/Makefile b/Makefile
index f69979e..c752673 100644
--- a/Makefile
+++ b/Makefile
@@ -523,6 +523,7 @@ TEST_PROGRAMS_NEED_X += test-sigchain
 TEST_PROGRAMS_NEED_X += test-string-list
 TEST_PROGRAMS_NEED_X += test-subprocess
 TEST_PROGRAMS_NEED_X += test-svn-fe
+TEST_PROGRAMS_NEED_X += test-wildmatch
 
 TEST_PROGRAMS = $(patsubst %,%$X,$(TEST_PROGRAMS_NEED_X))
 
@@ -695,6 +696,7 @@ LIB_H += userdiff.h
 LIB_H += utf8.h
 LIB_H += varint.h
 LIB_H += walker.h
+LIB_H += wildmatch.h
 LIB_H += wt-status.h
 LIB_H += xdiff-interface.h
 LIB_H += xdiff/xdiff.h
@@ -826,6 +828,7 @@ LIB_OBJS += utf8.o
 LIB_OBJS += varint.o
 LIB_OBJS += version.o
 LIB_OBJS += walker.o
+LIB_OBJS += wildmatch.o
 LIB_OBJS += wrapper.o
 LIB_OBJS += write_or_die.o
 LIB_OBJS += ws.o
diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh
new file mode 100755
index 0000000..dbd3c8b
--- /dev/null
+++ b/t/t3070-wildmatch.sh
@@ -0,0 +1,188 @@
+#!/bin/sh
+
+test_description='wildmatch tests'
+
+. ./test-lib.sh
+
+match() {
+    if [ $1 = 1 ]; then
+	test_expect_success "wildmatch:    match '$3' '$4'" "
+	    test-wildmatch wildmatch '$3' '$4'
+	"
+    else
+	test_expect_success "wildmatch: no match '$3' '$4'" "
+	    ! test-wildmatch wildmatch '$3' '$4'
+	"
+    fi
+    if [ $2 = 1 ]; then
+	test_expect_success "fnmatch:      match '$3' '$4'" "
+	    test-wildmatch fnmatch '$3' '$4'
+	"
+    elif [ $2 = 0 ]; then
+	test_expect_success "fnmatch:   no match '$3' '$4'" "
+	    ! test-wildmatch fnmatch '$3' '$4'
+	"
+#    else
+#	test_expect_success BROKEN_FNMATCH "fnmatch:       '$3' '$4'" "
+#	    ! test-wildmatch fnmatch '$3' '$4'
+#	"
+    fi
+}
+
+# Basic wildmat features
+match 1 1 foo foo
+match 0 0 foo bar
+match 1 1 '' ""
+match 1 1 foo '???'
+match 0 0 foo '??'
+match 1 1 foo '*'
+match 1 1 foo 'f*'
+match 0 0 foo '*f'
+match 1 1 foo '*foo*'
+match 1 1 foobar '*ob*a*r*'
+match 1 1 aaaaaaabababab '*ab'
+match 1 1 'foo*' 'foo\*'
+match 0 0 foobar 'foo\*bar'
+match 1 1 'f\oo' 'f\\oo'
+match 1 1 ball '*[al]?'
+match 0 0 ten '[ten]'
+match 1 1 ten '**[!te]'
+match 0 0 ten '**[!ten]'
+match 1 1 ten 't[a-g]n'
+match 0 0 ten 't[!a-g]n'
+match 1 1 ton 't[!a-g]n'
+match 1 1 ton 't[^a-g]n'
+match 1 1 'a]b' 'a[]]b'
+match 1 1 a-b 'a[]-]b'
+match 1 1 'a]b' 'a[]-]b'
+match 0 0 aab 'a[]-]b'
+match 1 1 aab 'a[]a-]b'
+match 1 1 ']' ']'
+
+# Extended slash-matching features
+match 0 0 'foo/baz/bar' 'foo*bar'
+match 1 0 'foo/baz/bar' 'foo**bar'
+match 0 0 'foo/bar' 'foo?bar'
+match 0 0 'foo/bar' 'foo[/]bar'
+match 0 0 'foo/bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
+match 1 1 'foo-bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
+match 0 0 'foo' '**/foo'
+match 1 1 '/foo' '**/foo'
+match 1 0 'bar/baz/foo' '**/foo'
+match 0 0 'bar/baz/foo' '*/foo'
+match 0 0 'foo/bar/baz' '**/bar*'
+match 1 0 'deep/foo/bar/baz' '**/bar/*'
+match 0 0 'deep/foo/bar/baz/' '**/bar/*'
+match 1 0 'deep/foo/bar/baz/' '**/bar/**'
+match 0 0 'deep/foo/bar' '**/bar/*'
+match 1 0 'deep/foo/bar/' '**/bar/**'
+match 1 0 'foo/bar/baz' '**/bar**'
+match 1 0 'foo/bar/baz/x' '*/bar/**'
+match 0 0 'deep/foo/bar/baz/x' '*/bar/**'
+match 1 0 'deep/foo/bar/baz/x' '**/bar/*/*'
+
+# Various additional tests
+match 0 0 'acrt' 'a[c-c]st'
+match 1 1 'acrt' 'a[c-c]rt'
+match 0 0 ']' '[!]-]'
+match 1 1 'a' '[!]-]'
+match 0 0 '' '\'
+match 0 0 '\' '\'
+match 0 0 '/\' '*/\'
+match 1 1 '/\' '*/\\'
+match 1 1 'foo' 'foo'
+match 1 1 '@foo' '@foo'
+match 0 0 'foo' '@foo'
+match 1 1 '[ab]' '\[ab]'
+match 1 1 '[ab]' '[[]ab]'
+match 1 1 '[ab]' '[[:]ab]'
+match 0 0 '[ab]' '[[::]ab]'
+match 1 1 '[ab]' '[[:digit]ab]'
+match 1 1 '[ab]' '[\[:]ab]'
+match 1 1 '?a?b' '\??\?b'
+match 1 1 'abc' '\a\b\c'
+match 0 0 'foo' ''
+match 1 0 'foo/bar/baz/to' '**/t[o]'
+
+# Character class tests
+match 1 1 'a1B' '[[:alpha:]][[:digit:]][[:upper:]]'
+match 0 0 'a' '[[:digit:][:upper:][:space:]]'
+match 1 1 'A' '[[:digit:][:upper:][:space:]]'
+match 1 0 '1' '[[:digit:][:upper:][:space:]]'
+match 0 0 '1' '[[:digit:][:upper:][:spaci:]]'
+match 1 1 ' ' '[[:digit:][:upper:][:space:]]'
+match 0 0 '.' '[[:digit:][:upper:][:space:]]'
+match 1 1 '.' '[[:digit:][:punct:][:space:]]'
+match 1 1 '5' '[[:xdigit:]]'
+match 1 1 'f' '[[:xdigit:]]'
+match 1 1 'D' '[[:xdigit:]]'
+match 1 0 '_' '[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]'
+match 1 0 '_' '[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]'
+match 1 1 '.' '[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]'
+match 1 1 '5' '[a-c[:digit:]x-z]'
+match 1 1 'b' '[a-c[:digit:]x-z]'
+match 1 1 'y' '[a-c[:digit:]x-z]'
+match 0 0 'q' '[a-c[:digit:]x-z]'
+
+# Additional tests, including some malformed wildmats
+match 1 1 ']' '[\\-^]'
+match 0 0 '[' '[\\-^]'
+match 1 1 '-' '[\-_]'
+match 1 1 ']' '[\]]'
+match 0 0 '\]' '[\]]'
+match 0 0 '\' '[\]]'
+match 0 0 'ab' 'a[]b'
+match 0 1 'a[]b' 'a[]b'
+match 0 1 'ab[' 'ab['
+match 0 0 'ab' '[!'
+match 0 0 'ab' '[-'
+match 1 1 '-' '[-]'
+match 0 0 '-' '[a-'
+match 0 0 '-' '[!a-'
+match 1 1 '-' '[--A]'
+match 1 1 '5' '[--A]'
+match 1 1 ' ' '[ --]'
+match 1 1 '$' '[ --]'
+match 1 1 '-' '[ --]'
+match 0 0 '0' '[ --]'
+match 1 1 '-' '[---]'
+match 1 1 '-' '[------]'
+match 0 0 'j' '[a-e-n]'
+match 1 1 '-' '[a-e-n]'
+match 1 1 'a' '[!------]'
+match 0 0 '[' '[]-a]'
+match 1 1 '^' '[]-a]'
+match 0 0 '^' '[!]-a]'
+match 1 1 '[' '[!]-a]'
+match 1 1 '^' '[a^bc]'
+match 1 1 '-b]' '[a-]b]'
+match 0 0 '\' '[\]'
+match 1 1 '\' '[\\]'
+match 0 0 '\' '[!\\]'
+match 1 1 'G' '[A-\\]'
+match 0 0 'aaabbb' 'b*a'
+match 0 0 'aabcaa' '*ba*'
+match 1 1 ',' '[,]'
+match 1 1 ',' '[\\,]'
+match 1 1 '\' '[\\,]'
+match 1 1 '-' '[,-.]'
+match 0 0 '+' '[,-.]'
+match 0 0 '-.]' '[,-.]'
+match 1 1 '2' '[\1-\3]'
+match 1 1 '3' '[\1-\3]'
+match 0 0 '4' '[\1-\3]'
+match 1 1 '\' '[[-\]]'
+match 1 1 '[' '[[-\]]'
+match 1 1 ']' '[[-\]]'
+match 0 0 '-' '[[-\]]'
+
+# Test recursion and the abort code (use "wildtest -i" to see iteration counts)
+match 1 1 '-adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1' '-*-*-*-*-*-*-12-*-*-*-m-*-*-*'
+match 0 0 '-adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1' '-*-*-*-*-*-*-12-*-*-*-m-*-*-*'
+match 0 0 '-adobe-courier-bold-o-normal--12-120-75-75-/-70-iso8859-1' '-*-*-*-*-*-*-12-*-*-*-m-*-*-*'
+match 1 1 '/adobe/courier/bold/o/normal//12/120/75/75/m/70/iso8859/1' '/*/*/*/*/*/*/12/*/*/*/m/*/*/*'
+match 0 0 '/adobe/courier/bold/o/normal//12/120/75/75/X/70/iso8859/1' '/*/*/*/*/*/*/12/*/*/*/m/*/*/*'
+match 1 0 'abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt' '**/*a*b*g*n*t'
+match 0 0 'abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz' '**/*a*b*g*n*t'
+
+test_done
diff --git a/t/t3070/wildtest.txt b/t/t3070/wildtest.txt
deleted file mode 100644
index 42c1678..0000000
--- a/t/t3070/wildtest.txt
+++ /dev/null
@@ -1,165 +0,0 @@
-# Input is in the following format (all items white-space separated):
-#
-# The first two items are 1 or 0 indicating if the wildmat call is expected to
-# succeed and if fnmatch works the same way as wildmat, respectively.  After
-# that is a text string for the match, and a pattern string.  Strings can be
-# quoted (if desired) in either double or single quotes, as well as backticks.
-#
-# MATCH FNMATCH_SAME "text to match" 'pattern to use'
-
-# Basic wildmat features
-1 1 foo			foo
-0 1 foo			bar
-1 1 ''			""
-1 1 foo			???
-0 1 foo			??
-1 1 foo			*
-1 1 foo			f*
-0 1 foo			*f
-1 1 foo			*foo*
-1 1 foobar		*ob*a*r*
-1 1 aaaaaaabababab	*ab
-1 1 foo*		foo\*
-0 1 foobar		foo\*bar
-1 1 f\oo		f\\oo
-1 1 ball		*[al]?
-0 1 ten			[ten]
-1 1 ten			**[!te]
-0 1 ten			**[!ten]
-1 1 ten			t[a-g]n
-0 1 ten			t[!a-g]n
-1 1 ton			t[!a-g]n
-1 1 ton			t[^a-g]n
-1 1 a]b			a[]]b
-1 1 a-b			a[]-]b
-1 1 a]b			a[]-]b
-0 1 aab			a[]-]b
-1 1 aab			a[]a-]b
-1 1 ]			]
-
-# Extended slash-matching features
-0 1 foo/baz/bar		foo*bar
-1 1 foo/baz/bar		foo**bar
-0 1 foo/bar		foo?bar
-0 1 foo/bar		foo[/]bar
-0 1 foo/bar		f[^eiu][^eiu][^eiu][^eiu][^eiu]r
-1 1 foo-bar		f[^eiu][^eiu][^eiu][^eiu][^eiu]r
-0 1 foo			**/foo
-1 1 /foo		**/foo
-1 1 bar/baz/foo		**/foo
-0 1 bar/baz/foo		*/foo
-0 0 foo/bar/baz		**/bar*
-1 1 deep/foo/bar/baz	**/bar/*
-0 1 deep/foo/bar/baz/	**/bar/*
-1 1 deep/foo/bar/baz/	**/bar/**
-0 1 deep/foo/bar	**/bar/*
-1 1 deep/foo/bar/	**/bar/**
-1 1 foo/bar/baz		**/bar**
-1 1 foo/bar/baz/x	*/bar/**
-0 0 deep/foo/bar/baz/x	*/bar/**
-1 1 deep/foo/bar/baz/x	**/bar/*/*
-
-# Various additional tests
-0 1 acrt		a[c-c]st
-1 1 acrt		a[c-c]rt
-0 1 ]			[!]-]
-1 1 a			[!]-]
-0 1 ''			\
-0 1 \			\
-0 1 /\			*/\
-1 1 /\			*/\\
-1 1 foo			foo
-1 1 @foo		@foo
-0 1 foo			@foo
-1 1 [ab]		\[ab]
-1 1 [ab]		[[]ab]
-1 1 [ab]		[[:]ab]
-0 1 [ab]		[[::]ab]
-1 1 [ab]		[[:digit]ab]
-1 1 [ab]		[\[:]ab]
-1 1 ?a?b		\??\?b
-1 1 abc			\a\b\c
-0 1 foo			''
-1 1 foo/bar/baz/to	**/t[o]
-
-# Character class tests
-1 1 a1B		[[:alpha:]][[:digit:]][[:upper:]]
-0 1 a		[[:digit:][:upper:][:space:]]
-1 1 A		[[:digit:][:upper:][:space:]]
-1 1 1		[[:digit:][:upper:][:space:]]
-0 1 1		[[:digit:][:upper:][:spaci:]]
-1 1 ' '		[[:digit:][:upper:][:space:]]
-0 1 .		[[:digit:][:upper:][:space:]]
-1 1 .		[[:digit:][:punct:][:space:]]
-1 1 5		[[:xdigit:]]
-1 1 f		[[:xdigit:]]
-1 1 D		[[:xdigit:]]
-1 1 _		[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]
-#1 1 …		[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]
-1 1 \x7f		[^[:alnum:][:alpha:][:blank:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]
-1 1 .		[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]
-1 1 5		[a-c[:digit:]x-z]
-1 1 b		[a-c[:digit:]x-z]
-1 1 y		[a-c[:digit:]x-z]
-0 1 q		[a-c[:digit:]x-z]
-
-# Additional tests, including some malformed wildmats
-1 1 ]		[\\-^]
-0 1 [		[\\-^]
-1 1 -		[\-_]
-1 1 ]		[\]]
-0 1 \]		[\]]
-0 1 \		[\]]
-0 1 ab		a[]b
-0 1 a[]b	a[]b
-0 1 ab[		ab[
-0 1 ab		[!
-0 1 ab		[-
-1 1 -		[-]
-0 1 -		[a-
-0 1 -		[!a-
-1 1 -		[--A]
-1 1 5		[--A]
-1 1 ' '		'[ --]'
-1 1 $		'[ --]'
-1 1 -		'[ --]'
-0 1 0		'[ --]'
-1 1 -		[---]
-1 1 -		[------]
-0 1 j		[a-e-n]
-1 1 -		[a-e-n]
-1 1 a		[!------]
-0 1 [		[]-a]
-1 1 ^		[]-a]
-0 1 ^		[!]-a]
-1 1 [		[!]-a]
-1 1 ^		[a^bc]
-1 1 -b]		[a-]b]
-0 1 \		[\]
-1 1 \		[\\]
-0 1 \		[!\\]
-1 1 G		[A-\\]
-0 1 aaabbb	b*a
-0 1 aabcaa	*ba*
-1 1 ,		[,]
-1 1 ,		[\\,]
-1 1 \		[\\,]
-1 1 -		[,-.]
-0 1 +		[,-.]
-0 1 -.]		[,-.]
-1 1 2		[\1-\3]
-1 1 3		[\1-\3]
-0 1 4		[\1-\3]
-1 1 \		[[-\]]
-1 1 [		[[-\]]
-1 1 ]		[[-\]]
-0 1 -		[[-\]]
-
-# Test recursion and the abort code (use "wildtest -i" to see iteration counts)
-1 1 -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1	-*-*-*-*-*-*-12-*-*-*-m-*-*-*
-0 1 -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1	-*-*-*-*-*-*-12-*-*-*-m-*-*-*
-0 1 -adobe-courier-bold-o-normal--12-120-75-75-/-70-iso8859-1	-*-*-*-*-*-*-12-*-*-*-m-*-*-*
-1 1 /adobe/courier/bold/o/normal//12/120/75/75/m/70/iso8859/1	/*/*/*/*/*/*/12/*/*/*/m/*/*/*
-0 1 /adobe/courier/bold/o/normal//12/120/75/75/X/70/iso8859/1	/*/*/*/*/*/*/12/*/*/*/m/*/*/*
-1 1 abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txt		**/*a*b*g*n*t
-0 1 abcd/abcdefg/abcdefghijk/abcdefghijklmnop.txtz		**/*a*b*g*n*t
diff --git a/test-wildmatch.c b/test-wildmatch.c
new file mode 100644
index 0000000..ac56420
--- /dev/null
+++ b/test-wildmatch.c
@@ -0,0 +1,14 @@
+#include "cache.h"
+#include "wildmatch.h"
+
+int main(int argc, char **argv)
+{
+	if (!strcmp(argv[1], "wildmatch"))
+		return wildmatch(argv[3], argv[2]) ? 0 : 1;
+	else if (!strcmp(argv[1], "iwildmatch"))
+		return iwildmatch(argv[3], argv[2]) ? 0 : 1;
+	else if (!strcmp(argv[1], "fnmatch"))
+		return !!fnmatch(argv[3], argv[2], FNM_PATHNAME);
+	else
+		return 1;
+}
diff --git a/wildmatch.c b/wildmatch.c
index fae7397..d0b906a 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -9,7 +9,10 @@
 **  work differently than '*', and to fix the character-class code.
 */
 
-#include "rsync.h"
+#include "cache.h"
+#include "wildmatch.h"
+
+typedef unsigned char uchar;
 
 /* What character marks an inverted character class? */
 #define NEGATE_CLASS	'!'
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 06/12] t3070: disable unreliable fnmatch tests
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (4 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 05/12] Integrate wildmatch to git Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 07/12] wildmatch: make wildmatch's return value compatible with fnmatch Nguyễn Thái Ngọc Duy
                   ` (5 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

These tests show different results on different fnmatch() versions. We
don't want to test fnmatch here. We want to make sure wildmatch
behavior matches fnmatch and that only makes sense in cases when
fnmatch() behaves consistently.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 t/t3070-wildmatch.sh | 86 ++++++++++++++++++++++++++--------------------------
 1 file changed, 43 insertions(+), 43 deletions(-)

diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh
index dbd3c8b..dd95b00 100755
--- a/t/t3070-wildmatch.sh
+++ b/t/t3070-wildmatch.sh
@@ -52,11 +52,11 @@ match 1 1 ten 't[a-g]n'
 match 0 0 ten 't[!a-g]n'
 match 1 1 ton 't[!a-g]n'
 match 1 1 ton 't[^a-g]n'
-match 1 1 'a]b' 'a[]]b'
-match 1 1 a-b 'a[]-]b'
-match 1 1 'a]b' 'a[]-]b'
-match 0 0 aab 'a[]-]b'
-match 1 1 aab 'a[]a-]b'
+match 1 x 'a]b' 'a[]]b'
+match 1 x a-b 'a[]-]b'
+match 1 x 'a]b' 'a[]-]b'
+match 0 x aab 'a[]-]b'
+match 1 x aab 'a[]a-]b'
 match 1 1 ']' ']'
 
 # Extended slash-matching features
@@ -67,7 +67,7 @@ match 0 0 'foo/bar' 'foo[/]bar'
 match 0 0 'foo/bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
 match 1 1 'foo-bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
 match 0 0 'foo' '**/foo'
-match 1 1 '/foo' '**/foo'
+match 1 x '/foo' '**/foo'
 match 1 0 'bar/baz/foo' '**/foo'
 match 0 0 'bar/baz/foo' '*/foo'
 match 0 0 'foo/bar/baz' '**/bar*'
@@ -85,77 +85,77 @@ match 1 0 'deep/foo/bar/baz/x' '**/bar/*/*'
 match 0 0 'acrt' 'a[c-c]st'
 match 1 1 'acrt' 'a[c-c]rt'
 match 0 0 ']' '[!]-]'
-match 1 1 'a' '[!]-]'
+match 1 x 'a' '[!]-]'
 match 0 0 '' '\'
-match 0 0 '\' '\'
-match 0 0 '/\' '*/\'
-match 1 1 '/\' '*/\\'
+match 0 x '\' '\'
+match 0 x '/\' '*/\'
+match 1 x '/\' '*/\\'
 match 1 1 'foo' 'foo'
 match 1 1 '@foo' '@foo'
 match 0 0 'foo' '@foo'
 match 1 1 '[ab]' '\[ab]'
 match 1 1 '[ab]' '[[]ab]'
-match 1 1 '[ab]' '[[:]ab]'
-match 0 0 '[ab]' '[[::]ab]'
-match 1 1 '[ab]' '[[:digit]ab]'
-match 1 1 '[ab]' '[\[:]ab]'
+match 1 x '[ab]' '[[:]ab]'
+match 0 x '[ab]' '[[::]ab]'
+match 1 x '[ab]' '[[:digit]ab]'
+match 1 x '[ab]' '[\[:]ab]'
 match 1 1 '?a?b' '\??\?b'
 match 1 1 'abc' '\a\b\c'
 match 0 0 'foo' ''
 match 1 0 'foo/bar/baz/to' '**/t[o]'
 
 # Character class tests
-match 1 1 'a1B' '[[:alpha:]][[:digit:]][[:upper:]]'
-match 0 0 'a' '[[:digit:][:upper:][:space:]]'
-match 1 1 'A' '[[:digit:][:upper:][:space:]]'
-match 1 0 '1' '[[:digit:][:upper:][:space:]]'
-match 0 0 '1' '[[:digit:][:upper:][:spaci:]]'
-match 1 1 ' ' '[[:digit:][:upper:][:space:]]'
-match 0 0 '.' '[[:digit:][:upper:][:space:]]'
-match 1 1 '.' '[[:digit:][:punct:][:space:]]'
+match 1 x 'a1B' '[[:alpha:]][[:digit:]][[:upper:]]'
+match 0 x 'a' '[[:digit:][:upper:][:space:]]'
+match 1 x 'A' '[[:digit:][:upper:][:space:]]'
+match 1 x '1' '[[:digit:][:upper:][:space:]]'
+match 0 x '1' '[[:digit:][:upper:][:spaci:]]'
+match 1 x ' ' '[[:digit:][:upper:][:space:]]'
+match 0 x '.' '[[:digit:][:upper:][:space:]]'
+match 1 x '.' '[[:digit:][:punct:][:space:]]'
 match 1 1 '5' '[[:xdigit:]]'
 match 1 1 'f' '[[:xdigit:]]'
 match 1 1 'D' '[[:xdigit:]]'
-match 1 0 '_' '[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]'
-match 1 0 '_' '[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]'
-match 1 1 '.' '[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]'
-match 1 1 '5' '[a-c[:digit:]x-z]'
-match 1 1 'b' '[a-c[:digit:]x-z]'
-match 1 1 'y' '[a-c[:digit:]x-z]'
-match 0 0 'q' '[a-c[:digit:]x-z]'
+match 1 x '_' '[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]'
+match 1 x '_' '[[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:graph:][:lower:][:print:][:punct:][:space:][:upper:][:xdigit:]]'
+match 1 x '.' '[^[:alnum:][:alpha:][:blank:][:cntrl:][:digit:][:lower:][:space:][:upper:][:xdigit:]]'
+match 1 x '5' '[a-c[:digit:]x-z]'
+match 1 x 'b' '[a-c[:digit:]x-z]'
+match 1 x 'y' '[a-c[:digit:]x-z]'
+match 0 x 'q' '[a-c[:digit:]x-z]'
 
 # Additional tests, including some malformed wildmats
-match 1 1 ']' '[\\-^]'
+match 1 x ']' '[\\-^]'
 match 0 0 '[' '[\\-^]'
-match 1 1 '-' '[\-_]'
-match 1 1 ']' '[\]]'
+match 1 x '-' '[\-_]'
+match 1 x ']' '[\]]'
 match 0 0 '\]' '[\]]'
 match 0 0 '\' '[\]]'
 match 0 0 'ab' 'a[]b'
-match 0 1 'a[]b' 'a[]b'
-match 0 1 'ab[' 'ab['
+match 0 x 'a[]b' 'a[]b'
+match 0 x 'ab[' 'ab['
 match 0 0 'ab' '[!'
 match 0 0 'ab' '[-'
 match 1 1 '-' '[-]'
 match 0 0 '-' '[a-'
 match 0 0 '-' '[!a-'
-match 1 1 '-' '[--A]'
-match 1 1 '5' '[--A]'
+match 1 x '-' '[--A]'
+match 1 x '5' '[--A]'
 match 1 1 ' ' '[ --]'
 match 1 1 '$' '[ --]'
 match 1 1 '-' '[ --]'
 match 0 0 '0' '[ --]'
-match 1 1 '-' '[---]'
-match 1 1 '-' '[------]'
+match 1 x '-' '[---]'
+match 1 x '-' '[------]'
 match 0 0 'j' '[a-e-n]'
-match 1 1 '-' '[a-e-n]'
-match 1 1 'a' '[!------]'
+match 1 x '-' '[a-e-n]'
+match 1 x 'a' '[!------]'
 match 0 0 '[' '[]-a]'
-match 1 1 '^' '[]-a]'
+match 1 x '^' '[]-a]'
 match 0 0 '^' '[!]-a]'
-match 1 1 '[' '[!]-a]'
+match 1 x '[' '[!]-a]'
 match 1 1 '^' '[a^bc]'
-match 1 1 '-b]' '[a-]b]'
+match 1 x '-b]' '[a-]b]'
 match 0 0 '\' '[\]'
 match 1 1 '\' '[\\]'
 match 0 0 '\' '[!\\]'
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 07/12] wildmatch: make wildmatch's return value compatible with fnmatch
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (5 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 06/12] t3070: disable unreliable fnmatch tests Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  5:09   ` Junio C Hamano
  2012-10-14  2:35 ` [PATCH v5 08/12] wildmatch: remove static variable force_lower_case Nguyễn Thái Ngọc Duy
                   ` (4 subsequent siblings)
  11 siblings, 1 reply; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

wildmatch returns non-zero if matched, zero otherwise. This patch
makes it return zero if matched, non-zero otherwise, like fnmatch().

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 test-wildmatch.c |  4 ++--
 wildmatch.c      | 21 ++++++++++++---------
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/test-wildmatch.c b/test-wildmatch.c
index ac56420..77014e9 100644
--- a/test-wildmatch.c
+++ b/test-wildmatch.c
@@ -4,9 +4,9 @@
 int main(int argc, char **argv)
 {
 	if (!strcmp(argv[1], "wildmatch"))
-		return wildmatch(argv[3], argv[2]) ? 0 : 1;
+		return !!wildmatch(argv[3], argv[2]);
 	else if (!strcmp(argv[1], "iwildmatch"))
-		return iwildmatch(argv[3], argv[2]) ? 0 : 1;
+		return !!iwildmatch(argv[3], argv[2]);
 	else if (!strcmp(argv[1], "fnmatch"))
 		return !!fnmatch(argv[3], argv[2], FNM_PATHNAME);
 	else
diff --git a/wildmatch.c b/wildmatch.c
index d0b906a..e3ac6cc 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -20,6 +20,9 @@ typedef unsigned char uchar;
 
 #define FALSE 0
 #define TRUE 1
+
+#define NOMATCH 1
+#define MATCH 0
 #define ABORT_ALL -1
 #define ABORT_TO_STARSTAR -2
 
@@ -78,12 +81,12 @@ static int dowild(const uchar *p, const uchar *text)
 	    /* FALLTHROUGH */
 	  default:
 	    if (t_ch != p_ch)
-		return FALSE;
+		return NOMATCH;
 	    continue;
 	  case '?':
 	    /* Match anything but '/'. */
 	    if (t_ch == '/')
-		return FALSE;
+		return NOMATCH;
 	    continue;
 	  case '*':
 	    if (*++p == '*') {
@@ -96,14 +99,14 @@ static int dowild(const uchar *p, const uchar *text)
 		 * only if there are no more slash characters. */
 		if (!special) {
 			if (strchr((char*)text, '/') != NULL)
-			    return FALSE;
+			    return NOMATCH;
 		}
-		return TRUE;
+		return MATCH;
 	    }
 	    while (1) {
 		if (t_ch == '\0')
 		    break;
-		if ((matched = dowild(p, text)) != FALSE) {
+		if ((matched = dowild(p, text)) != NOMATCH) {
 		    if (!special || matched != ABORT_TO_STARSTAR)
 			return matched;
 		} else if (!special && t_ch == '/')
@@ -202,18 +205,18 @@ static int dowild(const uchar *p, const uchar *text)
 		    matched = TRUE;
 	    } while (prev_ch = p_ch, (p_ch = *++p) != ']');
 	    if (matched == special || t_ch == '/')
-		return FALSE;
+		return NOMATCH;
 	    continue;
 	}
     }
 
-    return *text ? FALSE : TRUE;
+    return *text ? NOMATCH : MATCH;
 }
 
 /* Match the "pattern" against the "text" string. */
 int wildmatch(const char *pattern, const char *text)
 {
-    return dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
+    return dowild((const uchar*)pattern, (const uchar*)text);
 }
 
 /* Match the "pattern" against the forced-to-lower-case "text" string. */
@@ -221,7 +224,7 @@ int iwildmatch(const char *pattern, const char *text)
 {
     int ret;
     force_lower_case = 1;
-    ret = dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
+    ret = dowild((const uchar*)pattern, (const uchar*)text);
     force_lower_case = 0;
     return ret;
 }
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 08/12] wildmatch: remove static variable force_lower_case
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (6 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 07/12] wildmatch: make wildmatch's return value compatible with fnmatch Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 09/12] wildmatch: fix case-insensitive matching Nguyễn Thái Ngọc Duy
                   ` (3 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

One less place to worry about thread safety. Also combine wildmatch()
and iwildmatch() into a single function that takes a flags argument.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 test-wildmatch.c |  4 ++--
 wildmatch.c      | 21 +++++----------------
 wildmatch.h      |  3 +--
 3 files changed, 8 insertions(+), 20 deletions(-)

diff --git a/test-wildmatch.c b/test-wildmatch.c
index 77014e9..74c0864 100644
--- a/test-wildmatch.c
+++ b/test-wildmatch.c
@@ -4,9 +4,9 @@
 int main(int argc, char **argv)
 {
 	if (!strcmp(argv[1], "wildmatch"))
-		return !!wildmatch(argv[3], argv[2]);
+		return !!wildmatch(argv[3], argv[2], 0);
 	else if (!strcmp(argv[1], "iwildmatch"))
-		return !!iwildmatch(argv[3], argv[2]);
+		return !!wildmatch(argv[3], argv[2], FNM_CASEFOLD);
 	else if (!strcmp(argv[1], "fnmatch"))
 		return !!fnmatch(argv[3], argv[2], FNM_PATHNAME);
 	else
diff --git a/wildmatch.c b/wildmatch.c
index e3ac6cc..20c5ef6 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -59,10 +59,8 @@ typedef unsigned char uchar;
 #define ISUPPER(c) (ISASCII(c) && isupper(c))
 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
 
-static int force_lower_case = 0;
-
 /* Match pattern "p" against "text" */
-static int dowild(const uchar *p, const uchar *text)
+static int dowild(const uchar *p, const uchar *text, int force_lower_case)
 {
     uchar p_ch;
 
@@ -106,7 +104,7 @@ static int dowild(const uchar *p, const uchar *text)
 	    while (1) {
 		if (t_ch == '\0')
 		    break;
-		if ((matched = dowild(p, text)) != NOMATCH) {
+		if ((matched = dowild(p, text, force_lower_case)) != NOMATCH) {
 		    if (!special || matched != ABORT_TO_STARSTAR)
 			return matched;
 		} else if (!special && t_ch == '/')
@@ -214,17 +212,8 @@ static int dowild(const uchar *p, const uchar *text)
 }
 
 /* Match the "pattern" against the "text" string. */
-int wildmatch(const char *pattern, const char *text)
-{
-    return dowild((const uchar*)pattern, (const uchar*)text);
-}
-
-/* Match the "pattern" against the forced-to-lower-case "text" string. */
-int iwildmatch(const char *pattern, const char *text)
+int wildmatch(const char *pattern, const char *text, int flags)
 {
-    int ret;
-    force_lower_case = 1;
-    ret = dowild((const uchar*)pattern, (const uchar*)text);
-    force_lower_case = 0;
-    return ret;
+    return dowild((const uchar*)pattern, (const uchar*)text,
+		  flags & FNM_CASEFOLD ? 1 : 0);
 }
diff --git a/wildmatch.h b/wildmatch.h
index 562faa3..e974f9a 100644
--- a/wildmatch.h
+++ b/wildmatch.h
@@ -1,4 +1,3 @@
 /* wildmatch.h */
 
-int wildmatch(const char *pattern, const char *text);
-int iwildmatch(const char *pattern, const char *text);
+int wildmatch(const char *pattern, const char *text, int flags);
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 09/12] wildmatch: fix case-insensitive matching
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (7 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 08/12] wildmatch: remove static variable force_lower_case Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 10/12] wildmatch: adjust "**" behavior Nguyễn Thái Ngọc Duy
                   ` (2 subsequent siblings)
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

dowild() does case-insensitive matching by lower-casing only the text.
That means lower case letters in patterns imply case-insensitive
matching, but upper case letters mean exact matching.

We do not want that subtlety. Lower-case the pattern too, so that
iwildmatch() always does what we expect it to do.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 wildmatch.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/wildmatch.c b/wildmatch.c
index 20c5ef6..6542524 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -71,6 +71,8 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case)
 		return ABORT_ALL;
 	if (force_lower_case && ISUPPER(t_ch))
 	    t_ch = tolower(t_ch);
+	if (force_lower_case && ISUPPER(p_ch))
+	    p_ch = tolower(p_ch);
 	switch (p_ch) {
 	  case '\\':
 	    /* Literal match with following character.  Note that the test
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 10/12] wildmatch: adjust "**" behavior
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (8 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 09/12] wildmatch: fix case-insensitive matching Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 11/12] wildmatch: make /**/ match zero or more directories Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 12/12] Support "**" wildcard in .gitignore and .gitattributes Nguyễn Thái Ngọc Duy
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

Standard wildmatch() sees consecutive asterisks as "*" that can also
match slashes. But that may be hard to explain to users as
"abc/**/def" can match "abcdef", "abcxyzdef", "abc/def", "abc/x/def",
"abc/x/y/def"...

This patch changes wildmatch so that users can do

- "**/def" -> all paths ending with file/directory 'def'
- "abc/**" -> equivalent to "/abc/"
- "abc/**/def" -> "abc/x/def", "abc/x/y/def"...
- otherwise consider the pattern malformed if "**" is found

Basically, the magic of "**" remains only when it is surrounded by
slashes.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 t/t3070-wildmatch.sh |  5 +++--
 wildmatch.c          | 13 +++++++------
 wildmatch.h          |  6 ++++++
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh
index dd95b00..15848d5 100755
--- a/t/t3070-wildmatch.sh
+++ b/t/t3070-wildmatch.sh
@@ -46,7 +46,7 @@ match 0 0 foobar 'foo\*bar'
 match 1 1 'f\oo' 'f\\oo'
 match 1 1 ball '*[al]?'
 match 0 0 ten '[ten]'
-match 1 1 ten '**[!te]'
+match 0 1 ten '**[!te]'
 match 0 0 ten '**[!ten]'
 match 1 1 ten 't[a-g]n'
 match 0 0 ten 't[!a-g]n'
@@ -61,7 +61,8 @@ match 1 1 ']' ']'
 
 # Extended slash-matching features
 match 0 0 'foo/baz/bar' 'foo*bar'
-match 1 0 'foo/baz/bar' 'foo**bar'
+match 0 0 'foo/baz/bar' 'foo**bar'
+match 0 1 'foobazbar' 'foo**bar'
 match 0 0 'foo/bar' 'foo?bar'
 match 0 0 'foo/bar' 'foo[/]bar'
 match 0 0 'foo/bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
diff --git a/wildmatch.c b/wildmatch.c
index 6542524..7209f26 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -21,11 +21,6 @@ typedef unsigned char uchar;
 #define FALSE 0
 #define TRUE 1
 
-#define NOMATCH 1
-#define MATCH 0
-#define ABORT_ALL -1
-#define ABORT_TO_STARSTAR -2
-
 #define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
 				    && *(class) == *(litmatch) \
 				    && strncmp((char*)class, litmatch, len) == 0)
@@ -90,8 +85,14 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case)
 	    continue;
 	  case '*':
 	    if (*++p == '*') {
+		const uchar *prev_p = p - 2;
 		while (*++p == '*') {}
-		special = TRUE;
+		if ((prev_p == text || *prev_p == '/') ||
+		    (*p == '\0' || *p == '/' ||
+		     (p[0] == '\\' && p[1] == '/'))) {
+		    special = TRUE;
+		} else
+		    return ABORT_MALFORMED;
 	    } else
 		special = FALSE;
 	    if (*p == '\0') {
diff --git a/wildmatch.h b/wildmatch.h
index e974f9a..984a38c 100644
--- a/wildmatch.h
+++ b/wildmatch.h
@@ -1,3 +1,9 @@
 /* wildmatch.h */
 
+#define ABORT_MALFORMED 2
+#define NOMATCH 1
+#define MATCH 0
+#define ABORT_ALL -1
+#define ABORT_TO_STARSTAR -2
+
 int wildmatch(const char *pattern, const char *text, int flags);
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 11/12] wildmatch: make /**/ match zero or more directories
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (9 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 10/12] wildmatch: adjust "**" behavior Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  2012-10-14  2:35 ` [PATCH v5 12/12] Support "**" wildcard in .gitignore and .gitattributes Nguyễn Thái Ngọc Duy
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy

"foo/**/bar" matches "foo/x/bar", "foo/x/y/bar"... but not
"foo/bar". We make a special case, when foo/**/ is detected (and
"foo/" part is already matched), try matching "bar" with the rest of
the string.

"Match one or more directories" semantics can be easily achieved using
"foo/*/**/bar".

This also makes "**/foo" match "foo" in addition to "x/foo",
"x/y/foo"...

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 t/t3070-wildmatch.sh |  8 +++++++-
 wildmatch.c          | 17 +++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh
index 15848d5..e6ad6f4 100755
--- a/t/t3070-wildmatch.sh
+++ b/t/t3070-wildmatch.sh
@@ -63,11 +63,17 @@ match 1 1 ']' ']'
 match 0 0 'foo/baz/bar' 'foo*bar'
 match 0 0 'foo/baz/bar' 'foo**bar'
 match 0 1 'foobazbar' 'foo**bar'
+match 1 1 'foo/baz/bar' 'foo/**/bar'
+match 1 0 'foo/baz/bar' 'foo/**/**/bar'
+match 1 0 'foo/b/a/z/bar' 'foo/**/bar'
+match 1 0 'foo/b/a/z/bar' 'foo/**/**/bar'
+match 1 0 'foo/bar' 'foo/**/bar'
+match 1 0 'foo/bar' 'foo/**/**/bar'
 match 0 0 'foo/bar' 'foo?bar'
 match 0 0 'foo/bar' 'foo[/]bar'
 match 0 0 'foo/bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
 match 1 1 'foo-bar' 'f[^eiu][^eiu][^eiu][^eiu][^eiu]r'
-match 0 0 'foo' '**/foo'
+match 1 0 'foo' '**/foo'
 match 1 x '/foo' '**/foo'
 match 1 0 'bar/baz/foo' '**/foo'
 match 0 0 'bar/baz/foo' '*/foo'
diff --git a/wildmatch.c b/wildmatch.c
index 7209f26..35c34ac 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -90,6 +90,23 @@ static int dowild(const uchar *p, const uchar *text, int force_lower_case)
 		if ((prev_p == text || *prev_p == '/') ||
 		    (*p == '\0' || *p == '/' ||
 		     (p[0] == '\\' && p[1] == '/'))) {
+			/*
+			 * Assuming we already match 'foo/' and are at
+			 * <star star slash>, just assume it matches
+			 * nothing and go ahead match the rest of the
+			 * pattern with the remaining string. This
+			 * helps make foo/<*><*>/bar (<> because
+			 * otherwise it breaks C comment syntax) match
+			 * both foo/bar and foo/a/bar.
+			 *
+			 * Crazy patterns like /<*><*>/<*><*>/ are
+			 * treated like /<*><*>/. But undefined
+			 * behavior is even appropriate for people
+			 * writing such a pattern.
+			 */
+			if (p[0] == '/' &&
+			    dowild(p + 1, text, force_lower_case) == MATCH)
+				return MATCH;
 		    special = TRUE;
 		} else
 		    return ABORT_MALFORMED;
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* [PATCH v5 12/12] Support "**" wildcard in .gitignore and .gitattributes
  2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
                   ` (10 preceding siblings ...)
  2012-10-14  2:35 ` [PATCH v5 11/12] wildmatch: make /**/ match zero or more directories Nguyễn Thái Ngọc Duy
@ 2012-10-14  2:35 ` Nguyễn Thái Ngọc Duy
  11 siblings, 0 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-10-14  2:35 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Nguyễn Thái Ngọc Duy


Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Documentation/gitignore.txt        | 19 +++++++++++++++++++
 attr.c                             |  4 +++-
 dir.c                              |  4 +++-
 t/t0003-attributes.sh              | 37 +++++++++++++++++++++++++++++++++++++
 t/t3001-ls-files-others-exclude.sh | 19 +++++++++++++++++++
 5 files changed, 81 insertions(+), 2 deletions(-)

diff --git a/Documentation/gitignore.txt b/Documentation/gitignore.txt
index 1b82fe1..91a6438 100644
--- a/Documentation/gitignore.txt
+++ b/Documentation/gitignore.txt
@@ -108,6 +108,25 @@ PATTERN FORMAT
    For example, "/{asterisk}.c" matches "cat-file.c" but not
    "mozilla-sha1/sha1.c".
 
+Two consecutive asterisks ("`**`") in patterns matched against
+the full pathname may have special meaning:
+
+ - A leading "`**`" followed by a slash means match in all
+   directories. For example, "`**/foo`" matches file or directory
+   "`foo`" anywhere, the same as pattern "`foo`". "`**/foo/bar`"
+   matches file or directory "`bar`" anywhere that is directly
+   under directory "`foo`".
+
+ - A trailing "`/**`" matches everything inside. For example,
+   "`abc/**`" matches all files inside directory "`abc`", relative
+   to the location of the `.gitignore` file, with infinite depth.
+
+ - A slash followed by two consecutive asterisks then a slash
+   matches zero or more directories. For example, "`a/**/b`"
+   matches "`a/b`", "`a/x/b`", "`a/x/y/b`" and so on.
+
+ - Other consecutive asterisks are considered invalid.
+
 NOTES
 -----
 
diff --git a/attr.c b/attr.c
index 887a9ae..8010429 100644
--- a/attr.c
+++ b/attr.c
@@ -12,6 +12,7 @@
 #include "exec_cmd.h"
 #include "attr.h"
 #include "dir.h"
+#include "wildmatch.h"
 
 const char git_attr__true[] = "(builtin)true";
 const char git_attr__false[] = "\0(builtin)false";
@@ -666,7 +667,8 @@ static int path_matches(const char *pathname, int pathlen,
 		return 0;
 	if (baselen != 0)
 		baselen++;
-	return fnmatch_icase(pattern, pathname + baselen, FNM_PATHNAME) == 0;
+	return wildmatch(pattern, pathname + baselen,
+			 ignore_case ? FNM_CASEFOLD : 0) == 0;
 }
 
 static int macroexpand_one(int attr_nr, int rem);
diff --git a/dir.c b/dir.c
index 4868339..442db1c 100644
--- a/dir.c
+++ b/dir.c
@@ -8,6 +8,7 @@
 #include "cache.h"
 #include "dir.h"
 #include "refs.h"
+#include "wildmatch.h"
 
 struct path_simplify {
 	int len;
@@ -575,7 +576,8 @@ int excluded_from_list(const char *pathname,
 			namelen -= prefix;
 		}
 
-		if (!namelen || !fnmatch_icase(exclude, name, FNM_PATHNAME))
+		if (!namelen ||
+		    wildmatch(exclude, name, ignore_case ? FNM_CASEFOLD : 0) == 0)
 			return to_exclude;
 	}
 	return -1; /* undecided */
diff --git a/t/t0003-attributes.sh b/t/t0003-attributes.sh
index febc45c..d5a6946 100755
--- a/t/t0003-attributes.sh
+++ b/t/t0003-attributes.sh
@@ -196,6 +196,43 @@ test_expect_success 'root subdir attribute test' '
 	attr_check subdir/a/i unspecified
 '
 
+test_expect_success '"**" test' '
+	echo "**/f foo=bar" >.gitattributes &&
+	cat <<\EOF >expect &&
+f: foo: bar
+a/f: foo: bar
+a/b/f: foo: bar
+a/b/c/f: foo: bar
+EOF
+	git check-attr foo -- "f" >actual 2>err &&
+	git check-attr foo -- "a/f" >>actual 2>>err &&
+	git check-attr foo -- "a/b/f" >>actual 2>>err &&
+	git check-attr foo -- "a/b/c/f" >>actual 2>>err &&
+	test_cmp expect actual &&
+	test_line_count = 0 err
+'
+
+test_expect_success '"**" with no slashes test' '
+	echo "a**f foo=bar" >.gitattributes &&
+	git check-attr foo -- "f" >actual &&
+	cat <<\EOF >expect &&
+f: foo: unspecified
+af: foo: bar
+axf: foo: bar
+a/f: foo: unspecified
+a/b/f: foo: unspecified
+a/b/c/f: foo: unspecified
+EOF
+	git check-attr foo -- "f" >actual 2>err &&
+	git check-attr foo -- "af" >>actual 2>err &&
+	git check-attr foo -- "axf" >>actual 2>err &&
+	git check-attr foo -- "a/f" >>actual 2>>err &&
+	git check-attr foo -- "a/b/f" >>actual 2>>err &&
+	git check-attr foo -- "a/b/c/f" >>actual 2>>err &&
+	test_cmp expect actual &&
+	test_line_count = 0 err
+'
+
 test_expect_success 'setup bare' '
 	git clone --bare . bare.git &&
 	cd bare.git
diff --git a/t/t3001-ls-files-others-exclude.sh b/t/t3001-ls-files-others-exclude.sh
index c8fe978..278315d 100755
--- a/t/t3001-ls-files-others-exclude.sh
+++ b/t/t3001-ls-files-others-exclude.sh
@@ -214,4 +214,23 @@ test_expect_success 'subdirectory ignore (l1)' '
 	test_cmp expect actual
 '
 
+
+test_expect_success 'ls-files with "**" patterns' '
+	cat <<\EOF >expect &&
+a.1
+one/a.1
+one/two/a.1
+three/a.1
+EOF
+	git ls-files -o -i --exclude "**/a.1" >actual
+	test_cmp expect actual
+'
+
+
+test_expect_success 'ls-files with "**" patterns and no slashes' '
+	: >expect &&
+	git ls-files -o -i --exclude "one**a.1" >actual &&
+	test_cmp expect actual
+'
+
 test_done
-- 
1.8.0.rc2.11.g2b79d01

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14  2:35 ` [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint Nguyễn Thái Ngọc Duy
@ 2012-10-14  5:02   ` Junio C Hamano
  2012-10-14  5:07     ` Nguyen Thai Ngoc Duy
  2012-10-14 12:59   ` René Scharfe
  1 sibling, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2012-10-14  5:02 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---

The description to justify why it is ctype2[] seems to have been
lost.  Intended?

>  ctype.c           | 18 ++++++++++++++++++
>  git-compat-util.h | 13 +++++++++++++
>  2 files changed, 31 insertions(+)
>
> diff --git a/ctype.c b/ctype.c
> index faeaf34..b4bf48a 100644
> --- a/ctype.c
> +++ b/ctype.c
> @@ -26,6 +26,24 @@ const unsigned char sane_ctype[256] = {
>  	/* Nothing in the 128.. range */
>  };
>  
> +enum {
> +	CN = GIT_CNTRL,
> +	PU = GIT_PUNCT,
> +	XD = GIT_XDIGIT,
> +};
> +
> +const unsigned char sane_ctype2[256] = {
> +	CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*    0..15 */
> +	CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*   16..31 */
> +	0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*   32..47 */
> +	XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*   48..63 */
> +	PU, 0,	XD, 0,	XD, 0,	XD, 0,	0,  0,	0,  0,	0,  0,	0,  0,	/*   64..79 */
> +	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*   80..95 */
> +	PU, 0,	XD, 0,	XD, 0,	XD, 0,	0,  0,	0,  0,	0,  0,	0,  0,	/*  96..111 */
> +	0,  0,	0,  0,	0,  0,	0,  0,	0,  0,	0,  PU, PU, PU, PU, CN, /* 112..127 */
> +	/* Nothing in the 128.. range */
> +};
> +
>  /* For case-insensitive kwset */
>  const char tolower_trans_tbl[256] = {
>  	0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
> diff --git a/git-compat-util.h b/git-compat-util.h
> index f8b859c..ea11694 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -510,14 +510,23 @@ extern const char tolower_trans_tbl[256];
>  #undef isupper
>  #undef tolower
>  #undef toupper
> +#undef iscntrl
> +#undef ispunct
> +#undef isxdigit
> +#undef isprint
>  extern const unsigned char sane_ctype[256];
> +extern const unsigned char sane_ctype2[256];
>  #define GIT_SPACE 0x01
>  #define GIT_DIGIT 0x02
>  #define GIT_ALPHA 0x04
>  #define GIT_GLOB_SPECIAL 0x08
>  #define GIT_REGEX_SPECIAL 0x10
>  #define GIT_PATHSPEC_MAGIC 0x20
> +#define GIT_CNTRL 0x01
> +#define GIT_PUNCT 0x02
> +#define GIT_XDIGIT 0x04
>  #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
> +#define sane_istest2(x,mask) ((sane_ctype2[(unsigned char)(x)] & (mask)) != 0)
>  #define isascii(x) (((x) & ~0x7f) == 0)
>  #define isspace(x) sane_istest(x,GIT_SPACE)
>  #define isdigit(x) sane_istest(x,GIT_DIGIT)
> @@ -527,6 +536,10 @@ extern const unsigned char sane_ctype[256];
>  #define isupper(x) sane_iscase(x, 0)
>  #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
>  #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
> +#define iscntrl(x) sane_istest2(x, GIT_CNTRL)
> +#define ispunct(x) sane_istest2(x, GIT_PUNCT)
> +#define isxdigit(x) sane_istest2(x, GIT_XDIGIT)
> +#define isprint(x) (isalnum(x) || isspace(x) || ispunct(x))
>  #define tolower(x) sane_case((unsigned char)(x), 0x20)
>  #define toupper(x) sane_case((unsigned char)(x), 0)
>  #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 04/12] wildmatch: remove unnecessary functions
  2012-10-14  2:35 ` [PATCH v5 04/12] wildmatch: remove unnecessary functions Nguyễn Thái Ngọc Duy
@ 2012-10-14  5:04   ` Junio C Hamano
  2012-10-14  6:29     ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 37+ messages in thread
From: Junio C Hamano @ 2012-10-14  5:04 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---

The comment-fix seems to be new but otherwise this is unchanged,
right?


>  wildmatch.c | 164 ++++--------------------------------------------------------
>  wildmatch.h |   2 -
>  2 files changed, 10 insertions(+), 156 deletions(-)
>
> diff --git a/wildmatch.c b/wildmatch.c
> index f3a1731..fae7397 100644
> --- a/wildmatch.c
> +++ b/wildmatch.c
> @@ -53,33 +53,18 @@
>  #define ISUPPER(c) (ISASCII(c) && isupper(c))
>  #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
>  
> -#ifdef WILD_TEST_ITERATIONS
> -int wildmatch_iteration_count;
> -#endif
> -
>  static int force_lower_case = 0;
>  
> -/* Match pattern "p" against the a virtually-joined string consisting
> - * of "text" and any strings in array "a". */
> -static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
> +/* Match pattern "p" against "text" */
> +static int dowild(const uchar *p, const uchar *text)
>  {
>      uchar p_ch;
>  
> -#ifdef WILD_TEST_ITERATIONS
> -    wildmatch_iteration_count++;
> -#endif
> -
>      for ( ; (p_ch = *p) != '\0'; text++, p++) {
>  	int matched, special;
>  	uchar t_ch, prev_ch;
> -	while ((t_ch = *text) == '\0') {
> -	    if (*a == NULL) {
> -		if (p_ch != '*')
> -		    return ABORT_ALL;
> -		break;
> -	    }
> -	    text = *a++;
> -	}
> +	if ((t_ch = *text) == '\0' && p_ch != '*')
> +		return ABORT_ALL;
>  	if (force_lower_case && ISUPPER(t_ch))
>  	    t_ch = tolower(t_ch);
>  	switch (p_ch) {
> @@ -107,21 +92,15 @@ static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
>  		/* Trailing "**" matches everything.  Trailing "*" matches
>  		 * only if there are no more slash characters. */
>  		if (!special) {
> -		    do {
>  			if (strchr((char*)text, '/') != NULL)
>  			    return FALSE;
> -		    } while ((text = *a++) != NULL);
>  		}
>  		return TRUE;
>  	    }
>  	    while (1) {
> -		if (t_ch == '\0') {
> -		    if ((text = *a++) == NULL)
> -			break;
> -		    t_ch = *text;
> -		    continue;
> -		}
> -		if ((matched = dowild(p, text, a)) != FALSE) {
> +		if (t_ch == '\0')
> +		    break;
> +		if ((matched = dowild(p, text)) != FALSE) {
>  		    if (!special || matched != ABORT_TO_STARSTAR)
>  			return matched;
>  		} else if (!special && t_ch == '/')
> @@ -225,144 +204,21 @@ static int dowild(const uchar *p, const uchar *text, const uchar*const *a)
>  	}
>      }
>  
> -    do {
> -	if (*text)
> -	    return FALSE;
> -    } while ((text = *a++) != NULL);
> -
> -    return TRUE;
> -}
> -
> -/* Match literal string "s" against the a virtually-joined string consisting
> - * of "text" and any strings in array "a". */
> -static int doliteral(const uchar *s, const uchar *text, const uchar*const *a)
> -{
> -    for ( ; *s != '\0'; text++, s++) {
> -	while (*text == '\0') {
> -	    if ((text = *a++) == NULL)
> -		return FALSE;
> -	}
> -	if (*text != *s)
> -	    return FALSE;
> -    }
> -
> -    do {
> -	if (*text)
> -	    return FALSE;
> -    } while ((text = *a++) != NULL);
> -
> -    return TRUE;
> -}
> -
> -/* Return the last "count" path elements from the concatenated string.
> - * We return a string pointer to the start of the string, and update the
> - * array pointer-pointer to point to any remaining string elements. */
> -static const uchar *trailing_N_elements(const uchar*const **a_ptr, int count)
> -{
> -    const uchar*const *a = *a_ptr;
> -    const uchar*const *first_a = a;
> -
> -    while (*a)
> -	    a++;
> -
> -    while (a != first_a) {
> -	const uchar *s = *--a;
> -	s += strlen((char*)s);
> -	while (--s >= *a) {
> -	    if (*s == '/' && !--count) {
> -		*a_ptr = a+1;
> -		return s+1;
> -	    }
> -	}
> -    }
> -
> -    if (count == 1) {
> -	*a_ptr = a+1;
> -	return *a;
> -    }
> -
> -    return NULL;
> +    return *text ? FALSE : TRUE;
>  }
>  
>  /* Match the "pattern" against the "text" string. */
>  int wildmatch(const char *pattern, const char *text)
>  {
> -    static const uchar *nomore[1]; /* A NULL pointer. */
> -#ifdef WILD_TEST_ITERATIONS
> -    wildmatch_iteration_count = 0;
> -#endif
> -    return dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
> +    return dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
>  }
>  
>  /* Match the "pattern" against the forced-to-lower-case "text" string. */
>  int iwildmatch(const char *pattern, const char *text)
>  {
> -    static const uchar *nomore[1]; /* A NULL pointer. */
>      int ret;
> -#ifdef WILD_TEST_ITERATIONS
> -    wildmatch_iteration_count = 0;
> -#endif
>      force_lower_case = 1;
> -    ret = dowild((const uchar*)pattern, (const uchar*)text, nomore) == TRUE;
> +    ret = dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
>      force_lower_case = 0;
>      return ret;
>  }
> -
> -/* Match pattern "p" against the a virtually-joined string consisting
> - * of all the pointers in array "texts" (which has a NULL pointer at the
> - * end).  The int "where" can be 0 (normal matching), > 0 (match only
> - * the trailing N slash-separated filename components of "texts"), or < 0
> - * (match the "pattern" at the start or after any slash in "texts"). */
> -int wildmatch_array(const char *pattern, const char*const *texts, int where)
> -{
> -    const uchar *p = (const uchar*)pattern;
> -    const uchar*const *a = (const uchar*const*)texts;
> -    const uchar *text;
> -    int matched;
> -
> -#ifdef WILD_TEST_ITERATIONS
> -    wildmatch_iteration_count = 0;
> -#endif
> -
> -    if (where > 0)
> -	text = trailing_N_elements(&a, where);
> -    else
> -	text = *a++;
> -    if (!text)
> -	return FALSE;
> -
> -    if ((matched = dowild(p, text, a)) != TRUE && where < 0
> -     && matched != ABORT_ALL) {
> -	while (1) {
> -	    if (*text == '\0') {
> -		if ((text = (uchar*)*a++) == NULL)
> -		    return FALSE;
> -		continue;
> -	    }
> -	    if (*text++ == '/' && (matched = dowild(p, text, a)) != FALSE
> -	     && matched != ABORT_TO_STARSTAR)
> -		break;
> -	}
> -    }
> -    return matched == TRUE;
> -}
> -
> -/* Match literal string "s" against the a virtually-joined string consisting
> - * of all the pointers in array "texts" (which has a NULL pointer at the
> - * end).  The int "where" can be 0 (normal matching), or > 0 (match
> - * only the trailing N slash-separated filename components of "texts"). */
> -int litmatch_array(const char *string, const char*const *texts, int where)
> -{
> -    const uchar *s = (const uchar*)string;
> -    const uchar*const *a = (const uchar* const*)texts;
> -    const uchar *text;
> -
> -    if (where > 0)
> -	text = trailing_N_elements(&a, where);
> -    else
> -	text = *a++;
> -    if (!text)
> -	return FALSE;
> -
> -    return doliteral(s, text, a) == TRUE;
> -}
> diff --git a/wildmatch.h b/wildmatch.h
> index e7f1a35..562faa3 100644
> --- a/wildmatch.h
> +++ b/wildmatch.h
> @@ -2,5 +2,3 @@
>  
>  int wildmatch(const char *pattern, const char *text);
>  int iwildmatch(const char *pattern, const char *text);
> -int wildmatch_array(const char *pattern, const char*const *texts, int where);
> -int litmatch_array(const char *string, const char*const *texts, int where);

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 05/12] Integrate wildmatch to git
  2012-10-14  2:35 ` [PATCH v5 05/12] Integrate wildmatch to git Nguyễn Thái Ngọc Duy
@ 2012-10-14  5:06   ` Junio C Hamano
  2012-10-14 11:07   ` Torsten Bögershausen
  1 sibling, 0 replies; 37+ messages in thread
From: Junio C Hamano @ 2012-10-14  5:06 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> +++ b/t/t3070-wildmatch.sh
> @@ -0,0 +1,188 @@
> +#!/bin/sh
> +
> +test_description='wildmatch tests'
> +
> +. ./test-lib.sh
> +
> +match() {
> +    if [ $1 = 1 ]; then
> +	test_expect_success "wildmatch:    match '$3' '$4'" "
> +	    test-wildmatch wildmatch '$3' '$4'
> +	"
> +    else
> +	test_expect_success "wildmatch: no match '$3' '$4'" "
> +	    ! test-wildmatch wildmatch '$3' '$4'
> +	"
> +    fi
> +    if [ $2 = 1 ]; then
> +	test_expect_success "fnmatch:      match '$3' '$4'" "
> +	    test-wildmatch fnmatch '$3' '$4'
> +	"
> +    elif [ $2 = 0 ]; then
> +	test_expect_success "fnmatch:   no match '$3' '$4'" "
> +	    ! test-wildmatch fnmatch '$3' '$4'
> +	"
> +#    else
> +#	test_expect_success BROKEN_FNMATCH "fnmatch:       '$3' '$4'" "
> +#	    ! test-wildmatch fnmatch '$3' '$4'
> +#	"
> +    fi

Heh, broken can be two-way.  Either it may succeed matching what it
shouldn't, or it may not match what it should.

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14  5:02   ` Junio C Hamano
@ 2012-10-14  5:07     ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 37+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-14  5:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, Oct 14, 2012 at 12:02 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>> ---
>
> The description to justify why it is ctype2[] seems to have been
> lost.  Intended?

Nope. I added the description after generating the patches and forgot
to apply the same change to my branch. Thanks for catching.
-- 
Duy

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 07/12] wildmatch: make wildmatch's return value compatible with fnmatch
  2012-10-14  2:35 ` [PATCH v5 07/12] wildmatch: make wildmatch's return value compatible with fnmatch Nguyễn Thái Ngọc Duy
@ 2012-10-14  5:09   ` Junio C Hamano
  0 siblings, 0 replies; 37+ messages in thread
From: Junio C Hamano @ 2012-10-14  5:09 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git

Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:

> wildmatch returns non-zero if matched, zero otherwise. This patch
> makes it return zero if matches, non-zero otherwise, like fnmatch().
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---

OK, so ABORT cases where the patterns are either broken or impossible
to match are also taken as not-matching, which sounds like the right
thing to do.

>  test-wildmatch.c |  4 ++--
>  wildmatch.c      | 21 ++++++++++++---------
>  2 files changed, 14 insertions(+), 11 deletions(-)
>
> diff --git a/test-wildmatch.c b/test-wildmatch.c
> index ac56420..77014e9 100644
> --- a/test-wildmatch.c
> +++ b/test-wildmatch.c
> @@ -4,9 +4,9 @@
>  int main(int argc, char **argv)
>  {
>  	if (!strcmp(argv[1], "wildmatch"))
> -		return wildmatch(argv[3], argv[2]) ? 0 : 1;
> +		return !!wildmatch(argv[3], argv[2]);
>  	else if (!strcmp(argv[1], "iwildmatch"))
> -		return iwildmatch(argv[3], argv[2]) ? 0 : 1;
> +		return !!iwildmatch(argv[3], argv[2]);
>  	else if (!strcmp(argv[1], "fnmatch"))
>  		return !!fnmatch(argv[3], argv[2], FNM_PATHNAME);
>  	else
> diff --git a/wildmatch.c b/wildmatch.c
> index d0b906a..e3ac6cc 100644
> --- a/wildmatch.c
> +++ b/wildmatch.c
> @@ -20,6 +20,9 @@ typedef unsigned char uchar;
>  
>  #define FALSE 0
>  #define TRUE 1
> +
> +#define NOMATCH 1
> +#define MATCH 0
>  #define ABORT_ALL -1
>  #define ABORT_TO_STARSTAR -2
>  
> @@ -78,12 +81,12 @@ static int dowild(const uchar *p, const uchar *text)
>  	    /* FALLTHROUGH */
>  	  default:
>  	    if (t_ch != p_ch)
> -		return FALSE;
> +		return NOMATCH;
>  	    continue;
>  	  case '?':
>  	    /* Match anything but '/'. */
>  	    if (t_ch == '/')
> -		return FALSE;
> +		return NOMATCH;
>  	    continue;
>  	  case '*':
>  	    if (*++p == '*') {
> @@ -96,14 +99,14 @@ static int dowild(const uchar *p, const uchar *text)
>  		 * only if there are no more slash characters. */
>  		if (!special) {
>  			if (strchr((char*)text, '/') != NULL)
> -			    return FALSE;
> +			    return NOMATCH;
>  		}
> -		return TRUE;
> +		return MATCH;
>  	    }
>  	    while (1) {
>  		if (t_ch == '\0')
>  		    break;
> -		if ((matched = dowild(p, text)) != FALSE) {
> +		if ((matched = dowild(p, text)) != NOMATCH) {
>  		    if (!special || matched != ABORT_TO_STARSTAR)
>  			return matched;
>  		} else if (!special && t_ch == '/')
> @@ -202,18 +205,18 @@ static int dowild(const uchar *p, const uchar *text)
>  		    matched = TRUE;
>  	    } while (prev_ch = p_ch, (p_ch = *++p) != ']');
>  	    if (matched == special || t_ch == '/')
> -		return FALSE;
> +		return NOMATCH;
>  	    continue;
>  	}
>      }
>  
> -    return *text ? FALSE : TRUE;
> +    return *text ? NOMATCH : MATCH;
>  }
>  
>  /* Match the "pattern" against the "text" string. */
>  int wildmatch(const char *pattern, const char *text)
>  {
> -    return dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
> +    return dowild((const uchar*)pattern, (const uchar*)text);
>  }
>  
>  /* Match the "pattern" against the forced-to-lower-case "text" string. */
> @@ -221,7 +224,7 @@ int iwildmatch(const char *pattern, const char *text)
>  {
>      int ret;
>      force_lower_case = 1;
> -    ret = dowild((const uchar*)pattern, (const uchar*)text) == TRUE;
> +    ret = dowild((const uchar*)pattern, (const uchar*)text);
>      force_lower_case = 0;
>      return ret;
>  }

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 04/12] wildmatch: remove unnecessary functions
  2012-10-14  5:04   ` Junio C Hamano
@ 2012-10-14  6:29     ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 37+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-14  6:29 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sun, Oct 14, 2012 at 12:04 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Nguyễn Thái Ngọc Duy  <pclouds@gmail.com> writes:
>
>> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
>> ---
>
> The comment-fix seems to be new but otherwise this is unchanged,
> right?

Right.
-- 
Duy

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 05/12] Integrate wildmatch to git
  2012-10-14  2:35 ` [PATCH v5 05/12] Integrate wildmatch to git Nguyễn Thái Ngọc Duy
  2012-10-14  5:06   ` Junio C Hamano
@ 2012-10-14 11:07   ` Torsten Bögershausen
  1 sibling, 0 replies; 37+ messages in thread
From: Torsten Bögershausen @ 2012-10-14 11:07 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: git, Junio C Hamano, Torsten Bögershausen

diff --git a/t/t3070-wildmatch.sh b/t/t3070-wildmatch.sh
new file mode 100755
index 0000000..dbd3c8b
--- /dev/null
+++ b/t/t3070-wildmatch.sh
@@ -0,0 +1,188 @@
+#!/bin/sh
+#    else
+#    test_expect_success BROKEN_FNMATCH "fnmatch:       '$3' '$4'" "
+#        ! test-wildmatch fnmatch '$3' '$4'
+#    "
+    fi
+}
+

Thanks:
On my Mac OS X box:
# passed all 259 test(s)

And a quick test on cygwin:
$ ./t3070-wildmatch.sh  2>&1 | grep "not ok"
not ok - 148 fnmatch:      match '5' '[[:xdigit:]]'
not ok - 150 fnmatch:      match 'f' '[[:xdigit:]]'
not ok - 152 fnmatch:      match 'D' '[[:xdigit:]]'


And 2 micronits:
a) Commented out code
b) Whitespace damage
(4 spaces used for an indent of 1, TAB for indent of 2)

/Torsten

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14  2:35 ` [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint Nguyễn Thái Ngọc Duy
  2012-10-14  5:02   ` Junio C Hamano
@ 2012-10-14 12:59   ` René Scharfe
  2012-10-14 13:25     ` Nguyen Thai Ngoc Duy
  1 sibling, 1 reply; 37+ messages in thread
From: René Scharfe @ 2012-10-14 12:59 UTC (permalink / raw)
  To: git; +Cc: git, Junio C Hamano

Am 14.10.2012 04:35, schrieb Nguyễn Thái Ngọc Duy:
>
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>   ctype.c           | 18 ++++++++++++++++++
>   git-compat-util.h | 13 +++++++++++++
>   2 files changed, 31 insertions(+)
>
> diff --git a/ctype.c b/ctype.c
> index faeaf34..b4bf48a 100644
> --- a/ctype.c
> +++ b/ctype.c
> @@ -26,6 +26,24 @@ const unsigned char sane_ctype[256] = {
>   	/* Nothing in the 128.. range */
>   };
>
> +enum {
> +	CN = GIT_CNTRL,
> +	PU = GIT_PUNCT,
> +	XD = GIT_XDIGIT,
> +};
> +
> +const unsigned char sane_ctype2[256] = {
> +	CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*    0..15 */
> +	CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*   16..31 */
> +	0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*   32..47 */
> +	XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*   48..63 */
> +	PU, 0,	XD, 0,	XD, 0,	XD, 0,	0,  0,	0,  0,	0,  0,	0,  0,	/*   64..79 */
> +	0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*   80..95 */
> +	PU, 0,	XD, 0,	XD, 0,	XD, 0,	0,  0,	0,  0,	0,  0,	0,  0,	/*  96..111 */
> +	0,  0,	0,  0,	0,  0,	0,  0,	0,  0,	0,  PU, PU, PU, PU, CN, /* 112..127 */

Shouldn't [ACE] (65, 67, 69) and [ace] (97, 99, 101) be xdigits as well?

But how about using the existing hexval_table instead, like this:

	#define isxdigit(x) (hexval_table[(x)] != -1)

With that, couldn't you squeeze the other two classes into the existing 
sane_ctype?

By the way, I'm working on a patch series for implementing a lot more 
character classes with table lookups.  It grew out of a desire to make 
bad_ref_char() faster but perhaps got a bit out of hand by now; it's at 
24 patches and still not finished.  I'm curious how long we have until 
it escapes. ;-)

>  #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
> +#define iscntrl(x) sane_istest2(x, GIT_CNTRL)
> +#define ispunct(x) sane_istest2(x, GIT_PUNCT)
> +#define isxdigit(x) sane_istest2(x, GIT_XDIGIT)
> +#define isprint(x) (isalnum(x) || isspace(x) || ispunct(x))

If a single table is used, you can do with a single table lookup by 
adding the bits for the component classes, like isalnum and 
is_regex_special do.

René

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14 12:59   ` René Scharfe
@ 2012-10-14 13:25     ` Nguyen Thai Ngoc Duy
  2012-10-14 13:59       ` René Scharfe
  0 siblings, 1 reply; 37+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-14 13:25 UTC (permalink / raw)
  To: René Scharfe; +Cc: git, Junio C Hamano

On Sun, Oct 14, 2012 at 7:59 PM, René Scharfe
<rene.scharfe@lsrfire.ath.cx> wrote:
>> +const unsigned char sane_ctype2[256] = {
>> +       CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
>> 0..15 */
>> +       CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, CN, /*
>> 16..31 */
>> +       0,  PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, PU, /*
>> 32..47 */
>> +       XD, XD, XD, XD, XD, XD, XD, XD, XD, XD, PU, PU, PU, PU, PU, PU, /*
>> 48..63 */
>> +       PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*
>> 64..79 */
>> +       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, PU, /*
>> 80..95 */
>> +       PU, 0,  XD, 0,  XD, 0,  XD, 0,  0,  0,  0,  0,  0,  0,  0,  0,  /*
>> 96..111 */
>> +       0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  PU, PU, PU, PU, CN, /*
>> 112..127 */
>
>
> Shouldn't [ace] (65, 67, 69) and [ACE] (97, 99, 101) be xdigits as well?

Hmm.. I generated it from LANG=C. I wonder where I got it wrong..

> But how about using the existing hexval_table instead, like this:
>
>         #define isxdigit(x) (hexval_table[(x)] != -1)
>
> With that, couldn't you squeeze the other two classes into the existing
> sane_type?

No, there are still conflicts: 9, 10 and 13 as spaces (vs controls) and
123, 124 and 126 as regex/pathspec special (vs punctuation).

> By the way, I'm working on a patch series for implementing a lot more
> character classes with table lookups.  It grew out of a desire to make
> bad_ref_char() faster but perhaps got a bit out of hand by now; it's at 24
> patches and still not finished.  I'm curious how long we have until it
> escapes. ;-)

I don't think the series is going to graduate any time soon :)
-- 
Duy

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14 13:25     ` Nguyen Thai Ngoc Duy
@ 2012-10-14 13:59       ` René Scharfe
  2012-10-14 14:26         ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 37+ messages in thread
From: René Scharfe @ 2012-10-14 13:59 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: git, Junio C Hamano

Am 14.10.2012 15:25, schrieb Nguyen Thai Ngoc Duy:
> On Sun, Oct 14, 2012 at 7:59 PM, René Scharfe
> <rene.scharfe@lsrfire.ath.cx> wrote:
>> With that, couldn't you squeeze the other two classes into the existing
>> sane_type?
>
> No there are still conflicts: 9, 10 and 13 as spaces (vs controls) and
> 123, 124 and 126 as regex/pathspec special (vs punctuation).

That's not a problem, an entry in the table can have more than one bit 
set -- just OR them together in ctype.c.  It may not look as nice, but 
that's OK.  You could also define a character for GIT_SPACE | GIT_CNTRL 
etc. for cosmetic reasons.

René

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14 13:59       ` René Scharfe
@ 2012-10-14 14:26         ` Nguyen Thai Ngoc Duy
  2012-10-17 12:09           ` "Jan H. Schönherr"
  0 siblings, 1 reply; 37+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-14 14:26 UTC (permalink / raw)
  To: René Scharfe; +Cc: git, Junio C Hamano

On Sun, Oct 14, 2012 at 03:59:31PM +0200, René Scharfe wrote:
> Am 14.10.2012 15:25, schrieb Nguyen Thai Ngoc Duy:
> > On Sun, Oct 14, 2012 at 7:59 PM, René Scharfe
> > <rene.scharfe@lsrfire.ath.cx> wrote:
> >> With that, couldn't you squeeze the other two classes into the existing
> >> sane_type?
> >
> > No there are still conflicts: 9, 10 and 13 as spaces (vs controls) and
> > 123, 124 and 126 as regex/pathspec special (vs punctuation).
> 
> That's not a problem, an entry in the table can have more than one bit 
> set -- just OR them together in ctype.c.  It may not look as nice, but 
> that's OK.  You could also define a character for GIT_SPACE | GIT_CNTRL 
> etc. for cosmetic reasons.

Only the space chars are not a subset of the control chars, so they need a new
combination. So the result does not look as bad as I thought:

-- 8< --
diff --git a/ctype.c b/ctype.c
index faeaf34..0bfebb4 100644
--- a/ctype.c
+++ b/ctype.c
@@ -11,18 +11,21 @@ enum {
 	D = GIT_DIGIT,
 	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
 	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
-	P = GIT_PATHSPEC_MAGIC  /* other non-alnum, except for ] and } */
+	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
+	X = GIT_CNTRL,
+	U = GIT_PUNCT,
+	Z = GIT_CNTRL | GIT_SPACE
 };
 
 const unsigned char sane_ctype[256] = {
-	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
-	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
+	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
+	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
 	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
 	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
 	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
-	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, P,		/*  80.. 95 */
+	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
 	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
-	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, P, 0,		/* 112..127 */
+	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
 	/* Nothing in the 128.. range */
 };
 
diff --git a/git-compat-util.h b/git-compat-util.h
index f8b859c..db77f3e 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -510,6 +510,10 @@ extern const char tolower_trans_tbl[256];
 #undef isupper
 #undef tolower
 #undef toupper
+#undef iscntrl
+#undef ispunct
+#undef isxdigit
+#undef isprint
 extern const unsigned char sane_ctype[256];
 #define GIT_SPACE 0x01
 #define GIT_DIGIT 0x02
@@ -517,6 +521,8 @@ extern const unsigned char sane_ctype[256];
 #define GIT_GLOB_SPECIAL 0x08
 #define GIT_REGEX_SPECIAL 0x10
 #define GIT_PATHSPEC_MAGIC 0x20
+#define GIT_CNTRL 0x40
+#define GIT_PUNCT 0x80
 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
 #define isascii(x) (((x) & ~0x7f) == 0)
 #define isspace(x) sane_istest(x,GIT_SPACE)
@@ -527,6 +533,13 @@ extern const unsigned char sane_ctype[256];
 #define isupper(x) sane_iscase(x, 0)
 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
 #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
+#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
+#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
+		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
+#define isxdigit(x) (hexval_table[x] != -1)
+#define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
+		GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
+		GIT_PATHSPEC_MAGIC))
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
-- 8< --

-- 
Duy

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-14 14:26         ` Nguyen Thai Ngoc Duy
@ 2012-10-17 12:09           ` "Jan H. Schönherr"
  2012-10-17 12:26             ` Nguyen Thai Ngoc Duy
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
  0 siblings, 2 replies; 37+ messages in thread
From: "Jan H. Schönherr" @ 2012-10-17 12:09 UTC (permalink / raw)
  To: Nguyen Thai Ngoc Duy; +Cc: René Scharfe, git, Junio C Hamano

Hi Nguyen.

I just had a need for isprint() myself, and then I found
your code here.

I had a look at the POSIX locale as described here:

http://sourceware.org/git/?p=glibc.git;a=blob;f=localedata/locales/POSIX

Some remarks below.

Am 14.10.2012 16:26, schrieb Nguyen Thai Ngoc Duy:
> -- 8< --
> diff --git a/ctype.c b/ctype.c
> index faeaf34..0bfebb4 100644
> --- a/ctype.c
> +++ b/ctype.c
> @@ -11,18 +11,21 @@ enum {
>  	D = GIT_DIGIT,
>  	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
>  	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
> -	P = GIT_PATHSPEC_MAGIC  /* other non-alnum, except for ] and } */
> +	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
> +	X = GIT_CNTRL,
> +	U = GIT_PUNCT,
> +	Z = GIT_CNTRL | GIT_SPACE
>  };
>  
>  const unsigned char sane_ctype[256] = {
> -	0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,		/*   0.. 15 */
> -	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,		/*  16.. 31 */
> +	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
> +	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */

"Normal" isspace() also includes vertical tab (11) and form-feed (12) as
white-space characters. Is there a reason, why they are not included here?

>  	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
>  	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
>  	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
> -	A, A, A, A, A, A, A, A, A, A, A, G, G, 0, R, P,		/*  80.. 95 */
> +	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
>  	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
> -	A, A, A, A, A, A, A, A, A, A, A, R, R, 0, P, 0,		/* 112..127 */
> +	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
>  	/* Nothing in the 128.. range */
>  };
>  
> diff --git a/git-compat-util.h b/git-compat-util.h
> index f8b859c..db77f3e 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
[...]
> @@ -527,6 +533,13 @@ extern const unsigned char sane_ctype[256];
>  #define isupper(x) sane_iscase(x, 0)
>  #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
>  #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
> +#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
> +#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
> +		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
> +#define isxdigit(x) (hexval_table[x] != -1)
> +#define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
> +		GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
> +		GIT_PATHSPEC_MAGIC))

"Normal" isprint() only includes space (32) from the white-space characters.
The other white-space characters are not considered printable.

Do we want to stay close to the "original", or not?

Regards
Jan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint
  2012-10-17 12:09           ` "Jan H. Schönherr"
@ 2012-10-17 12:26             ` Nguyen Thai Ngoc Duy
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
  1 sibling, 0 replies; 37+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-10-17 12:26 UTC (permalink / raw)
  To: Jan H. Schönherr
  Cc: René Scharfe, git, Junio C Hamano, Linus Torvalds

On Wed, Oct 17, 2012 at 7:09 PM, "Jan H. Schönherr"
<schnhrr@cs.tu-berlin.de> wrote:
>>  const unsigned char sane_ctype[256] = {
>> -     0, 0, 0, 0, 0, 0, 0, 0, 0, S, S, 0, 0, S, 0, 0,         /*   0.. 15 */
>> -     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,         /*  16.. 31 */
>> +     X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,         /*   0.. 15 */
>> +     X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,         /*  16.. 31 */
>
> "Normal" isspace() also includes vertical tab (11) and form-feed (12) as
> white-space characters. Is there a reason, why they are not included here?

I'm not sure. They were not classified as spaces in the very first
version in 4546738 (Unlocalized isspace and friends - 2005-10-13).
Maybe Linus had a reason to do so.

>> +#define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
>> +             GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
>> +             GIT_PATHSPEC_MAGIC))
>
> "Normal" isprint() only includes space (32) from the white-space characters.
> The other white-space characters are not considered printable.
>
> Do we want to stay close to the "original", or not?

We do. I followed [1] but obviously missed the last sentence in "print"
description: "No characters specified for the keyword cntrl shall be
specified". Thanks for catching. I'll fix it soon.

[1] http://pubs.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
-- 
Duy

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-10-17 12:09           ` "Jan H. Schönherr"
  2012-10-17 12:26             ` Nguyen Thai Ngoc Duy
@ 2012-11-13 10:46             ` Nguyễn Thái Ngọc Duy
  2012-11-13 18:58               ` "Jan H. Schönherr"
                                 ` (4 more replies)
  1 sibling, 5 replies; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-11-13 10:46 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, schnhrr, rene.scharfe,
	Nguyễn Thái Ngọc Duy

Git's isspace does not include 11 and 12. Git's isprint includes
control space characters (10-13). According to glibc-2.14.1 on C
locale on Linux, this is wrong. This patch fixes it.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 I wrote a small C program to compare the result of all is* functions
 that Git replaces against the libc version. These are the only ones that
 differ. Which matches what Jan Schönherr commented.

 ctype.c           |  6 +++---
 git-compat-util.h | 11 ++++++-----
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/ctype.c b/ctype.c
index 0bfebb4..71311a3 100644
--- a/ctype.c
+++ b/ctype.c
@@ -14,11 +14,11 @@ enum {
 	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
 	X = GIT_CNTRL,
 	U = GIT_PUNCT,
-	Z = GIT_CNTRL | GIT_SPACE
+	Z = GIT_CNTRL_SPACE
 };
 
-const unsigned char sane_ctype[256] = {
-	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
+const unsigned int sane_ctype[256] = {
+	X, X, X, X, X, X, X, X, X, Z, Z, Z, Z, Z, X, X,		/*   0.. 15 */
 	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
 	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
 	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
diff --git a/git-compat-util.h b/git-compat-util.h
index 02f48f6..4ed3f94 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -474,8 +474,8 @@ extern const char tolower_trans_tbl[256];
 #undef ispunct
 #undef isxdigit
 #undef isprint
-extern const unsigned char sane_ctype[256];
-#define GIT_SPACE 0x01
+extern const unsigned int sane_ctype[256];
+#define GIT_CNTRL_SPACE 0x01
 #define GIT_DIGIT 0x02
 #define GIT_ALPHA 0x04
 #define GIT_GLOB_SPECIAL 0x08
@@ -483,9 +483,10 @@ extern const unsigned char sane_ctype[256];
 #define GIT_PATHSPEC_MAGIC 0x20
 #define GIT_CNTRL 0x40
 #define GIT_PUNCT 0x80
-#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
+#define GIT_SPACE 0x100
+#define sane_istest(x,mask) ((sane_ctype[(unsigned int)(x)] & (mask)) != 0)
 #define isascii(x) (((x) & ~0x7f) == 0)
-#define isspace(x) sane_istest(x,GIT_SPACE)
+#define isspace(x) sane_istest(x,GIT_SPACE | GIT_CNTRL_SPACE)
 #define isdigit(x) sane_istest(x,GIT_DIGIT)
 #define isalpha(x) sane_istest(x,GIT_ALPHA)
 #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
@@ -493,7 +494,7 @@ extern const unsigned char sane_ctype[256];
 #define isupper(x) sane_iscase(x, 0)
 #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
 #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
-#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
+#define iscntrl(x) (sane_istest(x,GIT_CNTRL | GIT_CNTRL_SPACE))
 #define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
 		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
 #define isxdigit(x) (hexval_table[x] != -1)
-- 
1.8.0.rc2.23.g1fb49df

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
@ 2012-11-13 18:58               ` "Jan H. Schönherr"
  2012-11-13 19:14               ` René Scharfe
                                 ` (3 subsequent siblings)
  4 siblings, 0 replies; 37+ messages in thread
From: "Jan H. Schönherr" @ 2012-11-13 18:58 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Junio C Hamano, rene.scharfe

Hi.

Am 13.11.2012 11:46, schrieb Nguyễn Thái Ngọc Duy:
> Git's ispace does not include 11 and 12. Git's isprint includes
> control space characters (10-13). According to glibc-2.14.1 on C
> locale on Linux, this is wrong. This patch fixes it.
> 
> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
> ---
>  I wrote a small C program to compare the result of all is* functions
>  that Git replaces against the libc version. These are the only ones that
>  differ. Which matches what Jan Schönherr commented.
> 
>  ctype.c           |  6 +++---
>  git-compat-util.h | 11 ++++++-----
>  2 files changed, 9 insertions(+), 8 deletions(-)
> 
> diff --git a/ctype.c b/ctype.c
> index 0bfebb4..71311a3 100644
> --- a/ctype.c
> +++ b/ctype.c
> @@ -14,11 +14,11 @@ enum {
>  	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
>  	X = GIT_CNTRL,
>  	U = GIT_PUNCT,
> -	Z = GIT_CNTRL | GIT_SPACE
> +	Z = GIT_CNTRL_SPACE
>  };
>  
> -const unsigned char sane_ctype[256] = {
> -	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
> +const unsigned int sane_ctype[256] = {
> +	X, X, X, X, X, X, X, X, X, Z, Z, Z, Z, Z, X, X,		/*   0.. 15 */
>  	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
>  	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
>  	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */

An alternative to switching from 1-byte to 4-byte values (don't we have
a 2-byte datatype?), would be to free up GIT_CNTRL and simply do:

#define iscntrl(x) ((x) < 0x20)


> diff --git a/git-compat-util.h b/git-compat-util.h
> index 02f48f6..4ed3f94 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
[...]
> @@ -483,9 +483,10 @@ extern const unsigned char sane_ctype[256];
>  #define GIT_PATHSPEC_MAGIC 0x20
>  #define GIT_CNTRL 0x40
>  #define GIT_PUNCT 0x80
> -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
> +#define GIT_SPACE 0x100
> +#define sane_istest(x,mask) ((sane_ctype[(unsigned int)(x)] & (mask)) != 0)

That should better be left "(unsigned char)"? We might access values after the
array otherwise.

(That said, it wasn't really correct before either, when there really is a
possibility that x >= 0x100.)

Regards
Jan

PS: It looks like my isprint() version was given precedence over your
isprint() version during the merge into next. That should also be sorted out,
but I've no idea which one is actually better: two comparisons versus one
cache lookup and a bitop... (though my guess is that comparisons are cheaper,
but then we should also convert isdigit()...)

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
  2012-11-13 18:58               ` "Jan H. Schönherr"
@ 2012-11-13 19:14               ` René Scharfe
  2012-11-13 19:15               ` René Scharfe
                                 ` (2 subsequent siblings)
  4 siblings, 0 replies; 37+ messages in thread
From: René Scharfe @ 2012-11-13 19:14 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy; +Cc: git, Junio C Hamano, schnhrr

Am 13.11.2012 11:46, schrieb Nguyễn Thái Ngọc Duy:
> Git's isprint includes
> control space characters (10-13). According to glibc-2.14.1 on C
> locale on Linux, this is wrong. This patch fixes it.

isprint() is not in master, yet.  Can we perhaps still introduce it in 
such a way that we never have an incorrect version in master's history?

And could you please update test-ctype.c to match the change to 
isspace()?  The tests there just documented the status quo before I made 
changes to ctype.c long ago, so its definitions are just as correct (or 
wrong) as the original implementation.

Thanks,
René

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
  2012-11-13 18:58               ` "Jan H. Schönherr"
  2012-11-13 19:14               ` René Scharfe
@ 2012-11-13 19:15               ` René Scharfe
  2012-11-13 19:40                 ` Linus Torvalds
  2012-11-13 19:41               ` Johannes Sixt
  2012-11-15 12:19               ` [PATCH] wildmatch: correct " Nguyễn Thái Ngọc Duy
  4 siblings, 1 reply; 37+ messages in thread
From: René Scharfe @ 2012-11-13 19:15 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy, Linus Torvalds
  Cc: git, Junio C Hamano, schnhrr

Am 13.11.2012 11:46, schrieb Nguyễn Thái Ngọc Duy:
> Git's ispace does not include 11 and 12.  [...]
 > According to glibc-2.14.1 on C locale on Linux, this is wrong.

11 and 12 being vertical tab (\v) and form-feed (\f).  This lack goes 
back to the introduction of git's own character classifier macros seven 
years ago in 4546738b (Unlocalized isspace and friends).

Linus, do you remember if you left them out on purpose?

Thanks,
René

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 19:15               ` René Scharfe
@ 2012-11-13 19:40                 ` Linus Torvalds
  2012-11-13 19:50                   ` Linus Torvalds
  0 siblings, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2012-11-13 19:40 UTC (permalink / raw)
  To: René Scharfe
  Cc: Nguyễn Thái Ngọc Duy, Git Mailing List,
	Junio C Hamano, schnhrr

On Tue, Nov 13, 2012 at 11:15 AM, René Scharfe
<rene.scharfe@lsrfire.ath.cx> wrote:
>
> Linus, do you remember if you left them out on purpose?

Umm, no.

I have to wonder why you care? As far as I'm concerned, the only valid
space is space, TAB and CR/LF.

Anything else is *noise*, not space. What's the reason for even caring?

                  Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
                                 ` (2 preceding siblings ...)
  2012-11-13 19:15               ` René Scharfe
@ 2012-11-13 19:41               ` Johannes Sixt
  2012-11-15 12:19               ` [PATCH] wildmatch: correct " Nguyễn Thái Ngọc Duy
  4 siblings, 0 replies; 37+ messages in thread
From: Johannes Sixt @ 2012-11-13 19:41 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: git, Junio C Hamano, schnhrr, rene.scharfe

Am 13.11.2012 11:46, schrieb Nguyễn Thái Ngọc Duy:
> @@ -14,11 +14,11 @@ enum {
>  	P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
>  	X = GIT_CNTRL,
>  	U = GIT_PUNCT,
> -	Z = GIT_CNTRL | GIT_SPACE
> +	Z = GIT_CNTRL_SPACE
>  };
>  
> -const unsigned char sane_ctype[256] = {
> -	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
> +const unsigned int sane_ctype[256] = {
> +	X, X, X, X, X, X, X, X, X, Z, Z, Z, Z, Z, X, X,		/*   0.. 15 */
>  	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
>  	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
>  	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
> diff --git a/git-compat-util.h b/git-compat-util.h
> index 02f48f6..4ed3f94 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -474,8 +474,8 @@ extern const char tolower_trans_tbl[256];
>  #undef ispunct
>  #undef isxdigit
>  #undef isprint
> -extern const unsigned char sane_ctype[256];
> -#define GIT_SPACE 0x01
> +extern const unsigned int sane_ctype[256];
> +#define GIT_CNTRL_SPACE 0x01
>  #define GIT_DIGIT 0x02
>  #define GIT_ALPHA 0x04
>  #define GIT_GLOB_SPECIAL 0x08
> @@ -483,9 +483,10 @@ extern const unsigned char sane_ctype[256];
>  #define GIT_PATHSPEC_MAGIC 0x20
>  #define GIT_CNTRL 0x40
>  #define GIT_PUNCT 0x80
> -#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
> +#define GIT_SPACE 0x100
> +#define sane_istest(x,mask) ((sane_ctype[(unsigned int)(x)] & (mask)) != 0)
>  #define isascii(x) (((x) & ~0x7f) == 0)
> -#define isspace(x) sane_istest(x,GIT_SPACE)
> +#define isspace(x) sane_istest(x,GIT_SPACE | GIT_CNTRL_SPACE)
>  #define isdigit(x) sane_istest(x,GIT_DIGIT)
>  #define isalpha(x) sane_istest(x,GIT_ALPHA)
>  #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
> @@ -493,7 +494,7 @@ extern const unsigned char sane_ctype[256];
>  #define isupper(x) sane_iscase(x, 0)
>  #define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
>  #define is_regex_special(x) sane_istest(x,GIT_GLOB_SPECIAL | GIT_REGEX_SPECIAL)
> -#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
> +#define iscntrl(x) (sane_istest(x,GIT_CNTRL | GIT_CNTRL_SPACE))
>  #define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
>  		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
>  #define isxdigit(x) (hexval_table[x] != -1)

So we have two properties that overlap:

      SSSSSSSSSS
   CCCCCCCC

You seem to generate partitions:

   XXXYYYYYZZZZZ

then assign individual bits to each partition. Now each entry in the
lookup table has only one bit set. Then you define isxxx() to check for
one of the two possible bits:

   iscntrl is X or Y
   isspace is Y or Z

But shouldn't you just assign one bit for S and another one for C, have
entries in the lookup table with more than one bit set, and check for
only one bit in the isxxx macro?

That way you don't run out of bits as easily as you do with this patch.

-- Hannes

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 19:40                 ` Linus Torvalds
@ 2012-11-13 19:50                   ` Linus Torvalds
  2012-11-14 19:30                     ` René Scharfe
  0 siblings, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2012-11-13 19:50 UTC (permalink / raw)
  To: René Scharfe
  Cc: Nguyễn Thái Ngọc Duy, Git Mailing List,
	Junio C Hamano, schnhrr

On Tue, Nov 13, 2012 at 11:40 AM, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
>
> I have to wonder why you care? As far as I'm concerned, the only valid
> space is space, TAB and CR/LF.
>
> Anything else is *noise*, not space. What's the reason for even caring?

Btw, expanding the whitespace selection may actually be very
counter-productive. It is used primarily for things like removing
extraneous space at the end of lines etc, and for that, the current
selection of SPACE, TAB and LF/CR is the right thing to do.

Adding things like FF etc - that are *technically* whitespace, but
aren't the normal kind of silent whitespace - is potentially going to
change things too much. People might *want* a form-feed in their
messages, for all we know.

So I really object to changing things "just because". There's a reason
we do our own ctype.c: it avoids the crazy crap. It avoids the idiotic
localization issues, and it avoids the ambiguous cases.

So just let it be, unless you have some major real reason to actually
care about a real-world case. And if you do, please explain it. Don't
change things just because.

               Linus

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH nd/wildmatch] Correct Git's version of isprint and isspace
  2012-11-13 19:50                   ` Linus Torvalds
@ 2012-11-14 19:30                     ` René Scharfe
  0 siblings, 0 replies; 37+ messages in thread
From: René Scharfe @ 2012-11-14 19:30 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Nguyễn Thái Ngọc Duy, Git Mailing List,
	Junio C Hamano, schnhrr

Am 13.11.2012 20:50, schrieb Linus Torvalds:
> On Tue, Nov 13, 2012 at 11:40 AM, Linus Torvalds
> <torvalds@linux-foundation.org> wrote:
>>
>> I have to wonder why you care? As far as I'm concerned, the only valid
>> space is space, TAB and CR/LF.
>>
>> Anything else is *noise*, not space. What's the reason for even caring?
>
> Btw, expanding the whitespace selection may actually be very
> counter-productive. It is used primarily for things like removing
> extraneous space at the end of lines etc, and for that, the current
> selection of SPACE, TAB and LF/CR is the right thing to do.
>
> Adding things like FF etc - that are *technically* whitespace, but
> aren't the normal kind of silent whitespace - is potentially going to
> change things too much. People might *want* a form-feed in their
> messages, for all we know.

The patch was motivated by the integration of the wildmatch library, 
which exposes named character classes to users.  It replaces a call of 
fnmatch in match_pathname.  Users probably expect [:space:] to mean the 
same in git as in other programs.

I never saw a vertical tab and I can't imagine what it's used for.  I'd 
expect form-feeds to be matched as space, though.  Didn't see them very 
often, admittedly.

Nevertheless, it's unfortunate that we have an isspace() that *almost* 
does what the widely known thing of the same name does.  I'd shy away 
from changing git's version directly, because it's used more than a 
hundred times in the code, and estimating the impact of adding \v and \f 
to it is hard.  Perhaps renaming it to isgitspace() is a good first step, 
followed by adding a "standard" version of isspace() for wildmatch?

René

^ permalink raw reply	[flat|nested] 37+ messages in thread

* [PATCH] wildmatch: correct isprint and isspace
  2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
                                 ` (3 preceding siblings ...)
  2012-11-13 19:41               ` Johannes Sixt
@ 2012-11-15 12:19               ` Nguyễn Thái Ngọc Duy
  2012-11-15 17:13                 ` "Jan H. Schönherr"
  4 siblings, 1 reply; 37+ messages in thread
From: Nguyễn Thái Ngọc Duy @ 2012-11-15 12:19 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, schnhrr, rene.scharfe, Johannes Sixt, torvalds,
	Nguyễn Thái Ngọc Duy

Current isprint() incorrectly includes control characters 9, 10 and
13, which is fixed by this patch.

Current isspace() lacks 11 and 12. But Git's isspace() has been
designed this way since the beginning and has over 100 call sites
relying on this. Instead of updating isspace() behavior (which could be
tricky as patches from other topics may come in parallel that assume
the old isspace()), a new isspace_posix() is introduced and used by
wildmatch.c. Other parts of Git can be converted to use this new
function if it seems appropriate.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 Sorry for the late response. I'll reply to everybody in one mail.

 On Wed, Nov 14, 2012 at 1:58 AM, "Jan H. Schönherr" <schnhrr@cs.tu-berlin.de> wrote:
 > An alternative to switching from 1-byte to 4-byte values (don't we have
 > a 2-byte datatype?), would be to free up GIT_CNTRL and simply do:
 >
 > #define iscntrl(x) ((x) < 0x20)
 
 No. 127 is also a control character.

 On Wed, Nov 14, 2012 at 2:41 AM, Johannes Sixt <j6t@kdbg.org> wrote:
 > So we have two properties that overlap:
 >
 >       SSSSSSSSSS
 >    CCCCCCCC
 >
 > You seem to generate partions:
 >
 >    XXXYYYYYZZZZZ
 >
 > then assign individual bits to each partition. Now each entry in the
 > lookup table has only one bit set. Then you define isxxx() to check for
 > one of the two possible bits:
 >
 >    iscntrl is X or Y
 >    isspace is Y or Z
 >
 > But shouldn't you just assign one bit for S and another one for C, have
 > entries in the lookup table with more than one bit set, and check for
 > only one bit in the isxxx macro?
 >
 > That way you don't run out of bits as easily as you do with this patch.

 I need three sets of characters actually: control, spaces and
 printable (which contains non-control spaces). Making it
 (isspace(x) && (x) >= 32) is simpler and because isprint() is only used in
 wildmatch, I don't need to think about performance penalty (yet).

 On Thu, Nov 15, 2012 at 2:30 AM, René Scharfe <rene.scharfe@lsrfire.ath.cx> wrote:
 > Nevertheless, it's unfortunate that we have an isspace() that *almost* does
 > what the widely known thing of the same name does.  I'd shy away from
 > changing git's version directly, because it's used more than a hundred times
 > in the code, and estimating the impact of adding \v and \f to it.
 > Perhaps renaming it to isgitspace() is a good first step, followed by
 > adding a "standard" version of isspace() for wildmatch?

 There are just too many call sites of isspace() and there is a risk
 of new call sites coming in independently. So I think keeping isspace()
 as-is and using a different name for the standard version is probably
 a better choice.

 As the new isspace_posix() is only used by wildmatch, its performance
 as of now is not critical and a simple macro like in this patch is
 probably enough. We can optimize it later if we need to.

 git-compat-util.h | 4 +++-
 wildmatch.c       | 8 ++------
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index 02f48f6..d4c3fda 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -486,6 +486,7 @@ extern const unsigned char sane_ctype[256];
 #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
 #define isascii(x) (((x) & ~0x7f) == 0)
 #define isspace(x) sane_istest(x,GIT_SPACE)
+#define isspace_posix(x) (((x) >= 9 && (x) <= 13) || (x) == 32)
 #define isdigit(x) sane_istest(x,GIT_DIGIT)
 #define isalpha(x) sane_istest(x,GIT_ALPHA)
 #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
@@ -499,7 +500,8 @@ extern const unsigned char sane_ctype[256];
 #define isxdigit(x) (hexval_table[x] != -1)
 #define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
 		GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
-		GIT_PATHSPEC_MAGIC))
+		GIT_PATHSPEC_MAGIC) && \
+		(x) >= 32)
 #define tolower(x) sane_case((unsigned char)(x), 0x20)
 #define toupper(x) sane_case((unsigned char)(x), 0)
 #define is_pathspec_magic(x) sane_istest(x,GIT_PATHSPEC_MAGIC)
diff --git a/wildmatch.c b/wildmatch.c
index 3972e26..fd74efd 100644
--- a/wildmatch.c
+++ b/wildmatch.c
@@ -37,11 +37,7 @@ typedef unsigned char uchar;
 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 #endif
 
-#ifdef isgraph
-# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
-#else
-# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
-#endif
+#define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace_posix(c))
 
 #define ISPRINT(c) (ISASCII(c) && isprint(c))
 #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
@@ -50,7 +46,7 @@ typedef unsigned char uchar;
 #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
 #define ISLOWER(c) (ISASCII(c) && islower(c))
 #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
-#define ISSPACE(c) (ISASCII(c) && isspace(c))
+#define ISSPACE(c) (ISASCII(c) && isspace_posix(c))
 #define ISUPPER(c) (ISASCII(c) && isupper(c))
 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
 
-- 
1.8.0.rc2.23.g1fb49df

^ permalink raw reply related	[flat|nested] 37+ messages in thread

* Re: [PATCH] wildmatch: correct isprint and isspace
  2012-11-15 12:19               ` [PATCH] wildmatch: correct " Nguyễn Thái Ngọc Duy
@ 2012-11-15 17:13                 ` "Jan H. Schönherr"
  2012-11-16  4:19                   ` Nguyen Thai Ngoc Duy
  0 siblings, 1 reply; 37+ messages in thread
From: "Jan H. Schönherr" @ 2012-11-15 17:13 UTC (permalink / raw)
  To: Nguyễn Thái Ngọc Duy
  Cc: git, Junio C Hamano, rene.scharfe, Johannes Sixt, torvalds

Am 15.11.2012 13:19, schrieb Nguyễn Thái Ngọc Duy:
>  On Thu, Nov 15, 2012 at 2:30 AM, René Scharfe <rene.scharfe@lsrfire.ath.cx> wrote:
>  > Nevertheless, it's unfortunate that we have an isspace() that *almost* does
>  > what the widely known thing of the same name does.  I'd shy away from
>  > changing git's version directly, because it's used more than a hundred times
>  > in the code, and estimating the impact of adding \v and \f to it.
>  > Perhaps renaming it to isgitspace() is a good first step, followed by
>  > adding a "standard" version of isspace() for wildmatch?
> 
>  There are just too many call sites of isspace() and there is a risk
>  of new call sites coming in independently. So I think keeping isspace()
>  as-is and using a different name for the standard version is probably
>  a better choice.

After a closer look at where wildmatch is actually used -- matching
filenames -- and given that I've not yet seen \v or \f in a filename, it's
possibly unnecessary to do anything about isspace() right now.

(It's probably more an issue that filenames can be localized, and we only
support unlocalized character classes.)

> diff --git a/git-compat-util.h b/git-compat-util.h
> index 02f48f6..d4c3fda 100644
> --- a/git-compat-util.h
> +++ b/git-compat-util.h
> @@ -486,6 +486,7 @@ extern const unsigned char sane_ctype[256];
>  #define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
>  #define isascii(x) (((x) & ~0x7f) == 0)
>  #define isspace(x) sane_istest(x,GIT_SPACE)
> +#define isspace_posix(x) (((x) >= 9 && (x) <= 13) || (x) == 32)
>  #define isdigit(x) sane_istest(x,GIT_DIGIT)
>  #define isalpha(x) sane_istest(x,GIT_ALPHA)
>  #define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
> @@ -499,7 +500,8 @@ extern const unsigned char sane_ctype[256];
>  #define isxdigit(x) (hexval_table[x] != -1)

This was from a previous patch, but maybe: "hexval_table[(unsigned char)x]"

>  #define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
>  		GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
> -		GIT_PATHSPEC_MAGIC))
> +		GIT_PATHSPEC_MAGIC) && \
> +		(x) >= 32)

May I suggest the current isprint() implementation in master:

#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)


To summarize my opinion:

I no longer see a reason to correct isspace() (unless somebody with an actual
use case complains), and a more POSIXly isprint() is already in master.

=> Nothing to do. :)

Regards
Jan

^ permalink raw reply	[flat|nested] 37+ messages in thread

* Re: [PATCH] wildmatch: correct isprint and isspace
  2012-11-15 17:13                 ` "Jan H. Schönherr"
@ 2012-11-16  4:19                   ` Nguyen Thai Ngoc Duy
  0 siblings, 0 replies; 37+ messages in thread
From: Nguyen Thai Ngoc Duy @ 2012-11-16  4:19 UTC (permalink / raw)
  To: Jan H. Schönherr
  Cc: git, Junio C Hamano, rene.scharfe, Johannes Sixt, torvalds

On Fri, Nov 16, 2012 at 12:13 AM, "Jan H. Schönherr"
<schnhrr@cs.tu-berlin.de> wrote:
>>  #define isprint(x) (sane_istest(x, GIT_ALPHA | GIT_DIGIT | GIT_SPACE | \
>>               GIT_PUNCT | GIT_REGEX_SPECIAL | GIT_GLOB_SPECIAL | \
>> -             GIT_PATHSPEC_MAGIC))
>> +             GIT_PATHSPEC_MAGIC) && \
>> +             (x) >= 32)
>
> May I suggest the current is_print() implementation in master:
>
> #define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
>
>
> To summarize my opinion:
>
> I no longer see a reason to correct isspace() (unless somebody with an actual
> use case complains), and a more POSIXly isprint() is already in master.
>
> => Nothing to do. :)


Yeah. I remember reminding myself to check "the implementation in
master" you mentioned, but I probably failed at that. Just checked that
isprint() is already in master, and your comment about isspace() use
in wildmatch.c makes sense too. So I'm all for doing nothing.
-- 
Duy

^ permalink raw reply	[flat|nested] 37+ messages in thread

end of thread, other threads:[~2012-11-16  4:20 UTC | newest]

Thread overview: 37+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-10-14  2:34 [PATCH v5 00/12] nd/wildmatch Nguyễn Thái Ngọc Duy
2012-10-14  2:34 ` [PATCH v5 01/12] ctype: make sane_ctype[] const array Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 02/12] ctype: support iscntrl, ispunct, isxdigit and isprint Nguyễn Thái Ngọc Duy
2012-10-14  5:02   ` Junio C Hamano
2012-10-14  5:07     ` Nguyen Thai Ngoc Duy
2012-10-14 12:59   ` René Scharfe
2012-10-14 13:25     ` Nguyen Thai Ngoc Duy
2012-10-14 13:59       ` René Scharfe
2012-10-14 14:26         ` Nguyen Thai Ngoc Duy
2012-10-17 12:09           ` "Jan H. Schönherr"
2012-10-17 12:26             ` Nguyen Thai Ngoc Duy
2012-11-13 10:46             ` [PATCH nd/wildmatch] Correct Git's version of isprint and isspace Nguyễn Thái Ngọc Duy
2012-11-13 18:58               ` "Jan H. Schönherr"
2012-11-13 19:14               ` René Scharfe
2012-11-13 19:15               ` René Scharfe
2012-11-13 19:40                 ` Linus Torvalds
2012-11-13 19:50                   ` Linus Torvalds
2012-11-14 19:30                     ` René Scharfe
2012-11-13 19:41               ` Johannes Sixt
2012-11-15 12:19               ` [PATCH] wildmatch: correct " Nguyễn Thái Ngọc Duy
2012-11-15 17:13                 ` "Jan H. Schönherr"
2012-11-16  4:19                   ` Nguyen Thai Ngoc Duy
2012-10-14  2:35 ` [PATCH v5 03/12] Import wildmatch from rsync Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 04/12] wildmatch: remove unnecessary functions Nguyễn Thái Ngọc Duy
2012-10-14  5:04   ` Junio C Hamano
2012-10-14  6:29     ` Nguyen Thai Ngoc Duy
2012-10-14  2:35 ` [PATCH v5 05/12] Integrate wildmatch to git Nguyễn Thái Ngọc Duy
2012-10-14  5:06   ` Junio C Hamano
2012-10-14 11:07   ` Torsten Bögershausen
2012-10-14  2:35 ` [PATCH v5 06/12] t3070: disable unreliable fnmatch tests Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 07/12] wildmatch: make wildmatch's return value compatible with fnmatch Nguyễn Thái Ngọc Duy
2012-10-14  5:09   ` Junio C Hamano
2012-10-14  2:35 ` [PATCH v5 08/12] wildmatch: remove static variable force_lower_case Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 09/12] wildmatch: fix case-insensitive matching Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 10/12] wildmatch: adjust "**" behavior Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 11/12] wildmatch: make /**/ match zero or more directories Nguyễn Thái Ngọc Duy
2012-10-14  2:35 ` [PATCH v5 12/12] Support "**" wildcard in .gitignore and .gitattributes Nguyễn Thái Ngọc Duy

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.