All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Eric Sunshine" <sunshine@sunshineco.com>,
	"Ramsay Jones" <ramsay@ramsay1.demon.co.uk>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v6 00/11] Fix icase grep on non-ascii
Date: Sat,  6 Feb 2016 09:02:59 +0700	[thread overview]
Message-ID: <1454724190-14063-1-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1453982183-24124-1-git-send-email-pclouds@gmail.com>

v6 addresses the review comments from Ramsay and Eric. The interdiff is
below. One thing to note: I decided not to replace !icase_non_ascii with
icase_ascii_only. Instead I spelled the condition out as
"!icase || ascii_only", which I think expresses the intention better.

diff --git a/grep.c b/grep.c
index 2e4f71d..aed4fe0 100644
--- a/grep.c
+++ b/grep.c
@@ -412,11 +412,11 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 	basic_regex_quote_buf(&sb, p->pattern);
 	err = regcomp(&p->regexp, sb.buf, opt->regflags & ~REG_EXTENDED);
 	if (opt->debug)
-		fprintf(stderr, "fixed%s\n", sb.buf);
+		fprintf(stderr, "fixed %s\n", sb.buf);
 	strbuf_release(&sb);
 	if (err) {
 		char errbuf[1024];
-		regerror(err, &p->regexp, errbuf, 1024);
+		regerror(err, &p->regexp, errbuf, sizeof(errbuf));
 		regfree(&p->regexp);
 		compile_regexp_failed(p, errbuf);
 	}
@@ -424,19 +424,18 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
 
 static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 {
-	int icase_non_ascii;
+	int icase, ascii_only;
 	int err;
 
 	p->word_regexp = opt->word_regexp;
 	p->ignore_case = opt->ignore_case;
-	icase_non_ascii =
-		(opt->regflags & REG_ICASE || p->ignore_case) &&
-		has_non_ascii(p->pattern);
+	icase	       = opt->regflags & REG_ICASE || p->ignore_case;
+	ascii_only     = !has_non_ascii(p->pattern);
 
-	if (!icase_non_ascii && is_fixed(p->pattern, p->patternlen))
+	if ((!icase || ascii_only) && is_fixed(p->pattern, p->patternlen))
 		p->fixed = 1;
 	else if (opt->fixed) {
-		p->fixed = !icase_non_ascii;
+		p->fixed = !icase || ascii_only;
 		if (!p->fixed) {
 			compile_fixed_regexp(p, opt);
 			return;
diff --git a/t/t0070-fundamental.sh b/t/t0070-fundamental.sh
index 5ed69a6..991ed2a 100755
--- a/t/t0070-fundamental.sh
+++ b/t/t0070-fundamental.sh
@@ -31,7 +31,7 @@ test_expect_success 'git_mkstemps_mode does not fail if fd 0 is not open' '
 
 test_expect_success 'check for a bug in the regex routines' '
 	# if this test fails, re-build git with NO_REGEX=1
-	test-regex
+	test-regex --bug
 '
 
 test_done
diff --git a/t/t7812-grep-icase-non-ascii.sh b/t/t7812-grep-icase-non-ascii.sh
index a5475bb..4176625 100755
--- a/t/t7812-grep-icase-non-ascii.sh
+++ b/t/t7812-grep-icase-non-ascii.sh
@@ -38,12 +38,12 @@ test_expect_success GETTEXT_LOCALE,LIBPCRE 'grep pcre utf-8 string with "+"' '
 test_expect_success REGEX_LOCALE 'grep literal string, with -F' '
 	git grep --debug -i -F "TILRAUN: Halló Heimur!"  2>&1 >/dev/null |
 		 grep fixed >debug1 &&
-	echo "fixedTILRAUN: Halló Heimur!" >expect1 &&
+	echo "fixed TILRAUN: Halló Heimur!" >expect1 &&
 	test_cmp expect1 debug1 &&
 
 	git grep --debug -i -F "TILRAUN: HALLÓ HEIMUR!"  2>&1 >/dev/null |
 		 grep fixed >debug2 &&
-	echo "fixedTILRAUN: HALLÓ HEIMUR!" >expect2 &&
+	echo "fixed TILRAUN: HALLÓ HEIMUR!" >expect2 &&
 	test_cmp expect2 debug2
 '
 
@@ -52,12 +52,12 @@ test_expect_success REGEX_LOCALE 'grep string with regex, with -F' '
 
 	git grep --debug -i -F "^*TILR^AUN:.* \\Halló \$He[]imur!\$" 2>&1 >/dev/null |
 		 grep fixed >debug1 &&
-	echo "fixed\\^*TILR^AUN:\\.\\* \\\\Halló \$He\\[]imur!\\\$" >expect1 &&
+	echo "fixed \\^*TILR^AUN:\\.\\* \\\\Halló \$He\\[]imur!\\\$" >expect1 &&
 	test_cmp expect1 debug1 &&
 
 	git grep --debug -i -F "^*TILR^AUN:.* \\HALLÓ \$HE[]IMUR!\$"  2>&1 >/dev/null |
 		 grep fixed >debug2 &&
-	echo "fixed\\^*TILR^AUN:\\.\\* \\\\HALLÓ \$HE\\[]IMUR!\\\$" >expect2 &&
+	echo "fixed \\^*TILR^AUN:\\.\\* \\\\HALLÓ \$HE\\[]IMUR!\\\$" >expect2 &&
 	test_cmp expect2 debug2
 '
 
diff --git a/test-regex.c b/test-regex.c
index 3b5641c..d1a952c 100644
--- a/test-regex.c
+++ b/test-regex.c
@@ -17,6 +17,25 @@ static struct reg_flag reg_flags[] = {
 	{ NULL, 0 }
 };
 
+static int test_regex_bug(void)
+{
+	char *pat = "[^={} \t]+";
+	char *str = "={}\nfred";
+	regex_t r;
+	regmatch_t m[1];
+
+	if (regcomp(&r, pat, REG_EXTENDED | REG_NEWLINE))
+		die("failed regcomp() for pattern '%s'", pat);
+	if (regexec(&r, str, 1, m, 0))
+		die("no match of pattern '%s' to string '%s'", pat, str);
+
+	/* http://sourceware.org/bugzilla/show_bug.cgi?id=3957  */
+	if (m[0].rm_so == 3) /* matches '\n' when it should not */
+		die("regex bug confirmed: re-build git with NO_REGEX=1");
+
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	const char *pat;
@@ -25,40 +44,32 @@ int main(int argc, char **argv)
 	regex_t r;
 	regmatch_t m[1];
 
-	if (argc == 1) {
-		/* special case, bug check */
-		pat = "[^={} \t]+";
-		str = "={}\nfred";
-		flags = REG_EXTENDED | REG_NEWLINE;
-	} else {
+	if (argc == 2 && !strcmp(argv[1], "--bug"))
+		return test_regex_bug();
+	else if (argc < 3)
+		die("usage: test-regex --bug\n"
+		    "       test-regex <pattern> <string> [<options>]");
+
+	argv++;
+	pat = *argv++;
+	str = *argv++;
+	while (*argv) {
+		struct reg_flag *rf;
+		for (rf = reg_flags; rf->name; rf++)
+			if (!strcmp(*argv, rf->name)) {
+				flags |= rf->flag;
+				break;
+			}
+		if (!rf->name)
+			die("do not recognize %s", *argv);
 		argv++;
-		pat = *argv++;
-		str = *argv++;
-		while (*argv) {
-			struct reg_flag *rf;
-			for (rf = reg_flags; rf->name; rf++)
-				if (!strcmp(*argv, rf->name)) {
-					flags |= rf->flag;
-					break;
-				}
-			if (!rf->name)
-				die("do not recognize %s", *argv);
-			argv++;
-		}
-		git_setup_gettext();
 	}
+	git_setup_gettext();
 
 	if (regcomp(&r, pat, flags))
 		die("failed regcomp() for pattern '%s'", pat);
-	if (regexec(&r, str, 1, m, 0)) {
-		if (argc == 1)
-			die("no match of pattern '%s' to string '%s'", pat, str);
+	if (regexec(&r, str, 1, m, 0))
 		return 1;
-	}
-
-	/* http://sourceware.org/bugzilla/show_bug.cgi?id=3957  */
-	if (argc == 1 && m[0].rm_so == 3) /* matches '\n' when it should not */
-		die("regex bug confirmed: re-build git with NO_REGEX=1");
 
-	exit(0);
+	return 0;
 }
-- 
2.7.0.377.g4cd97dd

  parent reply	other threads:[~2016-02-06  2:03 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-28 11:56 [PATCH v5 00/10] Fix icase grep on non-ascii Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 01/10] grep: allow -F -i combination Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 02/10] grep: break down an "if" stmt in preparation for next changes Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 03/10] test-regex: expose full regcomp() to the command line Nguyễn Thái Ngọc Duy
2016-01-29  5:31   ` Eric Sunshine
2016-01-29 14:29     ` Ramsay Jones
2016-01-28 11:56 ` [PATCH v5 04/10] grep/icase: avoid kwsset on literal non-ascii strings Nguyễn Thái Ngọc Duy
2016-01-29  6:18   ` Eric Sunshine
2016-01-29  6:41     ` Eric Sunshine
2016-01-28 11:56 ` [PATCH v5 05/10] grep/icase: avoid kwsset when -F is specified Nguyễn Thái Ngọc Duy
2016-01-29  6:23   ` Eric Sunshine
2016-01-28 11:56 ` [PATCH v5 06/10] grep/pcre: prepare locale-dependent tables for icase matching Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 07/10] gettext: add is_utf8_locale() Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 08/10] grep/pcre: support utf-8 Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 09/10] diffcore-pickaxe: "share" regex error handling code Nguyễn Thái Ngọc Duy
2016-01-28 11:56 ` [PATCH v5 10/10] diffcore-pickaxe: support case insensitive match on non-ascii Nguyễn Thái Ngọc Duy
2016-01-29  6:38   ` Eric Sunshine
2016-01-28 23:54 ` [PATCH v5 00/10] Fix icase grep " Junio C Hamano
2016-02-06  2:02 ` Nguyễn Thái Ngọc Duy [this message]
2016-02-06  2:03   ` [PATCH v6 01/11] grep: allow -F -i combination Nguyễn Thái Ngọc Duy
2016-06-17 21:54     ` Junio C Hamano
2016-06-18  0:07       ` Duy Nguyen
2016-02-06  2:03   ` [PATCH v6 02/11] grep: break down an "if" stmt in preparation for next changes Nguyễn Thái Ngọc Duy
2016-02-09 18:20     ` Junio C Hamano
2016-02-06  2:03   ` [PATCH v6 03/11] test-regex: isolate the bug test code Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 04/11] test-regex: expose full regcomp() to the command line Nguyễn Thái Ngọc Duy
2016-02-07  8:44     ` Eric Sunshine
2016-02-09 18:21       ` Junio C Hamano
2016-02-06  2:03   ` [PATCH v6 05/11] grep/icase: avoid kwsset on literal non-ascii strings Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 06/11] grep/icase: avoid kwsset when -F is specified Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 07/11] grep/pcre: prepare locale-dependent tables for icase matching Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 08/11] gettext: add is_utf8_locale() Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 09/11] grep/pcre: support utf-8 Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 10/11] diffcore-pickaxe: "share" regex error handling code Nguyễn Thái Ngọc Duy
2016-02-06  2:03   ` [PATCH v6 11/11] diffcore-pickaxe: support case insensitive match on non-ascii Nguyễn Thái Ngọc Duy
2016-02-07  8:48   ` [PATCH v6 00/11] Fix icase grep " Eric Sunshine
2016-02-14 11:49   ` [PATCH v7 00/12] nd/icase updates Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 01/12] grep: allow -F -i combination Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 02/12] grep: break down an "if" stmt in preparation for next changes Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 03/12] test-regex: isolate the bug test code Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 04/12] test-regex: expose full regcomp() to the command line Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 05/12] grep/icase: avoid kwsset on literal non-ascii strings Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 06/12] grep/icase: avoid kwsset when -F is specified Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 07/12] grep/pcre: prepare locale-dependent tables for icase matching Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 08/12] gettext: add is_utf8_locale() Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 09/12] grep/pcre: support utf-8 Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 10/12] diffcore-pickaxe: "share" regex error handling code Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 11/12] diffcore-pickaxe: support case insensitive match on non-ascii Nguyễn Thái Ngọc Duy
2016-02-14 11:49     ` [PATCH v7 12/12] grep.c: reuse "icase" variable Nguyễn Thái Ngọc Duy
2016-06-17 23:17   ` [PATCH v6 00/11] Fix icase grep on non-ascii Junio C Hamano
2016-06-18  0:26     ` Duy Nguyen
2016-06-22 18:29       ` Duy Nguyen
2016-06-22 18:36         ` Junio C Hamano
2016-06-22 18:41           ` Duy Nguyen
2016-06-22 18:59             ` Junio C Hamano
2016-06-22 19:32               ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1454724190-14063-1-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=ramsay@ramsay1.demon.co.uk \
    --cc=sunshine@sunshineco.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.