All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
To: git@vger.kernel.org
Cc: git-packagers@googlegroups.com, gitgitgadget@gmail.com,
	gitster@pobox.com, johannes.schindelin@gmx.de, peff@peff.net,
	sandals@crustytoothpaste.net, szeder.dev@gmail.com,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>
Subject: [RFC/PATCH 6/7] grep: remove the kwset optimization
Date: Wed, 26 Jun 2019 02:03:28 +0200	[thread overview]
Message-ID: <20190626000329.32475-7-avarab@gmail.com> (raw)
In-Reply-To: <87r27u8pie.fsf@evledraar.gmail.com>

A later change will replace this optimization with a different one,
but as removing it and running the tests demonstrates no grep
semantics depend on this backend anymore.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
---
 grep.c | 63 +++-------------------------------------------------------
 grep.h |  2 --
 2 files changed, 3 insertions(+), 62 deletions(-)

diff --git a/grep.c b/grep.c
index 14570c7ac1..4716217837 100644
--- a/grep.c
+++ b/grep.c
@@ -356,18 +356,6 @@ static NORETURN void compile_regexp_failed(const struct grep_pat *p,
 	die("%s'%s': %s", where, p->pattern, error);
 }
 
-static int is_fixed(const char *s, size_t len)
-{
-	size_t i;
-
-	for (i = 0; i < len; i++) {
-		if (is_regex_special(s[i]))
-			return 0;
-	}
-
-	return 1;
-}
-
 #ifdef USE_LIBPCRE1
 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
 {
@@ -643,38 +631,12 @@ static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
 
 	p->word_regexp = opt->word_regexp;
 	p->ignore_case = opt->ignore_case;
+	p->fixed = opt->fixed;
 
 	if (memchr(p->pattern, 0, p->patternlen) && !opt->pcre2)
 		die(_("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2"));
 
-	/*
-	 * Even when -F (fixed) asks us to do a non-regexp search, we
-	 * may not be able to correctly case-fold when -i
-	 * (ignore-case) is asked (in which case, we'll synthesize a
-	 * regexp to match the pattern that matches regexp special
-	 * characters literally, while ignoring case differences).  On
-	 * the other hand, even without -F, if the pattern does not
-	 * have any regexp special characters and there is no need for
-	 * case-folding search, we can internally turn it into a
-	 * simple string match using kws.  p->fixed tells us if we
-	 * want to use kws.
-	 */
-	if (opt->fixed || is_fixed(p->pattern, p->patternlen))
-		p->fixed = !p->ignore_case || !has_non_ascii(p->pattern);
-
-	if (p->fixed) {
-		p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
-		kwsincr(p->kws, p->pattern, p->patternlen);
-		kwsprep(p->kws);
-		return;
-	}
-
 	if (opt->fixed) {
-		/*
-		 * We come here when the pattern has the non-ascii
-		 * characters we cannot case-fold, and asked to
-		 * ignore-case.
-		 */
 		compile_fixed_regexp(p, opt);
 		return;
 	}
@@ -1042,9 +1004,7 @@ void free_grep_patterns(struct grep_opt *opt)
 		case GREP_PATTERN: /* atom */
 		case GREP_PATTERN_HEAD:
 		case GREP_PATTERN_BODY:
-			if (p->kws)
-				kwsfree(p->kws);
-			else if (p->pcre1_regexp)
+			if (p->pcre1_regexp)
 				free_pcre1_regexp(p);
 			else if (p->pcre2_pattern)
 				free_pcre2_pattern(p);
@@ -1104,29 +1064,12 @@ static void show_name(struct grep_opt *opt, const char *name)
 	opt->output(opt, opt->null_following_name ? "\0" : "\n", 1);
 }
 
-static int fixmatch(struct grep_pat *p, char *line, char *eol,
-		    regmatch_t *match)
-{
-	struct kwsmatch kwsm;
-	size_t offset = kwsexec(p->kws, line, eol - line, &kwsm);
-	if (offset == -1) {
-		match->rm_so = match->rm_eo = -1;
-		return REG_NOMATCH;
-	} else {
-		match->rm_so = offset;
-		match->rm_eo = match->rm_so + kwsm.size[0];
-		return 0;
-	}
-}
-
 static int patmatch(struct grep_pat *p, char *line, char *eol,
 		    regmatch_t *match, int eflags)
 {
 	int hit;
 
-	if (p->fixed)
-		hit = !fixmatch(p, line, eol, match);
-	else if (p->pcre1_regexp)
+	if (p->pcre1_regexp)
 		hit = !pcre1match(p, line, eol, match, eflags);
 	else if (p->pcre2_pattern)
 		hit = !pcre2match(p, line, eol, match, eflags);
diff --git a/grep.h b/grep.h
index 1875880f37..90ca435aad 100644
--- a/grep.h
+++ b/grep.h
@@ -32,7 +32,6 @@ typedef int pcre2_compile_context;
 typedef int pcre2_match_context;
 typedef int pcre2_jit_stack;
 #endif
-#include "kwset.h"
 #include "thread-utils.h"
 #include "userdiff.h"
 
@@ -97,7 +96,6 @@ struct grep_pat {
 	pcre2_match_context *pcre2_match_context;
 	pcre2_jit_stack *pcre2_jit_stack;
 	uint32_t pcre2_jit_on;
-	kwset_t kws;
 	unsigned fixed:1;
 	unsigned ignore_case:1;
 	unsigned word_regexp:1;
-- 
2.22.0.455.g172b71a6c5


  parent reply	other threads:[~2019-06-26  0:04 UTC|newest]

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-13 11:49 [PATCH 0/4] Support building with GCC v8.x/v9.x Johannes Schindelin via GitGitGadget
2019-06-13 11:49 ` [PATCH 1/4] poll (mingw): allow compiling with GCC 8 and DEVELOPER=1 Johannes Schindelin via GitGitGadget
2019-06-13 11:49 ` [PATCH 2/4] kwset: allow building with GCC 8 Johannes Schindelin via GitGitGadget
2019-06-13 16:11   ` Junio C Hamano
2019-06-14  9:53   ` SZEDER Gábor
2019-06-14 10:00     ` [RFC/PATCH v1 0/4] compat/obstack: update from upstream SZEDER Gábor
2019-06-14 10:00       ` [PATCH v1 1/4] " SZEDER Gábor
2019-06-14 10:00       ` [PATCH v1 2/4] SQUASH??? compat/obstack: fix portability issues SZEDER Gábor
2019-06-14 10:00       ` [PATCH v1 3/4] SQUASH??? compat/obstack: fix build errors with Clang SZEDER Gábor
2019-06-14 10:00       ` [PATCH v1 4/4] compat/obstack: fix some sparse warnings SZEDER Gábor
2019-06-14 17:57       ` [RFC/PATCH v1 0/4] compat/obstack: update from upstream Jeff King
2019-06-14 18:19       ` Junio C Hamano
2019-06-14 20:30       ` Ramsay Jones
2019-06-14 21:24         ` Ramsay Jones
2019-06-17 18:36         ` SZEDER Gábor
2019-06-14 16:12     ` [PATCH 2/4] kwset: allow building with GCC 8 Junio C Hamano
2019-06-17 18:26       ` SZEDER Gábor
2019-06-14 22:09   ` Ævar Arnfjörð Bjarmason
2019-06-14 22:55   ` Can we just get rid of kwset & obstack in favor of optimistically using PCRE v2 JIT? Ævar Arnfjörð Bjarmason
2019-06-14 23:19     ` Ævar Arnfjörð Bjarmason
2019-06-20 10:35       ` Jeff King
2019-06-15  9:01     ` Carlo Arenas
2019-06-15 19:15     ` brian m. carlson
2019-06-15 22:14       ` Ævar Arnfjörð Bjarmason
2019-06-26  0:03         ` [RFC/PATCH 0/7] grep: move from kwset to optional PCRE v2 Ævar Arnfjörð Bjarmason
2019-06-26 14:02           ` Johannes Schindelin
2019-06-27  9:16             ` Johannes Schindelin
2019-06-27 16:27               ` Ævar Arnfjörð Bjarmason
2019-06-27 18:21                 ` Johannes Schindelin
2019-06-27 23:39           ` [PATCH v2 0/9] " Ævar Arnfjörð Bjarmason
2019-06-28  7:23             ` Ævar Arnfjörð Bjarmason
2019-06-28 16:10               ` Junio C Hamano
2019-07-01 21:20             ` [PATCH v3 00/10] " Ævar Arnfjörð Bjarmason
2019-07-01 21:31               ` Junio C Hamano
2019-07-02 11:10                 ` Ævar Arnfjörð Bjarmason
2019-07-02 12:32               ` Johannes Schindelin
2019-07-02 19:57                 ` Junio C Hamano
2019-07-03 10:08                   ` Johannes Schindelin
2019-07-03 10:25                 ` Johannes Schindelin
2019-07-03 11:27                   ` Johannes Schindelin
2019-07-01 21:20             ` [PATCH v3 01/10] log tests: test regex backends in "--encode=<enc>" tests Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 02/10] grep: don't use PCRE2?_UTF8 with "log --encoding=<non-utf8>" Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 03/10] t4210: skip more command-line encoding tests on MinGW Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 04/10] grep: inline the return value of a function call used only once Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 05/10] grep tests: move "grep binary" alongside the rest Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 06/10] grep tests: move binary pattern tests into their own file Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 07/10] grep: make the behavior for NUL-byte in patterns sane Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 08/10] grep: drop support for \0 in --fixed-strings <pattern> Ævar Arnfjörð Bjarmason
2019-07-01 21:20             ` [PATCH v3 09/10] grep: remove the kwset optimization Ævar Arnfjörð Bjarmason
2019-07-01 21:21             ` [PATCH v3 10/10] grep: use PCRE v2 for optimized fixed-string search Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 1/9] log tests: test regex backends in "--encode=<enc>" tests Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 2/9] grep: don't use PCRE2?_UTF8 with "log --encoding=<non-utf8>" Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 3/9] grep: inline the return value of a function call used only once Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 4/9] grep tests: move "grep binary" alongside the rest Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 5/9] grep tests: move binary pattern tests into their own file Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 6/9] grep: make the behavior for NUL-byte in patterns sane Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 7/9] grep: drop support for \0 in --fixed-strings <pattern> Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 8/9] grep: remove the kwset optimization Ævar Arnfjörð Bjarmason
2019-06-27 23:39           ` [PATCH v2 9/9] grep: use PCRE v2 for optimized fixed-string search Ævar Arnfjörð Bjarmason
2019-06-26  0:03         ` [RFC/PATCH 1/7] grep: inline the return value of a function call used only once Ævar Arnfjörð Bjarmason
2019-06-26  0:03         ` [RFC/PATCH 2/7] grep tests: move "grep binary" alongside the rest Ævar Arnfjörð Bjarmason
2019-06-26 14:05           ` Johannes Schindelin
2019-06-26 18:13           ` Junio C Hamano
2019-06-26  0:03         ` [RFC/PATCH 3/7] grep tests: move binary pattern tests into their own file Ævar Arnfjörð Bjarmason
2019-06-26  0:03         ` [RFC/PATCH 4/7] grep: make the behavior for \0 in patterns sane Ævar Arnfjörð Bjarmason
2019-06-27  2:03           ` brian m. carlson
2019-06-26  0:03         ` [RFC/PATCH 5/7] grep: drop support for \0 in --fixed-strings <pattern> Ævar Arnfjörð Bjarmason
2019-06-26 16:14           ` Junio C Hamano
2019-06-26  0:03         ` Ævar Arnfjörð Bjarmason [this message]
2019-06-26  0:03         ` [RFC/PATCH 7/7] grep: use PCRE v2 for optimized fixed-string search Ævar Arnfjörð Bjarmason
2019-06-26 14:13           ` Johannes Schindelin
2019-06-26 18:45             ` Junio C Hamano
2019-06-27  9:31               ` Johannes Schindelin
2019-06-27 18:45                 ` Johannes Schindelin
2019-06-27 19:06                   ` Junio C Hamano
2019-06-28 10:56                     ` Johannes Schindelin
2019-06-13 11:49 ` [PATCH 3/4] winansi: simplify loading the GetCurrentConsoleFontEx() function Johannes Schindelin via GitGitGadget
2019-06-13 11:49 ` [PATCH 4/4] config: avoid calling `labs()` on too-large data type Johannes Schindelin via GitGitGadget
2019-06-13 16:13   ` Junio C Hamano
2019-06-16  6:48   ` René Scharfe
2019-06-16  8:24     ` René Scharfe
2019-06-16 14:01       ` René Scharfe
2019-06-16 22:26         ` Junio C Hamano
2019-06-20 19:58           ` René Scharfe
2019-06-20 21:07             ` Junio C Hamano
2019-06-21 18:35             ` Johannes Schindelin
2019-06-22 10:03               ` René Scharfe
2019-06-22 10:03           ` [PATCH v2 1/3] config: use unsigned_mult_overflows to check for overflows René Scharfe
2019-06-22 10:03           ` [PATCH v2 2/3] config: don't multiply in parse_unit_factor() René Scharfe
2019-06-22 10:03           ` [PATCH v2 3/3] config: simplify parsing of unit factors René Scharfe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190626000329.32475-7-avarab@gmail.com \
    --to=avarab@gmail.com \
    --cc=git-packagers@googlegroups.com \
    --cc=git@vger.kernel.org \
    --cc=gitgitgadget@gmail.com \
    --cc=gitster@pobox.com \
    --cc=johannes.schindelin@gmx.de \
    --cc=peff@peff.net \
    --cc=sandals@crustytoothpaste.net \
    --cc=szeder.dev@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.