All of lore.kernel.org
 help / color / mirror / Atom feed
From: Michael J Gruber <git@drmicha.warpmail.net>
To: git@vger.kernel.org
Cc: Jeff King <peff@peff.net>
Subject: [WIP/RFH/RFD/PATCH] grep: allow to use textconv filters
Date: Mon,  4 Feb 2013 16:27:31 +0100	[thread overview]
Message-ID: <2c0641ea4df6a872a4466efe0c0124f304f44c3e.1359991521.git.git@drmicha.warpmail.net> (raw)

Recently and not so recently, we made sure that log/grep-type operations
use textconv filters when a user-facing diff would do the same:

ef90ab6 (pickaxe: use textconv for -S counting, 2012-10-28)
b1c2f57 (diff_grep: use textconv buffers for add/deleted files, 2012-10-28)
0508fe5 (combine-diff: respect textconv attributes, 2011-05-23)

"git grep" currently does not use textconv filters at all, that is
neither for displaying the match and context nor for the actual grepping.

Introduce a binary mode "--textconv" (in addition to "--text" and "-I")
which makes git grep use any configured textconv filters for grepping
and output purposes.

Signed-off-by: Michael J Gruber <git@drmicha.warpmail.net>
---

Notes:
    I'm somehow stuck in textconv/filespec/... hell, so I'm sending this out
    as a request for help. I'm sure there are people for whom it's a breeze to
    get this right.
    
    The difficulty is in getting the different cases (blob/sha1 vs.
    worktree) right, and in making the changes minimally invasive. It seems
    that some more refactoring could help: "git show --textconv" does not
    use textconv filters when used on blobs either. (It does for diffs, of
    course.) Most existing helper functions are tailored for diffs.

    Nota bene: --textconv does not affect "diff --stat" either...

 builtin/grep.c |  5 ++++-
 grep.c         | 47 +++++++++++++++++++++++++++++------------------
 grep.h         |  3 ++-
 3 files changed, 35 insertions(+), 20 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 8025964..2181c22 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -96,7 +96,7 @@ static void add_work(struct grep_opt *opt, enum grep_source_type type,
 
 	grep_source_init(&todo[todo_end].source, type, name, path, id);
 	if (opt->binary != GREP_BINARY_TEXT)
-		grep_source_load_driver(&todo[todo_end].source);
+		grep_source_load_driver(&todo[todo_end].source, opt);
 	todo[todo_end].done = 0;
 	strbuf_reset(&todo[todo_end].out);
 	todo_end = (todo_end + 1) % ARRAY_SIZE(todo);
@@ -659,6 +659,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix)
 		OPT_SET_INT('I', NULL, &opt.binary,
 			N_("don't match patterns in binary files"),
 			GREP_BINARY_NOMATCH),
+		OPT_SET_INT(0, "textconv", &opt.binary,
+			N_("process binary files with textconv filters"),
+			GREP_BINARY_TEXTCONV),
 		{ OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"),
 			N_("descend at most <depth> levels"), PARSE_OPT_NONEG,
 			NULL, 1 },
diff --git a/grep.c b/grep.c
index 4bd1b8b..410b7b8 100644
--- a/grep.c
+++ b/grep.c
@@ -1,10 +1,12 @@
 #include "cache.h"
+#include "diff.h"
+#include "diffcore.h"
 #include "grep.h"
 #include "userdiff.h"
 #include "xdiff-interface.h"
 
-static int grep_source_load(struct grep_source *gs);
-static int grep_source_is_binary(struct grep_source *gs);
+static int grep_source_load(struct grep_source *gs, struct grep_opt *opt);
+static int grep_source_is_binary(struct grep_source *gs, struct grep_opt *opt);
 
 static struct grep_opt grep_defaults;
 
@@ -1174,7 +1176,7 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo
 {
 	xdemitconf_t *xecfg = opt->priv;
 	if (xecfg && !xecfg->find_func) {
-		grep_source_load_driver(gs);
+		grep_source_load_driver(gs, opt);
 		if (gs->driver->funcname.pattern) {
 			const struct userdiff_funcname *pe = &gs->driver->funcname;
 			xdiff_set_find_func(xecfg, pe->pattern, pe->cflags);
@@ -1354,14 +1356,15 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 
 	switch (opt->binary) {
 	case GREP_BINARY_DEFAULT:
-		if (grep_source_is_binary(gs))
+		if (grep_source_is_binary(gs, opt))
 			binary_match_only = 1;
 		break;
 	case GREP_BINARY_NOMATCH:
-		if (grep_source_is_binary(gs))
+		if (grep_source_is_binary(gs, opt))
 			return 0; /* Assume unmatch */
 		break;
 	case GREP_BINARY_TEXT:
+	case GREP_BINARY_TEXTCONV:
 		break;
 	default:
 		die("bug: unknown binary handling mode");
@@ -1372,7 +1375,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle
 
 	try_lookahead = should_lookahead(opt);
 
-	if (grep_source_load(gs) < 0)
+	if (grep_source_load(gs, opt) < 0)
 		return 0;
 
 	bol = gs->buf;
@@ -1610,12 +1613,17 @@ void grep_source_clear_data(struct grep_source *gs)
 	}
 }
 
-static int grep_source_load_sha1(struct grep_source *gs)
+static int grep_source_load_sha1(struct grep_source *gs, struct grep_opt *opt)
 {
 	enum object_type type;
-
 	grep_read_lock();
-	gs->buf = read_sha1_file(gs->identifier, &type, &gs->size);
+	if (opt->binary == GREP_BINARY_TEXTCONV) {
+		struct diff_filespec *df = alloc_filespec(gs->name);
+		gs->size = fill_textconv(gs->driver, df, &gs->buf);
+		free_filespec(df);
+	} else {
+		gs->buf = read_sha1_file(gs->identifier, &type, &gs->size);
+	}
 	grep_read_unlock();
 
 	if (!gs->buf)
@@ -1625,7 +1633,7 @@ static int grep_source_load_sha1(struct grep_source *gs)
 	return 0;
 }
 
-static int grep_source_load_file(struct grep_source *gs)
+static int grep_source_load_file(struct grep_source *gs, struct grep_opt *opt)
 {
 	const char *filename = gs->identifier;
 	struct stat st;
@@ -1660,42 +1668,45 @@ static int grep_source_load_file(struct grep_source *gs)
 	return 0;
 }
 
-static int grep_source_load(struct grep_source *gs)
+static int grep_source_load(struct grep_source *gs, struct grep_opt *opt)
 {
 	if (gs->buf)
 		return 0;
 
 	switch (gs->type) {
 	case GREP_SOURCE_FILE:
-		return grep_source_load_file(gs);
+		return grep_source_load_file(gs, opt);
 	case GREP_SOURCE_SHA1:
-		return grep_source_load_sha1(gs);
+		return grep_source_load_sha1(gs, opt);
 	case GREP_SOURCE_BUF:
 		return gs->buf ? 0 : -1;
 	}
 	die("BUG: invalid grep_source type");
 }
 
-void grep_source_load_driver(struct grep_source *gs)
+void grep_source_load_driver(struct grep_source *gs, struct grep_opt *opt)
 {
 	if (gs->driver)
 		return;
 
-	grep_attr_lock();
+	grep_attr_lock(); //TODO
+	printf("Looking up userdiff driver for: %s", gs->path);
 	if (gs->path)
 		gs->driver = userdiff_find_by_path(gs->path);
 	if (!gs->driver)
 		gs->driver = userdiff_find_by_name("default");
+	if (opt->binary == GREP_BINARY_TEXTCONV)
+		gs->driver = userdiff_get_textconv(gs->driver);
 	grep_attr_unlock();
 }
 
-static int grep_source_is_binary(struct grep_source *gs)
+static int grep_source_is_binary(struct grep_source *gs, struct grep_opt *opt)
 {
-	grep_source_load_driver(gs);
+	grep_source_load_driver(gs, opt);
 	if (gs->driver->binary != -1)
 		return gs->driver->binary;
 
-	if (!grep_source_load(gs))
+	if (!grep_source_load(gs, opt))
 		return buffer_is_binary(gs->buf, gs->size);
 
 	return 0;
diff --git a/grep.h b/grep.h
index 8fc854f..d272d25 100644
--- a/grep.h
+++ b/grep.h
@@ -105,6 +105,7 @@ struct grep_opt {
 #define GREP_BINARY_DEFAULT	0
 #define GREP_BINARY_NOMATCH	1
 #define GREP_BINARY_TEXT	2
+#define GREP_BINARY_TEXTCONV	3
 	int binary;
 	int extended;
 	int use_reflog_filter;
@@ -173,7 +174,7 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type,
 		      const void *identifier);
 void grep_source_clear_data(struct grep_source *gs);
 void grep_source_clear(struct grep_source *gs);
-void grep_source_load_driver(struct grep_source *gs);
+void grep_source_load_driver(struct grep_source *gs, struct grep_opt *opt);
 
 
 int grep_source(struct grep_opt *opt, struct grep_source *gs);
-- 
1.8.1.2.718.g9d378fc

             reply	other threads:[~2013-02-04 15:27 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-04 15:27 Michael J Gruber [this message]
2013-02-04 17:12 ` [WIP/RFH/RFD/PATCH] grep: allow to use textconv filters Junio C Hamano
2013-02-05  8:48   ` Michael J Gruber
2013-02-05 11:13 ` Jeff King
2013-02-05 16:21   ` Michael J Gruber
2013-02-05 20:11     ` Jeff King
2013-02-06 15:08       ` [RFC/PATCH 0/4] textconv for show and grep Michael J Gruber
2013-02-06 15:08         ` [RFC/PATCH 1/4] show: obey --textconv for blobs Michael J Gruber
2013-02-06 16:53           ` Junio C Hamano
2013-02-06 22:12             ` Jeff King
2013-02-06 23:49               ` Junio C Hamano
2013-02-07  0:10                 ` Jeff King
2013-02-07  0:26                   ` Junio C Hamano
2013-02-07  8:48             ` Michael J Gruber
2013-02-06 22:06           ` Jeff King
2013-02-07  9:05             ` Michael J Gruber
2013-02-07  9:11               ` Jeff King
2013-02-07  9:34                 ` Michael J Gruber
2013-02-07  9:43                   ` Jeff King
2013-02-06 15:08         ` [RFC/PATCH 2/4] cat-file: do not die on --textconv without textconv filters Michael J Gruber
2013-02-06 16:47           ` Junio C Hamano
2013-02-06 22:19           ` Jeff King
2013-02-06 22:23             ` Junio C Hamano
2013-02-06 22:43               ` Jeff King
2013-02-06 15:08         ` [RFC/PATCH 3/4] grep: allow to use " Michael J Gruber
2013-02-06 15:12           ` Matthieu Moy
2013-02-06 22:23           ` Jeff King
2013-02-06 15:08         ` [RFC/PATCH 4/4] grep: obey --textconv for the case rev:path Michael J Gruber
2013-02-06 22:36           ` Jeff King
2013-02-07  9:05             ` Michael J Gruber
2013-02-07  9:26               ` Jeff King
2013-02-07  9:47                 ` Michael J Gruber
2013-02-07  9:55                   ` Jeff King
2013-02-07 10:31                     ` Michael J Gruber
2013-02-07 18:03                       ` Junio C Hamano
2013-02-08 11:27                         ` Michael J Gruber
2013-02-06 16:55         ` [RFC/PATCH 0/4] textconv for show and grep Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2c0641ea4df6a872a4466efe0c0124f304f44c3e.1359991521.git.git@drmicha.warpmail.net \
    --to=git@drmicha.warpmail.net \
    --cc=git@vger.kernel.org \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.