git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* Implementation of a "textconv" filter for easy custom diff.
@ 2008-09-28  2:06 Matthieu Moy
  2008-09-28  2:06 ` [PATCH] Facility to have multiple kinds of drivers for diff Matthieu Moy
  2008-09-28  4:10 ` Implementation of a "textconv" filter for easy custom diff Jeff King
  0 siblings, 2 replies; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  2:06 UTC (permalink / raw)
  To: git


This patch serie give an implementation of a "textconv" filter for
"git diff", that allows one to diff anything that can easily be
converted to text with just a few lines in ~/.gitconfig and
.gitattributes.

The really cool thing in comparison with GIT_EXTERNAL_DIFF is that one
gets all the cool things of "git diff" (colors & cie) for free.

I had already posted a prototype of this a long time back[1], but I
finally got time to clean up the code a bit.

http://article.gmane.org/gmane.comp.version-control.git/56896

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [PATCH] Facility to have multiple kinds of drivers for diff.
  2008-09-28  2:06 Implementation of a "textconv" filter for easy custom diff Matthieu Moy
@ 2008-09-28  2:06 ` Matthieu Moy
  2008-09-28  2:06   ` [PATCH] Implement run_command_to_buf (spawn a process and reads its stdout) Matthieu Moy
  2008-09-28  4:10 ` Implementation of a "textconv" filter for easy custom diff Jeff King
  1 sibling, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  2:06 UTC (permalink / raw)
  To: git; +Cc: Matthieu Moy

We now have an array attr_cmd_specs describing the possible kinds of
drivers.
---
 diff.c |   63 +++++++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 41 insertions(+), 22 deletions(-)

diff --git a/diff.c b/diff.c
index 099ce3f..1c8dd19 100644
--- a/diff.c
+++ b/diff.c
@@ -56,11 +56,29 @@ static int parse_diff_color_slot(const char *var, int ofs)
 	die("bad config variable '%s'", var);
 }
 
-static struct ll_cmd_driver {
+struct ll_cmd_driver {
 	const char *name;
 	struct ll_cmd_driver *next;
 	const char *cmd;
-} *user_diff, **user_diff_tail;
+};
+
+struct attr_cmd_spec {
+	char * name;
+	struct git_attr * attr_cmd;
+	struct ll_cmd_driver *head;
+	struct ll_cmd_driver **tail;
+};
+
+enum driver_indices {
+	DIFF_DRIVER=0,
+	TEXTCONV_DRIVER=1,
+	DRIVER_COUNT,
+};
+
+static struct attr_cmd_spec attr_cmd_specs[DRIVER_COUNT] = {
+	{"diff", NULL},
+	{"textconv", NULL},
+};
 
 /*
  * Currently there is only "diff.<drivername>.command" variable;
@@ -68,24 +86,24 @@ static struct ll_cmd_driver {
  * this in a bit convoluted way to allow low level diff driver
  * called "color".
  */
-static int parse_ll_command(const char *var, const char *ep, const char *value)
+static int parse_ll_command(const char *var, const char *name,
+			    const char *ep, const char *value,
+			    int driver)
 {
-	const char *name;
 	int namelen;
 	struct ll_cmd_driver *drv;
 
-	name = var + 5;
 	namelen = ep - name;
-	for (drv = user_diff; drv; drv = drv->next)
+	for (drv = attr_cmd_specs[driver].head; drv; drv = drv->next)
 		if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
 			break;
 	if (!drv) {
 		drv = xcalloc(1, sizeof(struct ll_cmd_driver));
 		drv->name = xmemdupz(name, namelen);
-		if (!user_diff_tail)
-			user_diff_tail = &user_diff;
-		*user_diff_tail = drv;
-		user_diff_tail = &(drv->next);
+		if (!attr_cmd_specs[driver].tail)
+			attr_cmd_specs[driver].tail = &attr_cmd_specs[driver].head;
+		*attr_cmd_specs[driver].tail = drv;
+		attr_cmd_specs[driver].tail = &(drv->next);
 	}
 
 	return git_config_string(&(drv->cmd), var, value);
@@ -161,7 +179,7 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 		const char *ep = strrchr(var, '.');
 
 		if (ep != var + 4 && !strcmp(ep, ".command"))
-			return parse_ll_command(var, ep, value);
+			return parse_ll_command(var, var + 5, ep, value, DIFF_DRIVER);
 	}
 
 	return git_diff_basic_config(var, value, cb);
@@ -1340,14 +1358,14 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(file, two, one);
 }
 
-static void setup_diff_attr_check(struct git_attr_check *check)
+/* driver_index must be among the values of "enum driver_indices" */
+static void setup_cmd_attr_check(struct git_attr_check *check, int driver_index)
 {
-	static struct git_attr *attr_diff;
-
-	if (!attr_diff) {
-		attr_diff = git_attr("diff", 4);
+	struct attr_cmd_spec * spec = &attr_cmd_specs[driver_index];
+	if (!spec->attr_cmd) {
+		spec->attr_cmd = git_attr(spec->name, strlen(spec->name));
 	}
-	check[0].attr = attr_diff;
+	check[0].attr = spec->attr_cmd;
 }
 
 static void diff_filespec_check_attr(struct diff_filespec *one)
@@ -1358,7 +1376,7 @@ static void diff_filespec_check_attr(struct diff_filespec *one)
 	if (one->checked_attr)
 		return;
 
-	setup_cmd_attr_check(&attr_cmd_check);
+	setup_cmd_attr_check(&attr_cmd_check, DIFF_DRIVER);
 	one->is_binary = 0;
 	one->funcname_pattern_ident = NULL;
 
@@ -2092,14 +2110,14 @@ static void run_external_diff(const char *pgm,
 	}
 }
 
-static const char *external_cmd_attr(const char *name)
+static const char *external_cmd_attr(const char *name, int driver_index)
 {
 	struct git_attr_check attr_cmd_check;
 
 	if (!name)
 		return NULL;
 
-	setup_diff_attr_check(&attr_cmd_check);
+	setup_cmd_attr_check(&attr_cmd_check, driver_index);
 	if (!git_checkattr(name, 1, &attr_cmd_check)) {
 		const char *value = attr_cmd_check.value;
 		if (!ATTR_TRUE(value) &&
@@ -2107,7 +2125,8 @@ static const char *external_cmd_attr(const char *name)
 		    !ATTR_UNSET(value)) {
 			struct ll_cmd_driver *drv;
 
-			for (drv = user_diff; drv; drv = drv->next)
+			for (drv = attr_cmd_specs[driver_index].head;
+			     drv; drv = drv->next)
 				if (!strcmp(drv->name, value))
 					return drv->cmd;
 		}
@@ -2128,7 +2147,7 @@ static void run_diff_cmd(const char *pgm,
 	if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL))
 		pgm = NULL;
 	else {
-		const char *cmd = external_cmd_attr(attr_path);
+		const char *cmd = external_cmd_attr(attr_path, DIFF_DRIVER);
 		if (cmd)
 			pgm = cmd;
 	}
-- 
1.6.0.2.312.g1ef81a

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH] Implement run_command_to_buf (spawn a process and reads its stdout)
  2008-09-28  2:06 ` [PATCH] Facility to have multiple kinds of drivers for diff Matthieu Moy
@ 2008-09-28  2:06   ` Matthieu Moy
  2008-09-28  2:06     ` [PATCH] Implement a textconv filter for "git diff" Matthieu Moy
  0 siblings, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  2:06 UTC (permalink / raw)
  To: git; +Cc: Matthieu Moy

---
 diff.c |   24 ++++++++++++++++++++++++
 1 files changed, 24 insertions(+), 0 deletions(-)

diff --git a/diff.c b/diff.c
index 1c8dd19..6917981 100644
--- a/diff.c
+++ b/diff.c
@@ -260,6 +260,10 @@ static struct diff_tempfile {
 	char tmp_path[PATH_MAX];
 } diff_temp[2];
 
+/* Forward declarations */
+static int run_command_to_buf(const char **argv, char **buf, size_t * size);
+/* End forward declarations */
+
 static int count_lines(const char *data, int size)
 {
 	int count, ch, completely_empty = 1, nl_just_seen = 0;
@@ -1487,6 +1491,26 @@ void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const
 		options->b_prefix = b;
 }
 
+static int run_command_to_buf(const char **argv, char **buf, size_t * size)
+{
+	struct child_process cmd;
+	struct strbuf buffer = STRBUF_INIT;
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.argv = argv;
+	cmd.out = -1;
+
+	if (start_command(&cmd))
+		return -1;
+	strbuf_read(&buffer, cmd.out, 1024);
+	close(cmd.out);
+
+	if (finish_command(&cmd))
+		return -1;
+
+	*buf = strbuf_detach(&buffer, size);
+	return 0;
+}
+
 static void builtin_diff(const char *name_a,
 			 const char *name_b,
 			 struct diff_filespec *one,
-- 
1.6.0.2.312.g1ef81a

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH] Implement a textconv filter for "git diff"
  2008-09-28  2:06   ` [PATCH] Implement run_command_to_buf (spawn a process and reads its stdout) Matthieu Moy
@ 2008-09-28  2:06     ` Matthieu Moy
  2008-09-28  2:06       ` [PATCH] Document the textconv filter Matthieu Moy
  2008-09-28  4:15       ` [PATCH] Implement a textconv filter for "git diff" Jeff King
  0 siblings, 2 replies; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  2:06 UTC (permalink / raw)
  To: git; +Cc: Matthieu Moy

That feature is similar to the custom diff driver, but the user only has
to provide a text filter (a command to convert a file into a plain-text
representation). Git takes care of diffing, mode change, ...

For example, with

[textconv "odt2txt"]
          command=odt2txt

*.ods textconv=odt2txt
*.odp textconv=odt2txt
*.odt textconv=odt2txt

One can use "git diff" on OpenOffice files (including "git diff --color"
and friends).

This could be extended so that "git blame" can use it also.
---
 diff.c |   68 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 68 insertions(+), 0 deletions(-)

diff --git a/diff.c b/diff.c
index 6917981..7cbc42b 100644
--- a/diff.c
+++ b/diff.c
@@ -181,6 +181,12 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 		if (ep != var + 4 && !strcmp(ep, ".command"))
 			return parse_ll_command(var, var + 5, ep, value, DIFF_DRIVER);
 	}
+	if (!prefixcmp(var, "textconv.")) {
+		const char *ep = strrchr(var, '.');
+
+		if (ep != var + 8 && !strcmp(ep, ".command"))
+			return parse_ll_command(var, var + 9, ep, value, TEXTCONV_DRIVER);
+	}
 
 	return git_diff_basic_config(var, value, cb);
 }
@@ -262,6 +268,10 @@ static struct diff_tempfile {
 
 /* Forward declarations */
 static int run_command_to_buf(const char **argv, char **buf, size_t * size);
+static const char *external_cmd_attr(const char *name, int driver_index);
+static void prepare_temp_file(const char *name,
+			      struct diff_tempfile *temp,
+			      struct diff_filespec *one);
 /* End forward declarations */
 
 static int count_lines(const char *data, int size)
@@ -1511,6 +1521,52 @@ static int run_command_to_buf(const char **argv, char **buf, size_t * size)
 	return 0;
 }
 
+static int textconv_two_files(const char *textconv,
+			      const char *name_a,
+			      const char *name_b,
+			      mmfile_t *mf1,
+			      mmfile_t *mf2,
+			      struct diff_filespec *one,
+			      struct diff_filespec *two)
+{
+	const char *spawn_arg[3];
+	const char **arg = &spawn_arg[0];
+	struct diff_tempfile *temp = diff_temp;
+	char *buf_a, *buf_b;
+	size_t size_a, size_b;
+	prepare_temp_file(name_a, &temp[0], one);
+	*arg++ = textconv;
+	*arg++ = temp[0].name;
+	*arg++ = NULL;
+	/*
+	 * Run both commands before touching arguments to make sure we
+	 * do all or nothing.
+	 */
+	if(run_command_to_buf(spawn_arg, &buf_a, &size_a))
+		return -1;
+
+	prepare_temp_file(name_b, &temp[1], two);
+	spawn_arg[1] = temp[1].name;
+	if(run_command_to_buf(spawn_arg, &buf_b, &size_b))
+		return -1;
+
+	mf1->ptr = buf_a;
+	mf1->size = (long)size_a;
+	one->data = mf1->ptr;
+	one->size = mf1->size;
+	one->should_free = 1;
+	one->should_munmap = 1;
+
+	mf2->ptr = buf_b;
+	mf2->size = (long)size_b;
+	two->data = mf2->ptr;
+	two->size = mf2->size;
+	two->should_free = 1;
+	two->should_munmap = 1;
+	return 0;
+}
+
+
 static void builtin_diff(const char *name_a,
 			 const char *name_b,
 			 struct diff_filespec *one,
@@ -1574,6 +1630,18 @@ static void builtin_diff(const char *name_a,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
+	const char * textconv = external_cmd_attr(name_a, TEXTCONV_DRIVER);
+	const char * textconvb = external_cmd_attr(name_b, TEXTCONV_DRIVER);
+
+	/*
+	 * Only use the textconv driver if it is set, and is the same
+	 * for source and destination file.
+	 */
+	if (textconv && textconvb && !strcmp(textconv, textconvb))
+		if(textconv_two_files(textconv, name_a, name_b, &mf1, &mf2, one, two))
+			fprintf(stderr, "warning: textconv filter failed, "
+				"falling back to plain diff\n");
+
 	if (!DIFF_OPT_TST(o, TEXT) &&
 	    (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) {
 		/* Quite common confusing case */
-- 
1.6.0.2.312.g1ef81a

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH] Document the textconv filter.
  2008-09-28  2:06     ` [PATCH] Implement a textconv filter for "git diff" Matthieu Moy
@ 2008-09-28  2:06       ` Matthieu Moy
  2008-09-28  2:06         ` [PATCH] Add a basic test for " Matthieu Moy
  2008-09-28 11:07         ` [PATCH] Document " Johannes Sixt
  2008-09-28  4:15       ` [PATCH] Implement a textconv filter for "git diff" Jeff King
  1 sibling, 2 replies; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  2:06 UTC (permalink / raw)
  To: git; +Cc: Matthieu Moy

---
 Documentation/gitattributes.txt |   43 +++++++++++++++++++++++++++++++++++++++
 1 files changed, 43 insertions(+), 0 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index e848c94..c4f2b8f 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -325,6 +325,49 @@ patterns are available:
 
 - `tex` suitable for source code for LaTeX documents.
 
+Converting files to text before a diff
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The attribute `textconv` affects 'git diff' in a way similar to the
+`diff` attribute, but with `textconv`, the user provides only a way to
+convert the file into text, and git takes care of doing the diff as
+usual (i.e. other options of 'git diff' such as '--color' remain
+available).
+
+The value of `textconv` must be a string, which is the textconv
+driver.
+
+To tell git to use the `exif` filter for jpeg images, use:
+
+----------------------------------------------------------------
+*.jpg   textconv=exif
+----------------------------------------------------------------
+
+Defining a custom textconv driver
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The definition of the `textconv` driver is done in `gitconfig`. To
+define a driver `exif`, add this to your `$GIT_DIR/config` file (or
+`$HOME/.gitconfig` file):
+
+----------------------------------------------------------------
+[textconv "exif"]
+	command = exiftags
+----------------------------------------------------------------
+
+Git will call the command specified in `command` with the file to
+convert as only argument. The program should write the text on its
+standard output.
+
+Examples of useful filters include:
+
+----------------------------------------------------------------
+[textconv "odt2txt"]
+	command = odt2txt
+[textconv "word"]
+	command = catdoc
+----------------------------------------------------------------
+
 
 Performing a three-way merge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-- 
1.6.0.2.312.g1ef81a

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH] Add a basic test for the textconv filter.
  2008-09-28  2:06       ` [PATCH] Document the textconv filter Matthieu Moy
@ 2008-09-28  2:06         ` Matthieu Moy
  2008-09-28 11:07         ` [PATCH] Document " Johannes Sixt
  1 sibling, 0 replies; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  2:06 UTC (permalink / raw)
  To: git; +Cc: Matthieu Moy

---
 t/t4020-diff-external.sh |    7 +++++++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/t/t4020-diff-external.sh b/t/t4020-diff-external.sh
index dfe3fbc..cf99912 100755
--- a/t/t4020-diff-external.sh
+++ b/t/t4020-diff-external.sh
@@ -99,6 +99,13 @@ test_expect_success 'no diff with -diff' '
 	git diff | grep Binary
 '
 
+test_expect_success 'textconv attribute' '
+	git config textconv.echo.command echo
+	echo >.gitattributes "file textconv=echo"
+
+	git diff | grep "^+file$"
+'
+
 echo NULZbetweenZwords | perl -pe 'y/Z/\000/' > file
 
 test_expect_success 'force diff with "diff"' '
-- 
1.6.0.2.312.g1ef81a

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: Implementation of a "textconv" filter for easy custom diff.
  2008-09-28  2:06 Implementation of a "textconv" filter for easy custom diff Matthieu Moy
  2008-09-28  2:06 ` [PATCH] Facility to have multiple kinds of drivers for diff Matthieu Moy
@ 2008-09-28  4:10 ` Jeff King
  2008-09-28  9:57   ` Matthieu Moy
  1 sibling, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-09-28  4:10 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Sun, Sep 28, 2008 at 04:06:53AM +0200, Matthieu Moy wrote:

> This patch serie give an implementation of a "textconv" filter for
> "git diff", that allows one to diff anything that can easily be
> converted to text with just a few lines in ~/.gitconfig and
> .gitattributes.
> 
> The really cool thing in comparison with GIT_EXTERNAL_DIFF is that one
> gets all the cool things of "git diff" (colors & cie) for free.

Neat. I started on something like this quite a while ago, and have been
meaning to clean it up for some time (and I somehow missed your other
prototype, too). I agree that it matches my goals much better: the
filters are easier to write, and you get the benefit of a nice colorized
diff (or even --color-words!).

We have one major difference in our approaches. In yours, there is a
new "textconv" attribute that can be used. In mine, I subtly changed the
meaning of the "diff=foo" attribute to be "use the diff driver named by
diff.foo.*", and you would set diff.foo.textconv to your command. This
is a bit simpler to implement, and it provides a better path forward for
defining sets of diff tweaks.

For example, one of the limitations of the current syntax is that you
can't say "Choose automatically whether this is binary or text, but if
it is text, use this hunk header." But with my scheme it is easy to do:

  in attributes:
    file diff=foo

  in config:
    [diff "foo"]
    xfuncname = "some regex"
    binary = auto

Is there a particular reason you chose the route you did?

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH] Implement a textconv filter for "git diff"
  2008-09-28  2:06     ` [PATCH] Implement a textconv filter for "git diff" Matthieu Moy
  2008-09-28  2:06       ` [PATCH] Document the textconv filter Matthieu Moy
@ 2008-09-28  4:15       ` Jeff King
  2008-09-28 10:00         ` Matthieu Moy
  1 sibling, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-09-28  4:15 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Sun, Sep 28, 2008 at 04:06:56AM +0200, Matthieu Moy wrote:

> +static int textconv_two_files(const char *textconv,
> +			      const char *name_a,
> +			      const char *name_b,
> +			      mmfile_t *mf1,
> +			      mmfile_t *mf2,
> +			      struct diff_filespec *one,
> +			      struct diff_filespec *two)
> +{

Must we always be textconv'ing two files? What if I am comparing
"v1.5:foo.odf" to "foo.txt" in the working tree?

In my implementation, I textconv one at a time. I just did so from
fill_mmfile, so all of diff automagically just sees the converted text.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Implementation of a "textconv" filter for easy custom diff.
  2008-09-28  4:10 ` Implementation of a "textconv" filter for easy custom diff Jeff King
@ 2008-09-28  9:57   ` Matthieu Moy
  2008-09-28 16:11     ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28  9:57 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Jeff King <peff@peff.net> writes:

> Neat. I started on something like this quite a while ago,

Did you publish/send it anywhere?

> We have one major difference in our approaches. In yours, there is a
> new "textconv" attribute that can be used. In mine, I subtly changed the
> meaning of the "diff=foo" attribute to be "use the diff driver named by
> diff.foo.*", and you would set diff.foo.textconv to your command. This
> is a bit simpler to implement, and it provides a better path forward for
> defining sets of diff tweaks.

Yes, that's an interesting approach too.

One point in favor of mine is that the "textconv" thing is no
necessarily limited to diff-ing. That would be really cool to have
"blame" use it too for example (with quite bad performance, but that
would be the first time I see a tool able to do this).

Well, OTOH, one could argue that "blame" is based on diff-ing, and
therefore it's natural to define a diff filter to tell how "blame"
should work.

> For example, one of the limitations of the current syntax is that you
> can't say "Choose automatically whether this is binary or text, but if
> it is text, use this hunk header." But with my scheme it is easy to do:
>
>   in attributes:
>     file diff=foo
>
>   in config:
>     [diff "foo"]
>     xfuncname = "some regex"
>     binary = auto

No sure that would actually be useful in real life, but it doesn't
harm to have it. And the argument "better path forward for defining
sets of diff tweaks" is a good one IMO.

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH] Implement a textconv filter for "git diff"
  2008-09-28  4:15       ` [PATCH] Implement a textconv filter for "git diff" Jeff King
@ 2008-09-28 10:00         ` Matthieu Moy
  2008-09-28 16:12           ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28 10:00 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Jeff King <peff@peff.net> writes:

> On Sun, Sep 28, 2008 at 04:06:56AM +0200, Matthieu Moy wrote:
>
>> +static int textconv_two_files(const char *textconv,
>> +			      const char *name_a,
>> +			      const char *name_b,
>> +			      mmfile_t *mf1,
>> +			      mmfile_t *mf2,
>> +			      struct diff_filespec *one,
>> +			      struct diff_filespec *two)
>> +{
>
> Must we always be textconv'ing two files? What if I am comparing
> "v1.5:foo.odf" to "foo.txt" in the working tree?

Hmm, why not, and the textconv could be different (like comparing
old:foo.doc with ./foo.odt).

> In my implementation, I textconv one at a time. I just did so from
> fill_mmfile, so all of diff automagically just sees the converted text.

One has to be carefull not to call textconv for plumbing commands,
since the generated patches are not applicable, and only for the eyes
of the reader.

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH] Document the textconv filter.
  2008-09-28  2:06       ` [PATCH] Document the textconv filter Matthieu Moy
  2008-09-28  2:06         ` [PATCH] Add a basic test for " Matthieu Moy
@ 2008-09-28 11:07         ` Johannes Sixt
  2008-09-28 12:29           ` Matthieu Moy
  1 sibling, 1 reply; 85+ messages in thread
From: Johannes Sixt @ 2008-09-28 11:07 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Sonntag, 28. September 2008, Matthieu Moy wrote:
> +Converting files to text before a diff
> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
> +
> +The attribute `textconv` affects 'git diff' in a way similar to the
> +`diff` attribute, but with `textconv`, the user provides only a way to
> +convert the file into text, and git takes care of doing the diff as
> +usual (i.e. other options of 'git diff' such as '--color' remain
> +available).
> +
> +The value of `textconv` must be a string, which is the textconv
> +driver.

Wouldn't it be much more useful to have git parse stdout of the custom diff 
tool in order to colorize it? Of course, this would mean that external diff 
tools are more complicated and their stdout has to conform mostly to the git 
diff syntax. But:

> +To tell git to use the `exif` filter for jpeg images, use:
> +
> +----------------------------------------------------------------
> +*.jpg   textconv=exif
> +----------------------------------------------------------------

In this very example it would be possible that the external diff driver shows 
the differences in the image in a window and also produces EXIF information 
on stdout.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH] Document the textconv filter.
  2008-09-28 11:07         ` [PATCH] Document " Johannes Sixt
@ 2008-09-28 12:29           ` Matthieu Moy
  0 siblings, 0 replies; 85+ messages in thread
From: Matthieu Moy @ 2008-09-28 12:29 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git

Johannes Sixt <johannes.sixt@telecom.at> writes:

> On Sonntag, 28. September 2008, Matthieu Moy wrote:
>> +Converting files to text before a diff
>> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
>> +
>> +The attribute `textconv` affects 'git diff' in a way similar to the
>> +`diff` attribute, but with `textconv`, the user provides only a way to
>> +convert the file into text, and git takes care of doing the diff as
>> +usual (i.e. other options of 'git diff' such as '--color' remain
>> +available).
>> +
>> +The value of `textconv` must be a string, which is the textconv
>> +driver.
>
> Wouldn't it be much more useful to have git parse stdout of the custom diff 
> tool in order to colorize it?

That would do it for --color, but not --color-words for example. The
problem with the GIT_EXTERNAL_DIFF is that the diff tool has to
re-implement everything that "git diff" already do.

Take my opendocument diff script :

http://www-verimag.imag.fr/~moy/opendocument/git-oodiff

That's 77 lines of code as a wrapper to odt2txt and diff. With the
"textconv" attribute, it's one line in .gitattributes, and two in
~/.gitconfig.

> Of course, this would mean that external diff tools are more
> complicated and their stdout has to conform mostly to the git diff
> syntax. But:
>
>> +To tell git to use the `exif` filter for jpeg images, use:
>> +
>> +----------------------------------------------------------------
>> +*.jpg   textconv=exif
>> +----------------------------------------------------------------
>
> In this very example it would be possible that the external diff driver shows 
> the differences in the image in a window and also produces EXIF information 
> on stdout.

Yes, but that's really a specific example. Having git colorize the
diff would be a little extra, but that wouldn't reduce the effort to
write the external diff tool itself, and doesn't give you _all_ of
git-diff, just --color.

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Implementation of a "textconv" filter for easy custom diff.
  2008-09-28  9:57   ` Matthieu Moy
@ 2008-09-28 16:11     ` Jeff King
  2008-09-30 15:19       ` Matthieu Moy
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-09-28 16:11 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Sun, Sep 28, 2008 at 11:57:05AM +0200, Matthieu Moy wrote:

> > Neat. I started on something like this quite a while ago,
> 
> Did you publish/send it anywhere?

No, I was waiting to clean it up and test it a bit more.

> Well, OTOH, one could argue that "blame" is based on diff-ing, and
> therefore it's natural to define a diff filter to tell how "blame"
> should work.

Yes, I would have made that argument. ;)

> >     [diff "foo"]
> >     xfuncname = "some regex"
> >     binary = auto
> 
> No sure that would actually be useful in real life, but it doesn't
> harm to have it. And the argument "better path forward for defining
> sets of diff tweaks" is a good one IMO.

Yes, I think currently most diff options supersede the decision about
whether or not it's binary (like textconv, in which you probably assume
the result is diff-able as text). xfuncname doesn't, but the example is
perhaps a bit contrived. So I do think of it as more of a way for future
expansion.

I seem to recall actually running into this as part of the textconv work
I was doing, but now I can't remember the exact details.  So that's not
that compelling an argumen.t :)

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH] Implement a textconv filter for "git diff"
  2008-09-28 10:00         ` Matthieu Moy
@ 2008-09-28 16:12           ` Jeff King
  0 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-09-28 16:12 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Sun, Sep 28, 2008 at 12:00:44PM +0200, Matthieu Moy wrote:

> Hmm, why not, and the textconv could be different (like comparing
> old:foo.doc with ./foo.odt).

Exactly.

> One has to be carefull not to call textconv for plumbing commands,
> since the generated patches are not applicable, and only for the eyes
> of the reader.

Right, but that is prevented by not loading the appropriate diff driver
config, I believe (and I didn't look closely at your implementation in
that respect, but I believe it is the same as mine, because both
porcelain and plumbing use the code in diff.c).

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Implementation of a "textconv" filter for easy custom diff.
  2008-09-28 16:11     ` Jeff King
@ 2008-09-30 15:19       ` Matthieu Moy
  2008-09-30 16:45         ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-09-30 15:19 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Jeff King <peff@peff.net> writes:

> On Sun, Sep 28, 2008 at 11:57:05AM +0200, Matthieu Moy wrote:
>
>> > Neat. I started on something like this quite a while ago,
>> 
>> Did you publish/send it anywhere?
>
> No, I was waiting to clean it up and test it a bit more.

Well, if you have time and you think your code is better than mine, I
can let you continue on this (and you can pick the parts you want in
mine). Otherwise, I'd be interested in seeing your draft to
incorporate the good things in my version.

Cheers,

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: Implementation of a "textconv" filter for easy custom diff.
  2008-09-30 15:19       ` Matthieu Moy
@ 2008-09-30 16:45         ` Jeff King
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-09-30 16:45 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Tue, Sep 30, 2008 at 05:19:49PM +0200, Matthieu Moy wrote:

> Well, if you have time and you think your code is better than mine, I
> can let you continue on this (and you can pick the parts you want in
> mine). Otherwise, I'd be interested in seeing your draft to
> incorporate the good things in my version.

I am about 90% done cleaning it up for preparation (there is a bit of
refactoring, and I want to make sure I get that just right). I'll post
it in the next day or so.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [PATCH 0/4] diff text conversion filter
  2008-09-30 16:45         ` Jeff King
@ 2008-10-05 21:41           ` Jeff King
  2008-10-05 21:42             ` [PATCH 1/4] t4012: use test_cmp instead of cmp Jeff King
                               ` (5 more replies)
  0 siblings, 6 replies; 85+ messages in thread
From: Jeff King @ 2008-10-05 21:41 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

On Tue, Sep 30, 2008 at 12:45:45PM -0400, Jeff King wrote:

> I am about 90% done cleaning it up for preparation (there is a bit of
> refactoring, and I want to make sure I get that just right). I'll post
> it in the next day or so.

Sorry, I didn't get a chance to look at this until today. Patch series
will follow. It is still missing documentation updates and tests, but I
wanted to get you something to look at (and as I am proposing a new
meaning for "diff driver", I would be curious to hear the general
comments).

This is on top of 'next', because it would otherwise conflict with the
funcname pattern changes there.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [PATCH 1/4] t4012: use test_cmp instead of cmp
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
@ 2008-10-05 21:42             ` Jeff King
  2008-10-05 21:43             ` [PATCH 2/4] diff: unify external diff and funcname parsing code Jeff King
                               ` (4 subsequent siblings)
  5 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-05 21:42 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

This makes erroneous output slightly easier to see. We also
flip the argument order to match our usual style.

Signed-off-by: Jeff King <peff@peff.net>
---
Just a cleanup I noticed while testing.

 t/t4012-diff-binary.sh |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/t/t4012-diff-binary.sh b/t/t4012-diff-binary.sh
index b8ec6e9..0f954a3 100755
--- a/t/t4012-diff-binary.sh
+++ b/t/t4012-diff-binary.sh
@@ -25,11 +25,11 @@ cat > expected <<\EOF
 EOF
 test_expect_success 'diff without --binary' \
 	'git diff | git apply --stat --summary >current &&
-	 cmp current expected'
+	 test_cmp expected current'
 
 test_expect_success 'diff with --binary' \
 	'git diff --binary | git apply --stat --summary >current &&
-	 cmp current expected'
+	 test_cmp expected current'
 
 # apply needs to be able to skip the binary material correctly
 # in order to report the line number of a corrupt patch.
-- 
1.6.0.2.639.g4d7f.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 2/4] diff: unify external diff and funcname parsing code
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
  2008-10-05 21:42             ` [PATCH 1/4] t4012: use test_cmp instead of cmp Jeff King
@ 2008-10-05 21:43             ` Jeff King
  2008-10-05 21:43             ` [PATCH 3/4] diff: introduce diff.<driver>.binary Jeff King
                               ` (3 subsequent siblings)
  5 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-05 21:43 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

Both sets of code assume that one specifies a diff profile
as a gitattribute via the "diff=foo" attribute. They then
pull information about that profile from the config as
diff.foo.*.

The code for each is currently completely separate from the
other, which has several disadvantages:

  - there is duplication as we maintain code to create and
    search the separate lists of external drivers and
    funcname patterns

  - it is difficult to add new profile options, since it is
    unclear where they should go

  - the code is difficult to follow, as we rely on the
    "check if this file is binary" code to find the funcname
    pattern as a side effect. This is the first step in
    refactoring the binary-checking code.

This patch factors out these diff profiles into "userdiff"
drivers. A file with "diff=foo" uses the "foo" driver, which
is specified by a single struct.

Note that one major difference between the two pieces of
code is that the funcname patterns are always loaded,
whereas external drivers are loaded only for the "git diff"
porcelain; the new code takes care to retain that situation.

Signed-off-by: Jeff King <peff@peff.net>
---
This is obviously a large and difficult to read diff. But I hope the
result is a bit more readable.

 Makefile   |    2 +
 diff.c     |  241 +++++++-----------------------------------------------------
 userdiff.c |  151 +++++++++++++++++++++++++++++++++++++
 userdiff.h |   23 ++++++
 4 files changed, 203 insertions(+), 214 deletions(-)
 create mode 100644 userdiff.c
 create mode 100644 userdiff.h

diff --git a/Makefile b/Makefile
index 308dc70..d6f3695 100644
--- a/Makefile
+++ b/Makefile
@@ -389,6 +389,7 @@ LIB_H += transport.h
 LIB_H += tree.h
 LIB_H += tree-walk.h
 LIB_H += unpack-trees.h
+LIB_H += userdiff.h
 LIB_H += utf8.h
 LIB_H += wt-status.h
 
@@ -485,6 +486,7 @@ LIB_OBJS += tree-diff.o
 LIB_OBJS += tree.o
 LIB_OBJS += tree-walk.o
 LIB_OBJS += unpack-trees.o
+LIB_OBJS += userdiff.o
 LIB_OBJS += usage.o
 LIB_OBJS += utf8.o
 LIB_OBJS += walker.o
diff --git a/diff.c b/diff.c
index 4e4e439..08f335f 100644
--- a/diff.c
+++ b/diff.c
@@ -11,6 +11,7 @@
 #include "attr.h"
 #include "run-command.h"
 #include "utf8.h"
+#include "userdiff.h"
 
 #ifdef NO_FAST_WORKING_DIRECTORY
 #define FAST_WORKING_DIRECTORY 0
@@ -56,80 +57,6 @@ static int parse_diff_color_slot(const char *var, int ofs)
 	die("bad config variable '%s'", var);
 }
 
-static struct ll_diff_driver {
-	const char *name;
-	struct ll_diff_driver *next;
-	const char *cmd;
-} *user_diff, **user_diff_tail;
-
-/*
- * Currently there is only "diff.<drivername>.command" variable;
- * because there are "diff.color.<slot>" variables, we are parsing
- * this in a bit convoluted way to allow low level diff driver
- * called "color".
- */
-static int parse_lldiff_command(const char *var, const char *ep, const char *value)
-{
-	const char *name;
-	int namelen;
-	struct ll_diff_driver *drv;
-
-	name = var + 5;
-	namelen = ep - name;
-	for (drv = user_diff; drv; drv = drv->next)
-		if (!strncmp(drv->name, name, namelen) && !drv->name[namelen])
-			break;
-	if (!drv) {
-		drv = xcalloc(1, sizeof(struct ll_diff_driver));
-		drv->name = xmemdupz(name, namelen);
-		if (!user_diff_tail)
-			user_diff_tail = &user_diff;
-		*user_diff_tail = drv;
-		user_diff_tail = &(drv->next);
-	}
-
-	return git_config_string(&(drv->cmd), var, value);
-}
-
-/*
- * 'diff.<what>.funcname' attribute can be specified in the configuration
- * to define a customized regexp to find the beginning of a function to
- * be used for hunk header lines of "diff -p" style output.
- */
-struct funcname_pattern_entry {
-	char *name;
-	char *pattern;
-	int cflags;
-};
-static struct funcname_pattern_list {
-	struct funcname_pattern_list *next;
-	struct funcname_pattern_entry e;
-} *funcname_pattern_list;
-
-static int parse_funcname_pattern(const char *var, const char *ep, const char *value, int cflags)
-{
-	const char *name;
-	int namelen;
-	struct funcname_pattern_list *pp;
-
-	name = var + 5; /* "diff." */
-	namelen = ep - name;
-
-	for (pp = funcname_pattern_list; pp; pp = pp->next)
-		if (!strncmp(pp->e.name, name, namelen) && !pp->e.name[namelen])
-			break;
-	if (!pp) {
-		pp = xcalloc(1, sizeof(*pp));
-		pp->e.name = xmemdupz(name, namelen);
-		pp->next = funcname_pattern_list;
-		funcname_pattern_list = pp;
-	}
-	free(pp->e.pattern);
-	pp->e.pattern = xstrdup(value);
-	pp->e.cflags = cflags;
-	return 0;
-}
-
 /*
  * These are to give UI layer defaults.
  * The core-level commands such as git-diff-files should
@@ -162,11 +89,11 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 	}
 	if (!strcmp(var, "diff.external"))
 		return git_config_string(&external_diff_cmd_cfg, var, value);
-	if (!prefixcmp(var, "diff.")) {
-		const char *ep = strrchr(var, '.');
 
-		if (ep != var + 4 && !strcmp(ep, ".command"))
-			return parse_lldiff_command(var, ep, value);
+	switch (userdiff_config_porcelain(var, value)) {
+		case 0: break;
+		case -1: return -1;
+		default: return 0;
 	}
 
 	return git_diff_basic_config(var, value, cb);
@@ -193,21 +120,10 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
-	if (!prefixcmp(var, "diff.")) {
-		const char *ep = strrchr(var, '.');
-		if (ep != var + 4) {
-			if (!strcmp(ep, ".funcname")) {
-				if (!value)
-					return config_error_nonbool(var);
-				return parse_funcname_pattern(var, ep, value,
-					0);
-			} else if (!strcmp(ep, ".xfuncname")) {
-				if (!value)
-					return config_error_nonbool(var);
-				return parse_funcname_pattern(var, ep, value,
-					REG_EXTENDED);
-			}
-		}
+	switch (userdiff_config_basic(var, value)) {
+		case 0: break;
+		case -1: return -1;
+		default: return 0;
 	}
 
 	return git_color_default_config(var, value, cb);
@@ -1355,46 +1271,24 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(file, two, one);
 }
 
-static void setup_diff_attr_check(struct git_attr_check *check)
-{
-	static struct git_attr *attr_diff;
-
-	if (!attr_diff) {
-		attr_diff = git_attr("diff", 4);
-	}
-	check[0].attr = attr_diff;
-}
-
 static void diff_filespec_check_attr(struct diff_filespec *one)
 {
-	struct git_attr_check attr_diff_check;
+	struct userdiff_driver *drv;
 	int check_from_data = 0;
 
 	if (one->checked_attr)
 		return;
 
-	setup_diff_attr_check(&attr_diff_check);
+	drv = userdiff_find_by_path(one->path);
 	one->is_binary = 0;
-	one->funcname_pattern_ident = NULL;
 
-	if (!git_checkattr(one->path, 1, &attr_diff_check)) {
-		const char *value;
-
-		/* binaryness */
-		value = attr_diff_check.value;
-		if (ATTR_TRUE(value))
-			;
-		else if (ATTR_FALSE(value))
-			one->is_binary = 1;
-		else
-			check_from_data = 1;
-
-		/* funcname pattern ident */
-		if (ATTR_TRUE(value) || ATTR_FALSE(value) || ATTR_UNSET(value))
-			;
-		else
-			one->funcname_pattern_ident = value;
-	}
+	/* binaryness */
+	if (drv == USERDIFF_ATTR_TRUE)
+		;
+	else if (drv == USERDIFF_ATTR_FALSE)
+		one->is_binary = 1;
+	else
+		check_from_data = 1;
 
 	if (check_from_data) {
 		if (!one->data && DIFF_FILE_VALID(one))
@@ -1411,70 +1305,12 @@ int diff_filespec_is_binary(struct diff_filespec *one)
 	return one->is_binary;
 }
 
-static const struct funcname_pattern_entry *funcname_pattern(const char *ident)
-{
-	struct funcname_pattern_list *pp;
-
-	for (pp = funcname_pattern_list; pp; pp = pp->next)
-		if (!strcmp(ident, pp->e.name))
-			return &pp->e;
-	return NULL;
-}
-
-static const struct funcname_pattern_entry builtin_funcname_pattern[] = {
-	{ "bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$",
-	  REG_EXTENDED },
-	{ "html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$", REG_EXTENDED },
-	{ "java",
-	  "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
-	  "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$",
-	  REG_EXTENDED },
-	{ "pascal",
-	  "^((procedure|function|constructor|destructor|interface|"
-		"implementation|initialization|finalization)[ \t]*.*)$"
-	  "\n"
-	  "^(.*=[ \t]*(class|record).*)$",
-	  REG_EXTENDED },
-	{ "php", "^[\t ]*((function|class).*)", REG_EXTENDED },
-	{ "python", "^[ \t]*((class|def)[ \t].*)$", REG_EXTENDED },
-	{ "ruby", "^[ \t]*((class|module|def)[ \t].*)$",
-	  REG_EXTENDED },
-	{ "tex",
-	  "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$",
-	  REG_EXTENDED },
-};
-
-static const struct funcname_pattern_entry *diff_funcname_pattern(struct diff_filespec *one)
+static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one)
 {
-	const char *ident;
-	const struct funcname_pattern_entry *pe;
-	int i;
-
-	diff_filespec_check_attr(one);
-	ident = one->funcname_pattern_ident;
-
-	if (!ident)
-		/*
-		 * If the config file has "funcname.default" defined, that
-		 * regexp is used; otherwise NULL is returned and xemit uses
-		 * the built-in default.
-		 */
-		return funcname_pattern("default");
-
-	/* Look up custom "funcname.$ident" regexp from config. */
-	pe = funcname_pattern(ident);
-	if (pe)
-		return pe;
-
-	/*
-	 * And define built-in fallback patterns here.  Note that
-	 * these can be overridden by the user's config settings.
-	 */
-	for (i = 0; i < ARRAY_SIZE(builtin_funcname_pattern); i++)
-		if (!strcmp(ident, builtin_funcname_pattern[i].name))
-			return &builtin_funcname_pattern[i];
-
-	return NULL;
+	struct userdiff_driver *drv = userdiff_find_by_path(one->path);
+	if (!drv)
+		drv = userdiff_find_by_name("default");
+	return drv && drv->funcname.pattern ? &drv->funcname : NULL;
 }
 
 void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
@@ -1568,7 +1404,7 @@ static void builtin_diff(const char *name_a,
 		xdemitconf_t xecfg;
 		xdemitcb_t ecb;
 		struct emit_callback ecbdata;
-		const struct funcname_pattern_entry *pe;
+		const struct userdiff_funcname *pe;
 
 		pe = diff_funcname_pattern(one);
 		if (!pe)
@@ -2108,29 +1944,6 @@ static void run_external_diff(const char *pgm,
 	}
 }
 
-static const char *external_diff_attr(const char *name)
-{
-	struct git_attr_check attr_diff_check;
-
-	if (!name)
-		return NULL;
-
-	setup_diff_attr_check(&attr_diff_check);
-	if (!git_checkattr(name, 1, &attr_diff_check)) {
-		const char *value = attr_diff_check.value;
-		if (!ATTR_TRUE(value) &&
-		    !ATTR_FALSE(value) &&
-		    !ATTR_UNSET(value)) {
-			struct ll_diff_driver *drv;
-
-			for (drv = user_diff; drv; drv = drv->next)
-				if (!strcmp(drv->name, value))
-					return drv->cmd;
-		}
-	}
-	return NULL;
-}
-
 static void run_diff_cmd(const char *pgm,
 			 const char *name,
 			 const char *other,
@@ -2144,9 +1957,9 @@ static void run_diff_cmd(const char *pgm,
 	if (!DIFF_OPT_TST(o, ALLOW_EXTERNAL))
 		pgm = NULL;
 	else {
-		const char *cmd = external_diff_attr(attr_path);
-		if (cmd)
-			pgm = cmd;
+		struct userdiff_driver *drv = userdiff_find_by_path(attr_path);
+		if (drv && drv->external)
+			pgm = drv->external;
 	}
 
 	if (pgm) {
diff --git a/userdiff.c b/userdiff.c
new file mode 100644
index 0000000..3406adc
--- /dev/null
+++ b/userdiff.c
@@ -0,0 +1,151 @@
+#include "userdiff.h"
+#include "cache.h"
+#include "attr.h"
+
+static struct userdiff_driver *drivers;
+static int ndrivers;
+static int drivers_alloc;
+
+#define FUNCNAME(name, pattern) \
+	{ name, NULL, { pattern, REG_EXTENDED } }
+static struct userdiff_driver builtin_drivers[] = {
+FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
+FUNCNAME("java",
+	 "!^[ \t]*(catch|do|for|if|instanceof|new|return|switch|throw|while)\n"
+	 "^[ \t]*(([ \t]*[A-Za-z_][A-Za-z_0-9]*){2,}[ \t]*\\([^;]*)$"),
+FUNCNAME("pascal",
+	 "^((procedure|function|constructor|destructor|interface|"
+		"implementation|initialization|finalization)[ \t]*.*)$"
+	 "\n"
+	 "^(.*=[ \t]*(class|record).*)$"),
+FUNCNAME("php", "^[\t ]*((function|class).*)"),
+FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
+FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
+FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
+FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+};
+#undef FUNCNAME
+
+static struct userdiff_driver driver_true = {
+	"diff=true",
+	NULL,
+	{ NULL, 0 }
+};
+struct userdiff_driver *USERDIFF_ATTR_TRUE = &driver_true;
+
+static struct userdiff_driver driver_false = {
+	"!diff",
+	NULL,
+	{ NULL, 0 }
+};
+struct userdiff_driver *USERDIFF_ATTR_FALSE = &driver_false;
+
+static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len)
+{
+	int i;
+	for (i = 0; i < ndrivers; i++) {
+		struct userdiff_driver *drv = drivers + i;
+		if (!strncmp(drv->name, k, len) && !drv->name[len])
+			return drv;
+	}
+	for (i = 0; i < ARRAY_SIZE(builtin_drivers); i++) {
+		struct userdiff_driver *drv = builtin_drivers + i;
+		if (!strncmp(drv->name, k, len) && !drv->name[len])
+			return drv;
+	}
+	return NULL;
+}
+
+static struct userdiff_driver *parse_driver(const char *var,
+		const char *value, const char *type)
+{
+	struct userdiff_driver *drv;
+	const char *dot;
+	const char *name;
+	int namelen;
+
+	if (prefixcmp(var, "diff."))
+		return NULL;
+	dot = strrchr(var, '.');
+	if (dot == var + 4)
+		return NULL;
+	if (strcmp(type, dot+1))
+		return NULL;
+
+	name = var + 5;
+	namelen = dot - name;
+	drv = userdiff_find_by_namelen(name, namelen);
+	if (!drv) {
+		ALLOC_GROW(drivers, ndrivers+1, drivers_alloc);
+		drv = &drivers[ndrivers++];
+		memset(drv, 0, sizeof(*drv));
+		drv->name = xmemdupz(name, namelen);
+	}
+	return drv;
+}
+
+static int parse_funcname(struct userdiff_funcname *f, const char *k,
+		const char *v, int cflags)
+{
+	if (git_config_string(&f->pattern, k, v) < 0)
+		return -1;
+	f->cflags = cflags;
+	return 1;
+}
+
+static int parse_string(const char **d, const char *k, const char *v)
+{
+	if (git_config_string(d, k, v) < 0)
+		return -1;
+	return 1;
+}
+
+int userdiff_config_basic(const char *k, const char *v)
+{
+	struct userdiff_driver *drv;
+
+	if ((drv = parse_driver(k, v, "funcname")))
+		return parse_funcname(&drv->funcname, k, v, 0);
+	if ((drv = parse_driver(k, v, "xfuncname")))
+		return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
+
+	return 0;
+}
+
+int userdiff_config_porcelain(const char *k, const char *v)
+{
+	struct userdiff_driver *drv;
+
+	if ((drv = parse_driver(k, v, "command")))
+		return parse_string(&drv->external, k, v);
+
+	return 0;
+}
+
+struct userdiff_driver *userdiff_find_by_name(const char *name) {
+	int len = strlen(name);
+	return userdiff_find_by_namelen(name, len);
+}
+
+struct userdiff_driver *userdiff_find_by_path(const char *path)
+{
+	static struct git_attr *attr;
+	struct git_attr_check check;
+
+	if (!attr)
+		attr = git_attr("diff", 4);
+	check.attr = attr;
+
+	if (!path)
+		return NULL;
+	if (git_checkattr(path, 1, &check))
+		return NULL;
+
+	if (ATTR_TRUE(check.value))
+		return &driver_true;
+	if (ATTR_FALSE(check.value))
+		return &driver_false;
+	if (ATTR_UNSET(check.value))
+		return NULL;
+	return userdiff_find_by_name(check.value);
+}
diff --git a/userdiff.h b/userdiff.h
new file mode 100644
index 0000000..c64c5f5
--- /dev/null
+++ b/userdiff.h
@@ -0,0 +1,23 @@
+#ifndef USERDIFF_H
+#define USERDIFF_H
+
+struct userdiff_funcname {
+	const char *pattern;
+	int cflags;
+};
+
+struct userdiff_driver {
+	const char *name;
+	const char *external;
+	struct userdiff_funcname funcname;
+};
+
+extern struct userdiff_driver *USERDIFF_ATTR_TRUE;
+extern struct userdiff_driver *USERDIFF_ATTR_FALSE;
+
+int userdiff_config_basic(const char *k, const char *v);
+int userdiff_config_porcelain(const char *k, const char *v);
+struct userdiff_driver *userdiff_find_by_name(const char *name);
+struct userdiff_driver *userdiff_find_by_path(const char *path);
+
+#endif /* USERDIFF */
-- 
1.6.0.2.639.g4d7f.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
  2008-10-05 21:42             ` [PATCH 1/4] t4012: use test_cmp instead of cmp Jeff King
  2008-10-05 21:43             ` [PATCH 2/4] diff: unify external diff and funcname parsing code Jeff King
@ 2008-10-05 21:43             ` Jeff King
  2008-10-07 15:17               ` Johannes Sixt
  2008-10-24  2:46               ` Jeff King
  2008-10-05 21:43             ` [PATCH 4/4] diff: add filter for converting binary to text Jeff King
                               ` (2 subsequent siblings)
  5 siblings, 2 replies; 85+ messages in thread
From: Jeff King @ 2008-10-05 21:43 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

The "diff" gitattribute is somewhat overloaded right now. It
can say one of three things:

  1. this file is definitely binary, or definitely not
     (i.e., diff or !diff)
  2. this file should use an external diff engine (i.e.,
     diff=foo, diff.foo.command = custom-script)
  3. this file should use particular funcname patterns
     (i.e., diff=foo, diff.foo.(x?)funcname = some-regex)

Most of the time, there is no conflict between these uses,
since using one implies that the other is irrelevant (e.g.,
an external diff engine will decide for itself whether the
file is binary).

However, there is at least one conflicting situation: there
is no way to say "use the regular rules to determine whether
this file is binary, but if we do diff it textually, use
this funcname pattern." That is, currently setting diff=foo
indicates that the file is definitely text.

This patch introduces a "binary" config option for a diff
driver, so that one can explicitly set diff.foo.binary. We
default this value to "don't know". That is, setting a diff
attribute to "foo" and using "diff.foo.funcname" will have
no effect on the binaryness of a file. To get the current
behavior, one can set diff.foo.binary to true.

This patch also has one additional advantage: it cleans up
the interface to the userdiff code a bit. Before, calling
code had to know more about whether attributes were false,
true, or unset to determine binaryness. Now that binaryness
is a property of a driver, we can represent these situations
just by passing back a driver struct.

Signed-off-by: Jeff King <peff@peff.net>
---
 diff.c     |   52 ++++++++++++++++++++++------------------------------
 diffcore.h |    8 ++++++--
 userdiff.c |   19 ++++++++++++++++---
 userdiff.h |    4 +---
 4 files changed, 45 insertions(+), 38 deletions(-)

diff --git a/diff.c b/diff.c
index 08f335f..ba4f6fa 100644
--- a/diff.c
+++ b/diff.c
@@ -1271,46 +1271,37 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(file, two, one);
 }
 
-static void diff_filespec_check_attr(struct diff_filespec *one)
+void diff_filespec_load_driver(struct diff_filespec *one)
 {
-	struct userdiff_driver *drv;
-	int check_from_data = 0;
-
-	if (one->checked_attr)
-		return;
-
-	drv = userdiff_find_by_path(one->path);
-	one->is_binary = 0;
-
-	/* binaryness */
-	if (drv == USERDIFF_ATTR_TRUE)
-		;
-	else if (drv == USERDIFF_ATTR_FALSE)
-		one->is_binary = 1;
-	else
-		check_from_data = 1;
-
-	if (check_from_data) {
-		if (!one->data && DIFF_FILE_VALID(one))
-			diff_populate_filespec(one, 0);
-
-		if (one->data)
-			one->is_binary = buffer_is_binary(one->data, one->size);
-	}
+	if (!one->driver)
+		one->driver = userdiff_find_by_path(one->path);
+	if (!one->driver)
+		one->driver = userdiff_find_by_name("default");
 }
 
 int diff_filespec_is_binary(struct diff_filespec *one)
 {
-	diff_filespec_check_attr(one);
+	if (one->is_binary == -1) {
+		diff_filespec_load_driver(one);
+		if (one->driver->binary != -1)
+			one->is_binary = one->driver->binary;
+		else {
+			if (!one->data && DIFF_FILE_VALID(one))
+				diff_populate_filespec(one, 0);
+			if (one->data)
+				one->is_binary = buffer_is_binary(one->data,
+						one->size);
+			if (one->is_binary == -1)
+				one->is_binary = 0;
+		}
+	}
 	return one->is_binary;
 }
 
 static const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *one)
 {
-	struct userdiff_driver *drv = userdiff_find_by_path(one->path);
-	if (!drv)
-		drv = userdiff_find_by_name("default");
-	return drv && drv->funcname.pattern ? &drv->funcname : NULL;
+	diff_filespec_load_driver(one);
+	return one->driver->funcname.pattern ? &one->driver->funcname : NULL;
 }
 
 void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const char *b)
@@ -1558,6 +1549,7 @@ struct diff_filespec *alloc_filespec(const char *path)
 	spec->path = (char *)(spec + 1);
 	memcpy(spec->path, path, namelen+1);
 	spec->count = 1;
+	spec->is_binary = -1;
 	return spec;
 }
 
diff --git a/diffcore.h b/diffcore.h
index 8ae3578..713cca7 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -22,6 +22,8 @@
 
 #define MINIMUM_BREAK_SIZE     400 /* do not break a file smaller than this */
 
+struct userdiff_driver;
+
 struct diff_filespec {
 	unsigned char sha1[20];
 	char *path;
@@ -40,8 +42,10 @@ struct diff_filespec {
 #define DIFF_FILE_VALID(spec) (((spec)->mode) != 0)
 	unsigned should_free : 1; /* data should be free()'ed */
 	unsigned should_munmap : 1; /* data should be munmap()'ed */
-	unsigned checked_attr : 1;
-	unsigned is_binary : 1; /* data should be considered "binary" */
+
+	struct userdiff_driver *driver;
+	/* data should be considered "binary"; -1 means "don't know yet" */
+	int is_binary;
 };
 
 extern struct diff_filespec *alloc_filespec(const char *);
diff --git a/userdiff.c b/userdiff.c
index 3406adc..ac6d4a1 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -7,7 +7,7 @@ static int ndrivers;
 static int drivers_alloc;
 
 #define FUNCNAME(name, pattern) \
-	{ name, NULL, { pattern, REG_EXTENDED } }
+	{ name, NULL, -1, { pattern, REG_EXTENDED } }
 static struct userdiff_driver builtin_drivers[] = {
 FUNCNAME("html", "^[ \t]*(<[Hh][1-6][ \t].*>.*)$"),
 FUNCNAME("java",
@@ -23,22 +23,23 @@ FUNCNAME("python", "^[ \t]*((class|def)[ \t].*)$"),
 FUNCNAME("ruby", "^[ \t]*((class|module|def)[ \t].*)$"),
 FUNCNAME("bibtex", "(@[a-zA-Z]{1,}[ \t]*\\{{0,1}[ \t]*[^ \t\"@',\\#}{~%]*).*$"),
 FUNCNAME("tex", "^(\\\\((sub)*section|chapter|part)\\*{0,1}\\{.*)$"),
+{ "default", NULL, -1, { NULL, 0 } },
 };
 #undef FUNCNAME
 
 static struct userdiff_driver driver_true = {
 	"diff=true",
 	NULL,
+	0,
 	{ NULL, 0 }
 };
-struct userdiff_driver *USERDIFF_ATTR_TRUE = &driver_true;
 
 static struct userdiff_driver driver_false = {
 	"!diff",
 	NULL,
+	1,
 	{ NULL, 0 }
 };
-struct userdiff_driver *USERDIFF_ATTR_FALSE = &driver_false;
 
 static struct userdiff_driver *userdiff_find_by_namelen(const char *k, int len)
 {
@@ -80,6 +81,7 @@ static struct userdiff_driver *parse_driver(const char *var,
 		drv = &drivers[ndrivers++];
 		memset(drv, 0, sizeof(*drv));
 		drv->name = xmemdupz(name, namelen);
+		drv->binary = -1;
 	}
 	return drv;
 }
@@ -100,6 +102,15 @@ static int parse_string(const char **d, const char *k, const char *v)
 	return 1;
 }
 
+static int parse_tristate(int *b, const char *k, const char *v)
+{
+	if (v && !strcasecmp(v, "auto"))
+		*b = -1;
+	else
+		*b = git_config_bool(k, v);
+	return 1;
+}
+
 int userdiff_config_basic(const char *k, const char *v)
 {
 	struct userdiff_driver *drv;
@@ -108,6 +119,8 @@ int userdiff_config_basic(const char *k, const char *v)
 		return parse_funcname(&drv->funcname, k, v, 0);
 	if ((drv = parse_driver(k, v, "xfuncname")))
 		return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
+	if ((drv = parse_driver(k, v, "binary")))
+		return parse_tristate(&drv->binary, k, v);
 
 	return 0;
 }
diff --git a/userdiff.h b/userdiff.h
index c64c5f5..1c1eb04 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -9,12 +9,10 @@ struct userdiff_funcname {
 struct userdiff_driver {
 	const char *name;
 	const char *external;
+	int binary;
 	struct userdiff_funcname funcname;
 };
 
-extern struct userdiff_driver *USERDIFF_ATTR_TRUE;
-extern struct userdiff_driver *USERDIFF_ATTR_FALSE;
-
 int userdiff_config_basic(const char *k, const char *v);
 int userdiff_config_porcelain(const char *k, const char *v);
 struct userdiff_driver *userdiff_find_by_name(const char *name);
-- 
1.6.0.2.639.g4d7f.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 4/4] diff: add filter for converting binary to text
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
                               ` (2 preceding siblings ...)
  2008-10-05 21:43             ` [PATCH 3/4] diff: introduce diff.<driver>.binary Jeff King
@ 2008-10-05 21:43             ` Jeff King
  2008-10-05 22:03             ` [PATCH 0/4] diff text conversion filter Jakub Narebski
  2008-10-06  6:29             ` Johannes Sixt
  5 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-05 21:43 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: git

When diffing binary files, it is sometimes nice to see the
differences of a canonical text form rather than either a
binary patch or simply "binary files differ."

Until now, the only option for doing this was to define an
external diff command to perform the diff. This was a lot of
work, since the external command needed to take care of
doing the diff itself (including mode changes), and lost the
benefit of git's colorization and other options.

This patch adds a text conversion option, which converts a
file to its canonical format before performing the diff.
This is less flexible than an arbitrary external diff, but
is much less work to set up. For example:

  $ echo '*.jpg diff=exif' >>.gitattributes
  $ git config diff.exif.textconv exiftool
  $ git config diff.exif.binary false

allows one to see jpg diffs represented by the text output
of exiftool.

Signed-off-by: Jeff King <peff@peff.net>
---
 diff.c     |   49 +++++++++++++++++++++++++++++++++++++++++++++++--
 userdiff.c |    2 ++
 userdiff.h |    1 +
 3 files changed, 50 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index ba4f6fa..421da29 100644
--- a/diff.c
+++ b/diff.c
@@ -38,6 +38,9 @@ static char diff_colors[][COLOR_MAXLEN] = {
 	"\033[41m",	/* WHITESPACE (red background) */
 };
 
+static void diff_filespec_load_driver(struct diff_filespec *one);
+static char *run_textconv(const char *, struct diff_filespec *, size_t *);
+
 static int parse_diff_color_slot(const char *var, int ofs)
 {
 	if (!strcasecmp(var+ofs, "plain"))
@@ -291,8 +294,19 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	}
 	else if (diff_populate_filespec(one, 0))
 		return -1;
-	mf->ptr = one->data;
-	mf->size = one->size;
+
+	diff_filespec_load_driver(one);
+	if (one->driver->textconv) {
+		size_t size;
+		mf->ptr = run_textconv(one->driver->textconv, one, &size);
+		if (!mf->ptr)
+			return -1;
+		mf->size = size;
+	}
+	else {
+		mf->ptr = one->data;
+		mf->size = one->size;
+	}
 	return 0;
 }
 
@@ -3374,3 +3388,34 @@ void diff_unmerge(struct diff_options *options,
 	fill_filespec(one, sha1, mode);
 	diff_queue(&diff_queued_diff, one, two)->is_unmerged = 1;
 }
+
+static char *run_textconv(const char *pgm, struct diff_filespec *spec,
+		size_t *outsize)
+{
+	struct diff_tempfile temp;
+	const char *argv[3];
+	const char **arg = argv;
+	struct child_process child;
+	struct strbuf buf = STRBUF_INIT;
+
+	prepare_temp_file(spec->path, &temp, spec);
+	*arg++ = pgm;
+	*arg++ = temp.name;
+	*arg = NULL;
+
+	memset(&child, 0, sizeof(child));
+	child.argv = argv;
+	child.out = -1;
+	if (start_command(&child) != 0 ||
+	    strbuf_read(&buf, child.out, 0) < 0 ||
+	    finish_command(&child) != 0) {
+		if (temp.name == temp.tmp_path)
+			unlink(temp.name);
+		error("error running textconv command '%s'", pgm);
+		return NULL;
+	}
+	if (temp.name == temp.tmp_path)
+		unlink(temp.name);
+
+	return strbuf_detach(&buf, outsize);
+}
diff --git a/userdiff.c b/userdiff.c
index ac6d4a1..02f225f 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -131,6 +131,8 @@ int userdiff_config_porcelain(const char *k, const char *v)
 
 	if ((drv = parse_driver(k, v, "command")))
 		return parse_string(&drv->external, k, v);
+	if ((drv = parse_driver(k, v, "textconv")))
+		return parse_string(&drv->textconv, k, v);
 
 	return 0;
 }
diff --git a/userdiff.h b/userdiff.h
index 1c1eb04..f29c18f 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -11,6 +11,7 @@ struct userdiff_driver {
 	const char *external;
 	int binary;
 	struct userdiff_funcname funcname;
+	const char *textconv;
 };
 
 int userdiff_config_basic(const char *k, const char *v);
-- 
1.6.0.2.639.g4d7f.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
                               ` (3 preceding siblings ...)
  2008-10-05 21:43             ` [PATCH 4/4] diff: add filter for converting binary to text Jeff King
@ 2008-10-05 22:03             ` Jakub Narebski
  2008-10-06  6:29             ` Johannes Sixt
  5 siblings, 0 replies; 85+ messages in thread
From: Jakub Narebski @ 2008-10-05 22:03 UTC (permalink / raw)
  To: git

Jeff King wrote:

> On Tue, Sep 30, 2008 at 12:45:45PM -0400, Jeff King wrote:
> 
>> I am about 90% done cleaning it up for preparation (there is a bit of
>> refactoring, and I want to make sure I get that just right). I'll post
>> it in the next day or so.
> 
> Sorry, I didn't get a chance to look at this until today. Patch series
> will follow. It is still missing documentation updates and tests, but I
> wanted to get you something to look at (and as I am proposing a new
> meaning for "diff driver", I would be curious to hear the general
> comments).
> 
> This is on top of 'next', because it would otherwise conflict with the
> funcname pattern changes there.

Documentation, pretty please?

-- 
Jakub Narebski
Warsaw, Poland
ShadeHawk on #git

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
                               ` (4 preceding siblings ...)
  2008-10-05 22:03             ` [PATCH 0/4] diff text conversion filter Jakub Narebski
@ 2008-10-06  6:29             ` Johannes Sixt
  2008-10-06  6:52               ` Jeff King
  2008-10-06 15:15               ` Matthieu Moy
  5 siblings, 2 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-06  6:29 UTC (permalink / raw)
  To: Jeff King; +Cc: Matthieu Moy, git

Jeff King schrieb:
> On Tue, Sep 30, 2008 at 12:45:45PM -0400, Jeff King wrote:
> 
>> I am about 90% done cleaning it up for preparation (there is a bit of
>> refactoring, and I want to make sure I get that just right). I'll post
>> it in the next day or so.
> 
> Sorry, I didn't get a chance to look at this until today. Patch series
> will follow. It is still missing documentation updates and tests, but I
> wanted to get you something to look at (and as I am proposing a new
> meaning for "diff driver", I would be curious to hear the general
> comments).
> 
> This is on top of 'next', because it would otherwise conflict with the
> funcname pattern changes there.

Does the series in any way change whether plumbing and porcelain invoke
the external diff drivers? I have this particular use-case, which I'd like
that still works:

- In .git/info/attributes I have specified a diff driver:

    *.doc   diff=docdiff

  The driver runs a script that literally loads the two version of the
  file into MS Word and uses Word's diffing capability.

- git-gui should not use the diff driver. That is, plumbing should bypass
  the diff driver and say "Binary files differ". [*]

- Running 'git diff foo.doc', i.e. porcelain, from a command line should
  use the diff driver.

[*] I would not mind seeing a simplified textual diff in git-gui.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-06  6:29             ` Johannes Sixt
@ 2008-10-06  6:52               ` Jeff King
  2008-10-06  8:55                 ` Johannes Sixt
  2008-10-06 15:15               ` Matthieu Moy
  1 sibling, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-06  6:52 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Matthieu Moy, git

On Mon, Oct 06, 2008 at 08:29:10AM +0200, Johannes Sixt wrote:

> Does the series in any way change whether plumbing and porcelain invoke
> the external diff drivers? I have this particular use-case, which I'd like
> that still works:

No, it tries to keep the behavior the same as it is now (in 2/4, note
how the diff driver config reading is split into porcelain and plumbing
sections). Let me know if you have a test case that doesn't work, or
that you would like me to try.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-06  6:52               ` Jeff King
@ 2008-10-06  8:55                 ` Johannes Sixt
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-06  8:55 UTC (permalink / raw)
  To: Jeff King; +Cc: Matthieu Moy, git

Jeff King schrieb:
> On Mon, Oct 06, 2008 at 08:29:10AM +0200, Johannes Sixt wrote:
> 
>> Does the series in any way change whether plumbing and porcelain invoke
>> the external diff drivers? I have this particular use-case, which I'd like
>> that still works:
> 
> No, it tries to keep the behavior the same as it is now (in 2/4, note
> how the diff driver config reading is split into porcelain and plumbing
> sections). Let me know if you have a test case that doesn't work, or
> that you would like me to try.

A quick check shows that my use-case still works as expected. Thanks.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-06  6:29             ` Johannes Sixt
  2008-10-06  6:52               ` Jeff King
@ 2008-10-06 15:15               ` Matthieu Moy
  2008-10-07  1:20                 ` Jeff King
  1 sibling, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-10-06 15:15 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Jeff King, git

Johannes Sixt <j.sixt@viscovery.net> writes:

> Does the series in any way change whether plumbing and porcelain invoke
> the external diff drivers? I have this particular use-case, which I'd like
> that still works:
>
> - In .git/info/attributes I have specified a diff driver:
>
>     *.doc   diff=docdiff
>
>   The driver runs a script that literally loads the two version of the
>   file into MS Word and uses Word's diffing capability.
>
> - git-gui should not use the diff driver. That is, plumbing should bypass
>   the diff driver and say "Binary files differ". [*]

Actually, I understand you don't want git gui and gitk to load MS-Word
anytime you click something, but I'd love to see the textconv+diff in
gitk.

(yeah, that's pretty hard to specify right, the ideal requirement
seems to be "in a gui, use the good part of the diff driver, but not
the other" :-\).

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-06 15:15               ` Matthieu Moy
@ 2008-10-07  1:20                 ` Jeff King
  2008-10-07  5:52                   ` Johannes Sixt
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-07  1:20 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: Johannes Sixt, git

On Mon, Oct 06, 2008 at 05:15:22PM +0200, Matthieu Moy wrote:

> Actually, I understand you don't want git gui and gitk to load MS-Word
> anytime you click something, but I'd love to see the textconv+diff in
> gitk.
> 
> (yeah, that's pretty hard to specify right, the ideal requirement
> seems to be "in a gui, use the good part of the diff driver, but not
> the other" :-\).

I think it is even more complex than that. Sometimes when doing "git
show" I want to see the textconv'd version, and sometimes I don't. So I
really want a command-line flag or environment variable that I can use
to control it (with a sane default).

So probably the way forward is to stop relying on "oh, we just didn't
parse some of the config, so it won't get used" to just parsing the
config all the time and having a diffopt for "do textconv, do external
diff, etc". And then plumbing can make sure that those flags never get
set, and porcelain can tie them to command-line options.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-07  1:20                 ` Jeff King
@ 2008-10-07  5:52                   ` Johannes Sixt
  2008-10-07  6:00                     ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Johannes Sixt @ 2008-10-07  5:52 UTC (permalink / raw)
  To: Jeff King; +Cc: Matthieu Moy, git

Jeff King schrieb:
> On Mon, Oct 06, 2008 at 05:15:22PM +0200, Matthieu Moy wrote:
> 
>> Actually, I understand you don't want git gui and gitk to load MS-Word
>> anytime you click something, but I'd love to see the textconv+diff in
>> gitk.
>>
>> (yeah, that's pretty hard to specify right, the ideal requirement
>> seems to be "in a gui, use the good part of the diff driver, but not
>> the other" :-\).
> 
> I think it is even more complex than that. Sometimes when doing "git
> show" I want to see the textconv'd version, and sometimes I don't. So I
> really want a command-line flag or environment variable that I can use
> to control it (with a sane default).

How about this: If I run 'git show -- foo.doc' (foo.doc resolves to a
single path, obviously), I want MS Word, but for other uses of 'git show'
I don't. I think that heuristics could be very effective: With a plain
'git show' I get the overview of the change, and with 'git show --
foo.doc' I drill down into a single document.

Or this: 'git show -p' uses the textconv'd version, 'git show' does not
("Binary files differ").

BTW, also with 'git diff' I sometimes don't want MS Word to pop up...

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-07  5:52                   ` Johannes Sixt
@ 2008-10-07  6:00                     ` Jeff King
  2008-10-07  6:15                       ` Matthieu Moy
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-07  6:00 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Matthieu Moy, git

On Tue, Oct 07, 2008 at 07:52:02AM +0200, Johannes Sixt wrote:

> How about this: If I run 'git show -- foo.doc' (foo.doc resolves to a
> single path, obviously), I want MS Word, but for other uses of 'git show'
> I don't. I think that heuristics could be very effective: With a plain
> 'git show' I get the overview of the change, and with 'git show --
> foo.doc' I drill down into a single document.

Hrm. I am not opposed to heuristics, but in this case, I don't like the
one you have proposed. ;P

My specific case that prompted this work is a repository full of
pictures and videos, where I rarely (if ever) change the media content,
but frequently change exif tags. So my "usual" case is to want to see
"git log -p" with the textconv'd version. The commit diffs are otherwise
totally meaningless.

But I think for many others, they are primarily working with text data,
but have some (for example) binary word processor documents. Triggering
such conversions for a single file might make more sense there.

So I'm not sure there is a heuristic that serves both desires. Which is
why I would lean towards a command-line argument, backed by a config
option for those repositories which really want it all the time (and let
me reiterate that such a config option would still have _no_ impact on
plumbing; applying text conversion there is just plain wrong).

> Or this: 'git show -p' uses the textconv'd version, 'git show' does not
> ("Binary files differ").

At that point, is there really an advantage over "git show --textconv"?

> BTW, also with 'git diff' I sometimes don't want MS Word to pop up...

Yes. It is annoying that git can't simply read our minds. :)

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-07  6:00                     ` Jeff King
@ 2008-10-07  6:15                       ` Matthieu Moy
  2008-10-07 15:46                         ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Matthieu Moy @ 2008-10-07  6:15 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, git

Jeff King <peff@peff.net> writes:

> On Tue, Oct 07, 2008 at 07:52:02AM +0200, Johannes Sixt wrote:
>
>> How about this: If I run 'git show -- foo.doc' (foo.doc resolves to a
>> single path, obviously), I want MS Word, but for other uses of 'git show'
>> I don't. I think that heuristics could be very effective: With a plain
>> 'git show' I get the overview of the change, and with 'git show --
>> foo.doc' I drill down into a single document.
>
> Hrm. I am not opposed to heuristics, but in this case, I don't like the
> one you have proposed. ;P
>
> My specific case that prompted this work is a repository full of
> pictures and videos, where I rarely (if ever) change the media content,
> but frequently change exif tags. So my "usual" case is to want to see
> "git log -p" with the textconv'd version. The commit diffs are otherwise
> totally meaningless.

So, you disagree about "git log" not showing the textconv, but you
still agree with half of the proposal :-P. When the user explicitely
requests a single file, he does want textconv (requesting a diff for a
single file and be happy with "binary files differ" would be
strange ...).

It seems quite clear to me that we won't get a heuristic right for
everybody (some diff driver are fast, some are slow, some require an
external GUI, some don't, ...). Better let the thing be nicely
configurable IMHO.

One proposal: have a diff.<driver>.activate with several values:

* "always": activate the diff driver in any porcelain
* "diff": activate it only for "git diff", as currently
* "singlefile": Johannes's heuristic proposal

That way, one could say easily "activate exiftags filter all the time,
but MS-Word only when I request a diff for a single file", and this
leaves room for other values if the need be. Well, there's no room for
"use MS-Word native diff tool in git-gui but antiword/catdoc +
textconv in 'git log -p'" here, but do we want it?

Or is all that just overkill?

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-05 21:43             ` [PATCH 3/4] diff: introduce diff.<driver>.binary Jeff King
@ 2008-10-07 15:17               ` Johannes Sixt
  2008-10-07 15:35                 ` Jeff King
  2008-10-24  2:46               ` Jeff King
  1 sibling, 1 reply; 85+ messages in thread
From: Johannes Sixt @ 2008-10-07 15:17 UTC (permalink / raw)
  To: Jeff King; +Cc: Matthieu Moy, git

Jeff King schrieb:
> The "diff" gitattribute is somewhat overloaded right now. It
> can say one of three things:
> 
>   1. this file is definitely binary, or definitely not
>      (i.e., diff or !diff)
>   2. this file should use an external diff engine (i.e.,
>      diff=foo, diff.foo.command = custom-script)
>   3. this file should use particular funcname patterns
>      (i.e., diff=foo, diff.foo.(x?)funcname = some-regex)
> 
> Most of the time, there is no conflict between these uses,
> since using one implies that the other is irrelevant (e.g.,
> an external diff engine will decide for itself whether the
> file is binary).
> 
> However, there is at least one conflicting situation: there
> is no way to say "use the regular rules to determine whether
> this file is binary, but if we do diff it textually, use
> this funcname pattern." That is, currently setting diff=foo
> indicates that the file is definitely text.

I don't get your point. Can you provide an example?

The reason why I'd like to understand the issue is because I like the
diff.foo.textconv that you introduce in patch 4/4, but I dislike that I
have to set diff.foo.binary to false in order to use the textconv. So, why
is this .binary needed?

> This patch introduces a "binary" config option for a diff
> driver, so that one can explicitly set diff.foo.binary. We
> default this value to "don't know". That is, setting a diff
> attribute to "foo" and using "diff.foo.funcname" will have
> no effect on the binaryness of a file. To get the current
> behavior, one can set diff.foo.binary to true.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-07 15:17               ` Johannes Sixt
@ 2008-10-07 15:35                 ` Jeff King
  2008-10-07 15:54                   ` Johannes Sixt
  2008-10-12  5:24                   ` Junio C Hamano
  0 siblings, 2 replies; 85+ messages in thread
From: Jeff King @ 2008-10-07 15:35 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Matthieu Moy, git

On Tue, Oct 07, 2008 at 05:17:08PM +0200, Johannes Sixt wrote:

> > However, there is at least one conflicting situation: there
> > is no way to say "use the regular rules to determine whether
> > this file is binary, but if we do diff it textually, use
> > this funcname pattern." That is, currently setting diff=foo
> > indicates that the file is definitely text.
> 
> I don't get your point. Can you provide an example?

Let's say I have a subdirectory of files, some of which are binary. But
for those that _aren't_ binary, I want to use a particular funcname
pattern. So I do this:

  echo '* diff=foo' >subdir/.gitattributes
  git config diff.foo.funcname some-regex

Under the old behavior, I have just claimed all of the subdir as text.
But that's not necessarily what I wanted.

In practice, this doesn't happen much, because funcname tends to follow
the file extension, as does binary-ness. So you get "*.java diff=java",
and you really would be insane to have binary files named *.java. But I
think it makes the concept clearer to explain, and the code simpler, if
the various facets of a diff driver are orthogonal. In particular, I
think this makes things cleaner for adding new driver properties in the
future.

As to your complaint (which I think is valid)...

> The reason why I'd like to understand the issue is because I like the
> diff.foo.textconv that you introduce in patch 4/4, but I dislike that I
> have to set diff.foo.binary to false in order to use the textconv. So, why
> is this .binary needed?

I think this .binary is something we can and should get rid of; as it
stands now, you always end up having to set it along with .textconv. I
have considered two ways:

  - because textconv is for converting binary to text for diffing, the
    result should always be text. So whenever we do the conversion, we
    should note that it is no longer binary, and automatically treat the
    result as a text diff. So in this case, we are explicitly saying
    that binaryness is _not_ orthogonal to this property of the diff
    driver.

  - textconv should arguably just be "canonicalize" or similar. That is,
    there is no reason you couldn't take something like text XML and
    canonicalize it for a more readable diff. Or even canonicalize a
    binary file to get a smaller or more sensible binary diff, if you
    wanted to.

    In that case, I think the right thing is to set it back to "unknown,
    check the file contents" if .binary is not set. So you really are
    saying "this is just a conversion, treat the canonicalized contents
    exactly as you would have treated the actual contents, including
    binary detection magic".

And both of those obviously involve changes to patch 4/4.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-07  6:15                       ` Matthieu Moy
@ 2008-10-07 15:46                         ` Jeff King
  2008-10-07 16:15                           ` Johannes Sixt
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-07 15:46 UTC (permalink / raw)
  To: Matthieu Moy; +Cc: Johannes Sixt, git

On Tue, Oct 07, 2008 at 08:15:45AM +0200, Matthieu Moy wrote:

> One proposal: have a diff.<driver>.activate with several values:
> 
> * "always": activate the diff driver in any porcelain
> * "diff": activate it only for "git diff", as currently
> * "singlefile": Johannes's heuristic proposal
> 
> That way, one could say easily "activate exiftags filter all the time,
> but MS-Word only when I request a diff for a single file", and this
> leaves room for other values if the need be. Well, there's no room for
> "use MS-Word native diff tool in git-gui but antiword/catdoc +
> textconv in 'git log -p'" here, but do we want it?
> 
> Or is all that just overkill?

Actually, as soon as I read the first part of your mail I thought of the
"ms-word vs antiword" situation. The example you gave seems plausible.
And it seems to me that it is really a superset of the problem we are
discussing. That is, gitattributes is really just saying "this is a
'foo' type file". And if we have a mechanism flexible enough to say "in
this situation, this is how you handle 'foo' type files", then that
would work as the basis for implementing these heuristics.

For example, maybe you could set up some mapping like:

  git diff --diffdriver=foo,bar

where a file with gitattribute diff=x would look for the driver config
for x.foo, then x.bar, and then finally fall back to just x. Then you
could easily have (ignoring the fact that I'm not sure about the config
syntax for having _3_ section parts):

  git config diff.doc.graphical.command ms-word-diff-script
  git config diff.doc.textconv antiword

and then git-gui would be configured to diff with:

  git diff --diffdriver=graphical

whereas a regular "git diff" would always fall back to
"diff.doc.textconv". Make sense?

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-07 15:35                 ` Jeff King
@ 2008-10-07 15:54                   ` Johannes Sixt
  2008-10-12  5:24                   ` Junio C Hamano
  1 sibling, 0 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-07 15:54 UTC (permalink / raw)
  To: Jeff King; +Cc: Matthieu Moy, git

Jeff King schrieb:
> On Tue, Oct 07, 2008 at 05:17:08PM +0200, Johannes Sixt wrote:
> 
>>> However, there is at least one conflicting situation: there
>>> is no way to say "use the regular rules to determine whether
>>> this file is binary, but if we do diff it textually, use
>>> this funcname pattern." That is, currently setting diff=foo
>>> indicates that the file is definitely text.
>> I don't get your point. Can you provide an example?
> 
> Let's say I have a subdirectory of files, some of which are binary. But
> for those that _aren't_ binary, I want to use a particular funcname
> pattern. So I do this:
> 
>   echo '* diff=foo' >subdir/.gitattributes
>   git config diff.foo.funcname some-regex
> 
> Under the old behavior, I have just claimed all of the subdir as text.
> But that's not necessarily what I wanted.

No, you claimed that all of the files are of type "foo". If this is not
what you wanted, be more specific.

> In practice, this doesn't happen much, because funcname tends to follow
> the file extension, as does binary-ness. So you get "*.java diff=java",
> and you really would be insane to have binary files named *.java. But I
> think it makes the concept clearer to explain, and the code simpler, if
> the various facets of a diff driver are orthogonal. In particular, I
> think this makes things cleaner for adding new driver properties in the
> future.

I tend to disagree. "Binaryness" is subordinate to the "type" of the file,
which is what the diff attribute basically specifies.

> As to your complaint (which I think is valid)...
> 
>> The reason why I'd like to understand the issue is because I like the
>> diff.foo.textconv that you introduce in patch 4/4, but I dislike that I
>> have to set diff.foo.binary to false in order to use the textconv. So, why
>> is this .binary needed?
> 
> I think this .binary is something we can and should get rid of; as it
> stands now, you always end up having to set it along with .textconv. I
> have considered two ways:
> 
>   - because textconv is for converting binary to text for diffing, the
>     result should always be text. So whenever we do the conversion, we
>     should note that it is no longer binary, and automatically treat the
>     result as a text diff. So in this case, we are explicitly saying
>     that binaryness is _not_ orthogonal to this property of the diff
>     driver.
> 
>   - textconv should arguably just be "canonicalize" or similar. That is,
>     there is no reason you couldn't take something like text XML and
>     canonicalize it for a more readable diff. Or even canonicalize a
>     binary file to get a smaller or more sensible binary diff, if you
>     wanted to.
> 
>     In that case, I think the right thing is to set it back to "unknown,
>     check the file contents" if .binary is not set. So you really are
>     saying "this is just a conversion, treat the canonicalized contents
>     exactly as you would have treated the actual contents, including
>     binary detection magic".

I see your point. But this second item goes too far, in particular,
canonicalizing binary files to some other binary form is certainly not
something that should happen under 'git diff' porcelain. (We already have
clean/smudge filters for this purpose.)

For the purpose of generating diffs at the porcelain level (*not*
generating patches to be applied, aka format-patch), we can safely stay
with the interpretation in the first item above.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-07 15:46                         ` Jeff King
@ 2008-10-07 16:15                           ` Johannes Sixt
  2008-10-13  1:29                             ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Johannes Sixt @ 2008-10-07 16:15 UTC (permalink / raw)
  To: Jeff King; +Cc: Matthieu Moy, git

Jeff King schrieb:
> On Tue, Oct 07, 2008 at 08:15:45AM +0200, Matthieu Moy wrote:
> 
>> One proposal: have a diff.<driver>.activate with several values:
>>
>> * "always": activate the diff driver in any porcelain
>> * "diff": activate it only for "git diff", as currently
>> * "singlefile": Johannes's heuristic proposal
>>
>> That way, one could say easily "activate exiftags filter all the time,
>> but MS-Word only when I request a diff for a single file", and this
>> leaves room for other values if the need be. Well, there's no room for
>> "use MS-Word native diff tool in git-gui but antiword/catdoc +
>> textconv in 'git log -p'" here, but do we want it?
>>
>> Or is all that just overkill?
> 
> Actually, as soon as I read the first part of your mail I thought of the
> "ms-word vs antiword" situation. The example you gave seems plausible.
> And it seems to me that it is really a superset of the problem we are
> discussing. That is, gitattributes is really just saying "this is a
> 'foo' type file". And if we have a mechanism flexible enough to say "in
> this situation, this is how you handle 'foo' type files", then that
> would work as the basis for implementing these heuristics.
> 
> For example, maybe you could set up some mapping like:
> 
>   git diff --diffdriver=foo,bar
> 
> where a file with gitattribute diff=x would look for the driver config
> for x.foo, then x.bar, and then finally fall back to just x. Then you
> could easily have (ignoring the fact that I'm not sure about the config
> syntax for having _3_ section parts):
> 
>   git config diff.doc.graphical.command ms-word-diff-script
>   git config diff.doc.textconv antiword
> 
> and then git-gui would be configured to diff with:
> 
>   git diff --diffdriver=graphical
> 
> whereas a regular "git diff" would always fall back to
> "diff.doc.textconv". Make sense?

I think you are complicating things. We already have

   diff.doc.command ms-word-diff-script

and with your 4-patch-series-under-discussion we would have

   diff.doc.texconv antiword

and that should be sufficient. I'm proposing this heuristics:

 * If only textconv is given, all porcelains pick it.
 * If only command is given, all porcelains pick it.
 * If both are given, then
   - git log picks textconv.
   - git show and git diff:
     . if exactly one pathspec was given, pick command;
     . otherwise pick textconv

Plumbing never picks any of them, just like today, nor should git
format-patch. The are other porcelains that could be sorted into this
list, like git blame and (the summary line of) git commit.

BTW, please don't take git-gui as an example that would lauch MS-Word on
each diff. (Neither would gitk.) Both rely on plumbing, and that's good.
gitk has a menu entry "External diff", where the diff.doc.command could be
hooked into.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-07 15:35                 ` Jeff King
  2008-10-07 15:54                   ` Johannes Sixt
@ 2008-10-12  5:24                   ` Junio C Hamano
  2008-10-13  1:23                     ` Jeff King
  1 sibling, 1 reply; 85+ messages in thread
From: Junio C Hamano @ 2008-10-12  5:24 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

> Let's say I have a subdirectory of files, some of which are binary. But
> for those that _aren't_ binary, I want to use a particular funcname
> pattern. So I do this:
>
>   echo '* diff=foo' >subdir/.gitattributes
>   git config diff.foo.funcname some-regex
> ...
> In practice, this doesn't happen much, because funcname tends to follow
> the file extension, as does binary-ness.

I find this a highly contrived example.  Is this ever be useful in
practice?

>> The reason why I'd like to understand the issue is because I like the
>> diff.foo.textconv that you introduce in patch 4/4, but I dislike that I
>> have to set diff.foo.binary to false in order to use the textconv. So, why
>> is this .binary needed?
>
> I think this .binary is something we can and should get rid of; as it
> stands now, you always end up having to set it along with .textconv. I
> have considered two ways:

The logic behind the original behaviour was that the file ought to be
"diff-able" if you are setting up funcname pattern because the funcname
pattern only makes sense if you are doing the textual diff.  In other
words, "should we do textual diff?" and "what funcname pattern should we
use?" are _not_ orthogonal, as wanting to configure the latter does imply
that you do want to see the change in the textual diff format.

For the same rationale, if you have .textconv, I think it is natural for
us to say that you do want to see the change in the textual diff format.
So I'd agree that you can get rid of this .binary business by saying that
having .textconv marks it diffable (IOW, I think your first alternative
makes more sense).

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-12  5:24                   ` Junio C Hamano
@ 2008-10-13  1:23                     ` Jeff King
  2008-10-13  4:00                       ` Junio C Hamano
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-13  1:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Sat, Oct 11, 2008 at 10:24:44PM -0700, Junio C Hamano wrote:

> >   echo '* diff=foo' >subdir/.gitattributes
> >   git config diff.foo.funcname some-regex
> > ...
> > In practice, this doesn't happen much, because funcname tends to follow
> > the file extension, as does binary-ness.
> 
> I find this a highly contrived example.  Is this ever be useful in
> practice?

Well, I was doing something like it. But after reading JSixt's messages,
I think I agree that I was probably abusing the attributes system.

> The logic behind the original behaviour was that the file ought to be
> "diff-able" if you are setting up funcname pattern because the funcname
> pattern only makes sense if you are doing the textual diff.  In other
> words, "should we do textual diff?" and "what funcname pattern should we
> use?" are _not_ orthogonal, as wanting to configure the latter does imply
> that you do want to see the change in the textual diff format.

Yeah, I don't think I can really disagree with that. I had some vague
notion that it opens the path for adding orthogonal options later. But
really, I'm not sure that any exist, since they are, by definition
related to the diff. Unless we want to have diff driver options for how
to do a binary diff.

> For the same rationale, if you have .textconv, I think it is natural for
> us to say that you do want to see the change in the textual diff format.
> So I'd agree that you can get rid of this .binary business by saying that
> having .textconv marks it diffable (IOW, I think your first alternative
> makes more sense).

OK. My re-rolled series will keep the assumption that a diff=* attribute
makes a file non-binary. However, I will still include the 'binary'
struct member in the diff driver, as it greatly simplifies the code. It
is trivial then to support "diff.*.binary" (which would default to
'false') if we feel like it.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 0/4] diff text conversion filter
  2008-10-07 16:15                           ` Johannes Sixt
@ 2008-10-13  1:29                             ` Jeff King
  0 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-13  1:29 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: Matthieu Moy, git

On Tue, Oct 07, 2008 at 06:15:24PM +0200, Johannes Sixt wrote:

> and that should be sufficient. I'm proposing this heuristics:
> 
>  * If only textconv is given, all porcelains pick it.
>  * If only command is given, all porcelains pick it.
>  * If both are given, then
>    - git log picks textconv.
>    - git show and git diff:
>      . if exactly one pathspec was given, pick command;
>      . otherwise pick textconv

I am not 100% convinced that is sufficient, given Matthieu's other post.
But I will not personally be using external diff at all, so I am hard
pressed to come up with a counter example.

> Plumbing never picks any of them, just like today, nor should git
> format-patch. The are other porcelains that could be sorted into this
> list, like git blame and (the summary line of) git commit.

Actually, I think blaming a text-conv'd version would be useful (at
least for my case). But cleary it should be optional, and probably
defaulting to off.

Once again, I think this is a good reason to move to an explicit diff
option for "respect textconv" rather than relying on maybe or maybe not
loading the config.

> BTW, please don't take git-gui as an example that would lauch MS-Word on
> each diff. (Neither would gitk.) Both rely on plumbing, and that's good.
> gitk has a menu entry "External diff", where the diff.doc.command could be
> hooked into.

But they are both porcelain. So in theory, they could behave differently
depending on what config is available (or if not them, some other
third-party porcelain). But I was just responding to Matthieu's
use-case, so I am not sure what exactly people prefer; I am lucky enough
not to version any MS-Word documents at all. :)

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-13  1:23                     ` Jeff King
@ 2008-10-13  4:00                       ` Junio C Hamano
  2008-10-13  4:15                         ` Jeff King
  2008-10-13  8:12                         ` Matthieu Moy
  0 siblings, 2 replies; 85+ messages in thread
From: Junio C Hamano @ 2008-10-13  4:00 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

>> The logic behind the original behaviour was that the file ought to be
>> "diff-able" if you are setting up funcname pattern because the funcname
>> pattern only makes sense if you are doing the textual diff.  In other
>> words, "should we do textual diff?" and "what funcname pattern should we
>> use?" are _not_ orthogonal, as wanting to configure the latter does imply
>> that you do want to see the change in the textual diff format.
>
> Yeah, I don't think I can really disagree with that. I had some vague
> notion that it opens the path for adding orthogonal options later. But
> really, I'm not sure that any exist, since they are, by definition
> related to the diff. Unless we want to have diff driver options for how
> to do a binary diff.

I actually have an independent worry about this whole thing.

The textconv filter is primarily for humans to be able to view the diff,
as far as I understand it, but what would this facility do to the patch
exchange workflow?  There needs either one or both of the following:

 - A command line option to Porcelains to override textconv so that an
   applicable binary diff can be obtained upon request (or format-patch
   always disables textconv); and/or

 - You teach git-apply to use a reverse transformation of textconv, so
   that it does, upon reception of a textconv diff:

   (1) pass existing preimage through textconv;
   (2) apply the patch;
   (3) convert the result back to binary.

Even if we do not implement the latter right now in the first round, if we
want to leave the door open for later for it, we would somehow need to
mark such a patch between textconv output so that what kind of reverse
transformation needs to be applied.

I'd say that format-patch should always disable textconv so that we won't
have to worry about it for now.  If we enable textconv for format-patch,
we do need to include what textconv filter (or "diff attribute") was used
for the path in the output, so that the receiving end can pick a
corresponding reverse transformation.

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-13  4:00                       ` Junio C Hamano
@ 2008-10-13  4:15                         ` Jeff King
  2008-10-13  6:10                           ` Johannes Sixt
  2008-10-13 13:54                           ` Junio C Hamano
  2008-10-13  8:12                         ` Matthieu Moy
  1 sibling, 2 replies; 85+ messages in thread
From: Jeff King @ 2008-10-13  4:15 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Sun, Oct 12, 2008 at 09:00:50PM -0700, Junio C Hamano wrote:

> exchange workflow?  There needs either one or both of the following:
> 
>  - A command line option to Porcelains to override textconv so that an
>    applicable binary diff can be obtained upon request (or format-patch
>    always disables textconv); and/or

format-patch should always disable textconv. I admit that I didn't test
it, but assumed it was covered under the same code path as external
diff (i.e., just not reading in the config). But it doesn't seem to be.

So that is definitely broken in my patch series. But even once that is
fixed, I agree there should be a porcelain switch for turning this off.
I sometimes do "git diff >patch" and read the result into my MUA.
Obviously I would not want textconv'ing there.

>  - You teach git-apply to use a reverse transformation of textconv, so
>    that it does, upon reception of a textconv diff:
> 
>    (1) pass existing preimage through textconv;
>    (2) apply the patch;
>    (3) convert the result back to binary.

The problem with this approach is that it requires that the textconv be
a reversible mapping. And the two motivating examples (dumping exif tags
and converting word processor documents to text) are not; they are lossy
conversions.

It's possible that one could, given the binary preimage and the two
lossy textconv'd versions, produce a custom binary merge that would just
munge the tags, or just munge the text, or whatever. But that is an
order of magnitude more work than writing a textconv, which is usually
as simple as "/path/to/existing/conversion-script".

And the whole point of this exercise was to make it simple to set this
conversion up.

> I'd say that format-patch should always disable textconv so that we won't
> have to worry about it for now.

Agreed, if you remove "for now". I had never intended for these to be
anything but a human-readable display (and while I am generally OK with
generalizing functionality when we can, I think there is real value in
the simplicity here).

If somebody really wants to send patches with converted text for
reference, I would suggest producing a patch with the textconv'd output
as a comment, and including the full binary patch to actually be
applied (and yes, obviously they a malicious attacker could make them
not match, but given the binary patch, we can trivially regenerate the
textconv'd version and confirm it).

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-13  4:15                         ` Jeff King
@ 2008-10-13  6:10                           ` Johannes Sixt
  2008-10-13 13:54                           ` Junio C Hamano
  1 sibling, 0 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-13  6:10 UTC (permalink / raw)
  To: Jeff King; +Cc: Junio C Hamano, Matthieu Moy, git

Jeff King schrieb:
> On Sun, Oct 12, 2008 at 09:00:50PM -0700, Junio C Hamano wrote:
>>  - You teach git-apply to use a reverse transformation of textconv, so
>>    that it does, upon reception of a textconv diff:
>>
>>    (1) pass existing preimage through textconv;
>>    (2) apply the patch;
>>    (3) convert the result back to binary.
> 
> The problem with this approach is that it requires that the textconv be
> a reversible mapping. And the two motivating examples (dumping exif tags
> and converting word processor documents to text) are not; they are lossy
> conversions.
> 
> It's possible that one could, given the binary preimage and the two
> lossy textconv'd versions, produce a custom binary merge that would just
> munge the tags, or just munge the text, or whatever. But that is an
> order of magnitude more work than writing a textconv, which is usually
> as simple as "/path/to/existing/conversion-script".

I fully agree with you. .texconv should only be used for human
consumption. We already have a reversible binary<->text conversion: the
binary diffs.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-13  4:00                       ` Junio C Hamano
  2008-10-13  4:15                         ` Jeff King
@ 2008-10-13  8:12                         ` Matthieu Moy
  1 sibling, 0 replies; 85+ messages in thread
From: Matthieu Moy @ 2008-10-13  8:12 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Jeff King, Johannes Sixt, git

Junio C Hamano <gitster@pobox.com> writes:

> The textconv filter is primarily for humans to be able to view the diff,
> as far as I understand it, but what would this facility do to the patch
> exchange workflow?  There needs either one or both of the following:
>
>  - A command line option to Porcelains to override textconv so that an
>    applicable binary diff can be obtained upon request (or format-patch
>    always disables textconv); and/or

To me (and others in this thread AIUI), the textconv should only
replace the (frustrating) "binary files differ" in porcelain.

>  - You teach git-apply to use a reverse transformation of textconv, so
>    that it does, upon reception of a textconv diff:
>
>    (1) pass existing preimage through textconv;
>    (2) apply the patch;
>    (3) convert the result back to binary.

I can imagine corner-case scenarios where this would be applicable and
somehow usefull (like: modifying _only_ the EXIF tags on one end, and
expect to be able to view and apply the tags modification on the other
end), but I don't think that's something important to worry about. As
other said, it would be really hard to set up for users, with little
benefits.

One possibility that would be simpler to set up would be to include
both the binary diff and the textconved diff in format-patch, and have
"git apply" just verify that the textconv actually matches the binary
diff after applying it. But that reduces the security issue without
really solving it: one could very well send a binary patch that
changes both the image and the exiftags, and claim it only changes the
tags.

In short: agreed with Peff ;-).

-- 
Matthieu

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-13  4:15                         ` Jeff King
  2008-10-13  6:10                           ` Johannes Sixt
@ 2008-10-13 13:54                           ` Junio C Hamano
  1 sibling, 0 replies; 85+ messages in thread
From: Junio C Hamano @ 2008-10-13 13:54 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

> It's possible that one could, given the binary preimage and the two
> lossy textconv'd versions, produce a custom binary merge that would just
> munge the tags, or just munge the text, or whatever. But that is an
> order of magnitude more work than writing a textconv, which is usually
> as simple as "/path/to/existing/conversion-script".
>
> And the whole point of this exercise was to make it simple to set this
> conversion up.
>
>> I'd say that format-patch should always disable textconv so that we won't
>> have to worry about it for now.
>
> Agreed, if you remove "for now". I had never intended for these to be
> anything but a human-readable display (and while I am generally OK with
> generalizing functionality when we can, I think there is real value in
> the simplicity here).

Ok.  I don't disagree with any of the above.

We however need to make it very clear that reversibility (aka ability to
produce applicable diff) is never the goal of the textconv mechanism (and
suggest your "for applicable and reviewable diff, you produce the standard
binary diff and add the textconv diff as a comment" in the documentation),
as I am quite sure that 6 months down the road somebody who was not
involved in this thread would complain.

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-05 21:43             ` [PATCH 3/4] diff: introduce diff.<driver>.binary Jeff King
  2008-10-07 15:17               ` Johannes Sixt
@ 2008-10-24  2:46               ` Jeff King
  2008-10-24  2:48                 ` [PATCH 1/5] diff: add missing static declaration Jeff King
                                   ` (5 more replies)
  1 sibling, 6 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24  2:46 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Sixt, Matthieu Moy

On Sun, Oct 05, 2008 at 05:43:36PM -0400, Jeff King wrote:

> However, there is at least one conflicting situation: there
> is no way to say "use the regular rules to determine whether
> this file is binary, but if we do diff it textually, use
> this funcname pattern." That is, currently setting diff=foo
> indicates that the file is definitely text.

Hrm. I don't know what crack I was smoking when I wrote this (and then
argued about it for weeks afterward). It is actually the _opposite_
situation.  That is, once you have said "diff=foo", there is no way to
say "btw, foo files are _definitely_ text."

See, this is the old code:

> -static void diff_filespec_check_attr(struct diff_filespec *one)
> +void diff_filespec_load_driver(struct diff_filespec *one)
>  {
> -	struct userdiff_driver *drv;
> -	int check_from_data = 0;
> -
> -	if (one->checked_attr)
> -		return;
> -
> -	drv = userdiff_find_by_path(one->path);
> -	one->is_binary = 0;
> -
> -	/* binaryness */
> -	if (drv == USERDIFF_ATTR_TRUE)
> -		;
> -	else if (drv == USERDIFF_ATTR_FALSE)
> -		one->is_binary = 1;
> -	else
> -		check_from_data = 1;
> -
> -	if (check_from_data) {
> -		if (!one->data && DIFF_FILE_VALID(one))
> -			diff_populate_filespec(one, 0);
> -
> -		if (one->data)
> -			one->is_binary = buffer_is_binary(one->data, one->size);
> -	}
> +	if (!one->driver)
> +		one->driver = userdiff_find_by_path(one->path);
> +	if (!one->driver)
> +		one->driver = userdiff_find_by_name("default");
>  }

You can clearly see that we use check_from_data as long as the value is
not true or false. Meaning if it is unspecified _or_ if it has a string
value (actually, this text is hacked up by my previous patch, but you
can look at maint:diff.c and see that it is similar).

And this makes sense. Otherwise, plumbing like "git diff-tree" would
get mightily confused by external diff commands. For example, consider
if you had "foo diff=bar" in your attributes file, and defined
"diff.bar.command". That external diff would be used for git-diff, but
_not_ for diff-tree. But it would be stupid for diff-tree to look at the
attribute and say "oh, we have a diff attr, it must be text."

So the patch is right to keep the binary value to "-1" except for the
few cases where it has been specified explicitly. I found this out when
I tried to "fix" it to the old behavior tonight and discovered lots of
breakage.

And this also means that diff.*.binary really _does_ do something
useful. If you have, for example, a ".foo" file that looks like binary,
but really isn't, _and_ you want to set a custom hunk header for it,
previously you were out of luck. You could do one or the other. Now you
can do:

  git config diff.foo.xfuncname whatever
  git config diff.foo.binary false

and get the desired effect.

As for the fallout from this with regards to our discussion last week,
there were two relevant points:

 - Junio suggested that anytime we use funcname, we always want text
   anyway. I think that is reasonable, but it has never been the case up
   until now (in fact, you were stuck with the _opposite_ until now, so
   my series at least makes it possible to say "always text", though
   you have to do it manually). So I will leave it for now, and
   if people feel strongly, my series provides a sane basis for a patch
   that does this automatically.

 - Johannes complained about having to set "diff.foo.binary = false"
   when we have set "diff.foo.textconv".  I agree that having to set it
   is cumbersome, but what's worse is that it is wrong. You are saying
   "this file is not binary" which is only _sometimes_ true. That is, it
   is only true if we are in a command which actually performs the text
   conversion. Plumbing sees _just_ the binary half, which is outright
   wrong (and which became painfully obvious once I wrote some tests).

   The solution is that textconv'd data should always be treated as
   text, and that takes some refactoring of my patches. I will post a
   series dealing with this in a minute.

Hopefully that explanation made sense. This turned out to be a lot
trickier than I thought (in combination with my apparent crack habit),
and I just spent several hours trying to figure out all of the niggling
details. But I realize the rest of you haven't thought about it for at
least a week. :)

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [PATCH 1/5] diff: add missing static declaration
  2008-10-24  2:46               ` Jeff King
@ 2008-10-24  2:48                 ` Jeff King
  2008-10-24  2:50                 ` [PATCH 2/5] add userdiff textconv tests Jeff King
                                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24  2:48 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Sixt, Matthieu Moy

This function isn't used outside of diff.c; the 'static' was
simply overlooked in the original writing.

Signed-off-by: Jeff King <peff@peff.net>
---
Just a cleanup.

 diff.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/diff.c b/diff.c
index e368fef..d1fd594 100644
--- a/diff.c
+++ b/diff.c
@@ -1282,7 +1282,7 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(file, two, one);
 }
 
-void diff_filespec_load_driver(struct diff_filespec *one)
+static void diff_filespec_load_driver(struct diff_filespec *one)
 {
 	if (!one->driver)
 		one->driver = userdiff_find_by_path(one->path);
-- 
1.6.0.3.518.gdb328.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 2/5] add userdiff textconv tests
  2008-10-24  2:46               ` Jeff King
  2008-10-24  2:48                 ` [PATCH 1/5] diff: add missing static declaration Jeff King
@ 2008-10-24  2:50                 ` Jeff King
  2008-10-24  2:53                 ` [PATCH 3/5] refactor userdiff textconv code Jeff King
                                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24  2:50 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Sixt, Matthieu Moy

These tests provide a basic sanity check that textconv'd
files work. The tests try to describe how this configuration
_should_ work; thus some of the tests are marked to expect
failure.

In particular, we fail to actually textconv anything because
the 'diff.foo.binary' config option is not set, which will
be fixed in the next patch.

Signed-off-by: Jeff King <peff@peff.net>
---
Note that this just tests actual patch generation. Diffstat, etc, are
left for later, as is discussed in the next patch.

 t/t4030-diff-textconv.sh |   80 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 80 insertions(+), 0 deletions(-)
 create mode 100755 t/t4030-diff-textconv.sh

diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
new file mode 100755
index 0000000..d0d7691
--- /dev/null
+++ b/t/t4030-diff-textconv.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+test_description='diff.*.textconv tests'
+. ./test-lib.sh
+
+find_diff() {
+	sed '1,/^index /d' | sed '/^-- $/,$d'
+}
+
+cat >expect.binary <<'EOF'
+Binary files a/file and b/file differ
+EOF
+
+cat >expect.text <<'EOF'
+--- a/file
++++ b/file
+@@ -1 +1,2 @@
+ 0
++1
+EOF
+
+cat >hexdump <<'EOF'
+#!/bin/sh
+perl -e '$/ = undef; $_ = <>; s/./ord($&)/ge; print $_' "$1"
+EOF
+chmod +x hexdump
+
+test_expect_success 'setup binary file with history' '
+	printf "\\0\\n" >file &&
+	git add file &&
+	git commit -m one &&
+	printf "\\1\\n" >>file &&
+	git add file &&
+	git commit -m two
+'
+
+test_expect_success 'file is considered binary by porcelain' '
+	git diff HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_success 'file is considered binary by plumbing' '
+	git diff-tree -p HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_success 'setup textconv filter' '
+	echo file diff=foo >.gitattributes &&
+	git config diff.foo.textconv "$PWD"/hexdump
+'
+
+test_expect_failure 'diff produces text' '
+	git diff HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.text actual
+'
+
+test_expect_success 'diff-tree produces binary' '
+	git diff-tree -p HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_failure 'log produces text' '
+	git log -1 -p >log &&
+	find_diff <log >actual &&
+	test_cmp expect.text actual
+'
+
+# actually passes, but only because textconv is broken
+# and its failure mode happens to be the same as success
+test_expect_failure 'format-patch produces binary' '
+	git format-patch --no-binary --stdout HEAD^ >patch &&
+	find_diff <patch >actual &&
+	test_cmp expect.binary actual
+'
+
+test_done
-- 
1.6.0.3.518.gdb328.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 3/5] refactor userdiff textconv code
  2008-10-24  2:46               ` Jeff King
  2008-10-24  2:48                 ` [PATCH 1/5] diff: add missing static declaration Jeff King
  2008-10-24  2:50                 ` [PATCH 2/5] add userdiff textconv tests Jeff King
@ 2008-10-24  2:53                 ` Jeff King
  2008-10-24  7:15                   ` Johannes Sixt
  2008-10-24 21:12                   ` Junio C Hamano
  2008-10-24  2:55                 ` [PATCH 4/5] userdiff: require explicitly allowing textconv Jeff King
                                   ` (2 subsequent siblings)
  5 siblings, 2 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24  2:53 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Sixt, Matthieu Moy

The initial implementation inserted itself at the level of
fill_mmfile. That is, the diff_filespec always contained the
actual file data, but we set up the xdl mmfile with the fake
data. This had several disadvantages:

 - fill_mmfile gets called from several places, but we don't
   necessarily want all of them to see the textconv'd
   version (e.g., whitespace checking should probably
   consider it binary)

 - the filespec itself carried no information about
   binaryness. If we have textconv'd it, then it is no
   longer binary and we need to mark that somehow (which
   lets us avoid having to manually set the filetype to
   non-binary in the diff driver config)

This patch moves the text conversion to a
diff_filespec_textconv function; this function can be called
to insert the text-converted contents of the file into the
filespec. These contents will survive through any
diff_populate_filespec calls, meaning that it is safe to
pass the resulting filespec to other diff functions which
will look at the content.

We now turn on the text conversion only for actual diffs,
not diffstats or whitespace checking. We may eventually want
it for other formats, too, but we will be conservative for
now.

Signed-off-by: Jeff King <peff@peff.net>
---
I think this is much cleaner. I have a nagging worry that a
text-converted filespec might live longer than I expect. Maybe it would
make sense to free the filespec data after the diff has been generated
(and then further populate_filespec calls would just restore the
original data).

Also, I'm happy to hear comments on which functionality should get
text-converted. I think I like seeing the --stat report the binary
changes.

 diff.c                   |   51 +++++++++++++++++++++++++++++++++++----------
 diffcore.h               |    1 +
 t/t4030-diff-textconv.sh |    6 +---
 3 files changed, 42 insertions(+), 16 deletions(-)

diff --git a/diff.c b/diff.c
index d1fd594..89bd2ff 100644
--- a/diff.c
+++ b/diff.c
@@ -284,6 +284,36 @@ static void emit_rewrite_diff(const char *name_a,
 		copy_file_with_prefix(o->file, '+', two->data, two->size, new, reset);
 }
 
+static int diff_filespec_textconv(struct diff_filespec *one)
+{
+	size_t size;
+	char *buf;
+
+	if (one->data_is_textconv)
+		return 0;
+
+	if (!DIFF_FILE_VALID(one))
+		return 0;
+
+	diff_filespec_load_driver(one);
+	if (!one->driver->textconv)
+		return 0;
+
+	if (diff_populate_filespec(one, 0) < 0)
+		return -1;
+
+	buf = run_textconv(one->driver->textconv, one, &size);
+	if (!buf)
+		return -1;
+
+	diff_free_filespec_blob(one);
+	one->data = buf;
+	one->size = size;
+	one->should_free = 1;
+	one->data_is_textconv = 1;
+	return 0;
+}
+
 static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 {
 	if (!DIFF_FILE_VALID(one)) {
@@ -294,18 +324,8 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	else if (diff_populate_filespec(one, 0))
 		return -1;
 
-	diff_filespec_load_driver(one);
-	if (one->driver->textconv) {
-		size_t size;
-		mf->ptr = run_textconv(one->driver->textconv, one, &size);
-		if (!mf->ptr)
-			return -1;
-		mf->size = size;
-	}
-	else {
-		mf->ptr = one->data;
-		mf->size = one->size;
-	}
+	mf->ptr = one->data;
+	mf->size = one->size;
 	return 0;
 }
 
@@ -1292,6 +1312,8 @@ static void diff_filespec_load_driver(struct diff_filespec *one)
 
 int diff_filespec_is_binary(struct diff_filespec *one)
 {
+	if (one->data_is_textconv)
+		return 0;
 	if (one->is_binary == -1) {
 		diff_filespec_load_driver(one);
 		if (one->driver->binary != -1)
@@ -1387,6 +1409,10 @@ static void builtin_diff(const char *name_a,
 		}
 	}
 
+	if (diff_filespec_textconv(one) < 0 ||
+	    diff_filespec_textconv(two) < 0)
+			die("unable to read files to diff");
+
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
@@ -1778,6 +1804,7 @@ void diff_free_filespec_blob(struct diff_filespec *s)
 		s->should_free = s->should_munmap = 0;
 		s->data = NULL;
 	}
+	s->data_is_textconv = 0;
 }
 
 void diff_free_filespec_data(struct diff_filespec *s)
diff --git a/diffcore.h b/diffcore.h
index 713cca7..33aebc2 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -42,6 +42,7 @@ struct diff_filespec {
 #define DIFF_FILE_VALID(spec) (((spec)->mode) != 0)
 	unsigned should_free : 1; /* data should be free()'ed */
 	unsigned should_munmap : 1; /* data should be munmap()'ed */
+	unsigned data_is_textconv : 1;
 
 	struct userdiff_driver *driver;
 	/* data should be considered "binary"; -1 means "don't know yet" */
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index d0d7691..28d3640 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -51,7 +51,7 @@ test_expect_success 'setup textconv filter' '
 	git config diff.foo.textconv "$PWD"/hexdump
 '
 
-test_expect_failure 'diff produces text' '
+test_expect_success 'diff produces text' '
 	git diff HEAD^ HEAD >diff &&
 	find_diff <diff >actual &&
 	test_cmp expect.text actual
@@ -63,14 +63,12 @@ test_expect_success 'diff-tree produces binary' '
 	test_cmp expect.binary actual
 '
 
-test_expect_failure 'log produces text' '
+test_expect_success 'log produces text' '
 	git log -1 -p >log &&
 	find_diff <log >actual &&
 	test_cmp expect.text actual
 '
 
-# actually passes, but only because textconv is broken
-# and its failure mode happens to be the same as success
 test_expect_failure 'format-patch produces binary' '
 	git format-patch --no-binary --stdout HEAD^ >patch &&
 	find_diff <patch >actual &&
-- 
1.6.0.3.518.gdb328.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 4/5] userdiff: require explicitly allowing textconv
  2008-10-24  2:46               ` Jeff King
                                   ` (2 preceding siblings ...)
  2008-10-24  2:53                 ` [PATCH 3/5] refactor userdiff textconv code Jeff King
@ 2008-10-24  2:55                 ` Jeff King
  2008-10-24  7:04                   ` Johannes Sixt
  2008-10-24  2:56                 ` [PATCH 5/5] document the diff driver textconv feature Jeff King
  2008-10-24  7:02                 ` [PATCH 3/4] diff: introduce diff.<driver>.binary Johannes Sixt
  5 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-24  2:55 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Sixt, Matthieu Moy

Diffs that have been produced with textconv almost certainly
cannot be applied, so we want to be careful not to generate
them in things like format-patch.

This introduces a new diff options, ALLOW_TEXTCONV, which
controls this behavior. It is off by default, but is
explicitly turned on for the "log" family of commands, as
well as the "diff" porcelain.

Because both text conversion and external diffing are
controlled by these diff options, we can get rid of the
"plumbing versus porcelain" distinction when reading the
config. This was an attempt to control the same thing, but
suffered from being too coarse-grained.

Signed-off-by: Jeff King <peff@peff.net>
---
This gives us a nice run-time knob to tweak. There was a lot of
discussion last time around about exactly when text conversion should
happen. The more I think about it, I'm wondering if perhaps this should
be disabled by default, and require "git log --textconv" to activate.

Johannes, you had mentioned some heuristics. Maybe you would like to
take a stab at implementing them on top of this?

 builtin-diff.c           |    1 +
 builtin-log.c            |    1 +
 diff.c                   |   24 ++++++++++--------------
 diff.h                   |    1 +
 t/t4030-diff-textconv.sh |    2 +-
 userdiff.c               |   10 +---------
 userdiff.h               |    3 +--
 7 files changed, 16 insertions(+), 26 deletions(-)

diff --git a/builtin-diff.c b/builtin-diff.c
index 9c8c295..2de5834 100644
--- a/builtin-diff.c
+++ b/builtin-diff.c
@@ -300,6 +300,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
 	}
 	DIFF_OPT_SET(&rev.diffopt, ALLOW_EXTERNAL);
 	DIFF_OPT_SET(&rev.diffopt, RECURSIVE);
+	DIFF_OPT_SET(&rev.diffopt, ALLOW_TEXTCONV);
 
 	/*
 	 * If the user asked for our exit code then don't start a
diff --git a/builtin-log.c b/builtin-log.c
index a0944f7..75d698f 100644
--- a/builtin-log.c
+++ b/builtin-log.c
@@ -59,6 +59,7 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
 		} else
 			die("unrecognized argument: %s", arg);
 	}
+	DIFF_OPT_SET(&rev->diffopt, ALLOW_TEXTCONV);
 }
 
 /*
diff --git a/diff.c b/diff.c
index 89bd2ff..52feba7 100644
--- a/diff.c
+++ b/diff.c
@@ -93,12 +93,6 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 	if (!strcmp(var, "diff.external"))
 		return git_config_string(&external_diff_cmd_cfg, var, value);
 
-	switch (userdiff_config_porcelain(var, value)) {
-		case 0: break;
-		case -1: return -1;
-		default: return 0;
-	}
-
 	return git_diff_basic_config(var, value, cb);
 }
 
@@ -109,6 +103,12 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	switch (userdiff_config(var, value)) {
+		case 0: break;
+		case -1: return -1;
+		default: return 0;
+	}
+
 	if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
 		int slot = parse_diff_color_slot(var, 11);
 		if (!value)
@@ -123,12 +123,6 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
-	switch (userdiff_config_basic(var, value)) {
-		case 0: break;
-		case -1: return -1;
-		default: return 0;
-	}
-
 	return git_color_default_config(var, value, cb);
 }
 
@@ -1409,9 +1403,11 @@ static void builtin_diff(const char *name_a,
 		}
 	}
 
-	if (diff_filespec_textconv(one) < 0 ||
-	    diff_filespec_textconv(two) < 0)
+	if (DIFF_OPT_TST(o, ALLOW_TEXTCONV)) {
+		if (diff_filespec_textconv(one) < 0 ||
+		    diff_filespec_textconv(two) < 0)
 			die("unable to read files to diff");
+	}
 
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
diff --git a/diff.h b/diff.h
index a49d865..42582ed 100644
--- a/diff.h
+++ b/diff.h
@@ -65,6 +65,7 @@ typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
 #define DIFF_OPT_IGNORE_SUBMODULES   (1 << 18)
 #define DIFF_OPT_DIRSTAT_CUMULATIVE  (1 << 19)
 #define DIFF_OPT_DIRSTAT_BY_FILE     (1 << 20)
+#define DIFF_OPT_ALLOW_TEXTCONV      (1 << 21)
 #define DIFF_OPT_TST(opts, flag)    ((opts)->flags & DIFF_OPT_##flag)
 #define DIFF_OPT_SET(opts, flag)    ((opts)->flags |= DIFF_OPT_##flag)
 #define DIFF_OPT_CLR(opts, flag)    ((opts)->flags &= ~DIFF_OPT_##flag)
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 28d3640..e456746 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -69,7 +69,7 @@ test_expect_success 'log produces text' '
 	test_cmp expect.text actual
 '
 
-test_expect_failure 'format-patch produces binary' '
+test_expect_success 'format-patch produces binary' '
 	git format-patch --no-binary --stdout HEAD^ >patch &&
 	find_diff <patch >actual &&
 	test_cmp expect.binary actual
diff --git a/userdiff.c b/userdiff.c
index d95257a..3681062 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -120,7 +120,7 @@ static int parse_tristate(int *b, const char *k, const char *v)
 	return 1;
 }
 
-int userdiff_config_basic(const char *k, const char *v)
+int userdiff_config(const char *k, const char *v)
 {
 	struct userdiff_driver *drv;
 
@@ -130,14 +130,6 @@ int userdiff_config_basic(const char *k, const char *v)
 		return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
 	if ((drv = parse_driver(k, v, "binary")))
 		return parse_tristate(&drv->binary, k, v);
-
-	return 0;
-}
-
-int userdiff_config_porcelain(const char *k, const char *v)
-{
-	struct userdiff_driver *drv;
-
 	if ((drv = parse_driver(k, v, "command")))
 		return parse_string(&drv->external, k, v);
 	if ((drv = parse_driver(k, v, "textconv")))
diff --git a/userdiff.h b/userdiff.h
index f29c18f..ba29457 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -14,8 +14,7 @@ struct userdiff_driver {
 	const char *textconv;
 };
 
-int userdiff_config_basic(const char *k, const char *v);
-int userdiff_config_porcelain(const char *k, const char *v);
+int userdiff_config(const char *k, const char *v);
 struct userdiff_driver *userdiff_find_by_name(const char *name);
 struct userdiff_driver *userdiff_find_by_path(const char *path);
 
-- 
1.6.0.3.518.gdb328.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 5/5] document the diff driver textconv feature
  2008-10-24  2:46               ` Jeff King
                                   ` (3 preceding siblings ...)
  2008-10-24  2:55                 ` [PATCH 4/5] userdiff: require explicitly allowing textconv Jeff King
@ 2008-10-24  2:56                 ` Jeff King
  2008-10-24  7:02                 ` [PATCH 3/4] diff: introduce diff.<driver>.binary Johannes Sixt
  5 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24  2:56 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Johannes Sixt, Matthieu Moy

This patch also changes the term "custom diff driver" to
"external diff driver"; now that there are more facets of a
"custom driver" than just external diffing, it makes sense
to refer to the configuration of "diff.foo.*" as the "foo
diff driver", with "diff.foo.command" as the "external
driver for foo".

Signed-off-by: Jeff King <peff@peff.net>
---
 Documentation/gitattributes.txt |   66 +++++++++++++++++++++++++++++++--------
 1 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 2694559..314e2d3 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -213,10 +213,12 @@ with `crlf`, and then `ident` and fed to `filter`.
 Generating diff text
 ~~~~~~~~~~~~~~~~~~~~
 
-The attribute `diff` affects if 'git-diff' generates textual
-patch for the path or just says `Binary files differ`.  It also
-can affect what line is shown on the hunk header `@@ -k,l +n,m @@`
-line.
+The attribute `diff` affects how 'git' generates diffs for particular
+files. It can tell git whether to generate a textual patch for the path
+or to treat the path as a binary file.  It can also affect what line is
+shown on the hunk header `@@ -k,l +n,m @@` line, tell git to use an
+external command to generate the diff, or ask git to convert binary
+files to a text format before generating the diff.
 
 Set::
 
@@ -227,7 +229,8 @@ Set::
 Unset::
 
 	A path to which the `diff` attribute is unset will
-	generate `Binary files differ`.
+	generate `Binary files differ` (or a binary patch, if
+	binary patches are enabled).
 
 Unspecified::
 
@@ -238,21 +241,21 @@ Unspecified::
 
 String::
 
-	Diff is shown using the specified custom diff driver.
-	The driver program is given its input using the same
-	calling convention as used for GIT_EXTERNAL_DIFF
-	program.  This name is also used for custom hunk header
-	selection.
+	Diff is shown using the specified diff driver.  Each driver may
+	specify one or more options, as described in the following
+	section. The options for the diff driver "foo" are defined
+	by the configuration variables in the "diff.foo" section of the
+	git config file.
 
 
-Defining a custom diff driver
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Defining an external diff driver
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The definition of a diff driver is done in `gitconfig`, not
 `gitattributes` file, so strictly speaking this manual page is a
 wrong place to talk about it.  However...
 
-To define a custom diff driver `jcdiff`, add a section to your
+To define an external diff driver `jcdiff`, add a section to your
 `$GIT_DIR/config` file (or `$HOME/.gitconfig` file) like this:
 
 ----------------------------------------------------------------
@@ -328,6 +331,43 @@ patterns are available:
 - `tex` suitable for source code for LaTeX documents.
 
 
+Performing text diffs of binary files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes it is desirable to see the diff of a text-converted
+version of some binary files. For example, a word processor
+document can be converted to an ASCII text representation, and
+the diff of the text shown. Even though this conversion loses
+some information, the resulting diff is useful for human
+viewing (but cannot be applied directly).
+
+The `textconv` config option is used to define a program for
+performing such a conversion. The program should take a single
+argument, the name of a file to convert, and produce the
+resulting text on stdout.
+
+For example, to show the diff of the exif information of a
+file instead of the binary information (assuming you have the
+exif tool installed):
+
+------------------------
+[diff "jpg"]
+	textconv = exif
+------------------------
+
+NOTE: The text conversion is generally a one-way conversion;
+in this example, we lose the actual image contents and focus
+just on the text data. This means that diffs generated by
+textconv are _not_ suitable for applying. For this reason,
+only `git diff` and the `git log` family of commands (i.e.,
+log, whatchanged, show) will perform text conversion. `git
+format-patch` will never generate this output. If you want to
+send somebody a text-converted diff of a binary file (e.g.,
+because it quickly conveys the changes you have made), you
+should generate it separately and send it as a comment _in
+addition to_ the usual binary diff that you might send.
+
+
 Performing a three-way merge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-- 
1.6.0.3.518.gdb328.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/4] diff: introduce diff.<driver>.binary
  2008-10-24  2:46               ` Jeff King
                                   ` (4 preceding siblings ...)
  2008-10-24  2:56                 ` [PATCH 5/5] document the diff driver textconv feature Jeff King
@ 2008-10-24  7:02                 ` Johannes Sixt
  5 siblings, 0 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-24  7:02 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Matthieu Moy

Thank you for picking up the topic again and continuing on it. Your series
works nicely here, even without setting diff.*.binary.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/5] userdiff: require explicitly allowing textconv
  2008-10-24  2:55                 ` [PATCH 4/5] userdiff: require explicitly allowing textconv Jeff King
@ 2008-10-24  7:04                   ` Johannes Sixt
  0 siblings, 0 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-24  7:04 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Matthieu Moy

Jeff King schrieb:
> This introduces a new diff options, ALLOW_TEXTCONV, which
> controls this behavior. It is off by default, but is
> explicitly turned on for the "log" family of commands, as
> well as the "diff" porcelain.
...
> Johannes, you had mentioned some heuristics. Maybe you would like to
> take a stab at implementing them on top of this?

I'll put it on my todo list for this weekend.

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24  2:53                 ` [PATCH 3/5] refactor userdiff textconv code Jeff King
@ 2008-10-24  7:15                   ` Johannes Sixt
  2008-10-24 12:40                     ` Jeff King
  2008-10-24 13:51                     ` Jeff King
  2008-10-24 21:12                   ` Junio C Hamano
  1 sibling, 2 replies; 85+ messages in thread
From: Johannes Sixt @ 2008-10-24  7:15 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Matthieu Moy

Jeff King schrieb:
> This patch moves the text conversion to a
> diff_filespec_textconv function; this function can be called
> to insert the text-converted contents of the file into the
> filespec. These contents will survive through any
> diff_populate_filespec calls, meaning that it is safe to
> pass the resulting filespec to other diff functions which
> will look at the content.

What do we do when symlinks are involved? Pilot error? Or should we
exclude them from textconv (and diff.*.command, for that matter)
at all times? See a test case below.

> Also, I'm happy to hear comments on which functionality should get
> text-converted. I think I like seeing the --stat report the binary
> changes.

At this time I also think that it is enough to apply textconv only
to generate diffs.

Test case follows:
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index e456746..dab4338 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -19,6 +19,18 @@ cat >expect.text <<'EOF'
 +1
 EOF

+cat >expect.typechange <<'EOF'
+Binary files a/file and /dev/null differ
+diff --git a/file b/file
+new file mode 120000
+index ad8b3d2..67be421
+--- /dev/null
++++ b/file
+@@ -0,0 +1 @@
++frotz
+\ No newline at end of file
+EOF
+
 cat >hexdump <<'EOF'
 #!/bin/sh
 perl -e '$/ = undef; $_ = <>; s/./ord($&)/ge; print $_' "$1"
@@ -75,4 +87,16 @@ test_expect_success 'format-patch produces binary' '
 	test_cmp expect.binary actual
 '

+# make a symlink the hard way that works on symlink-challenged file systems
+test_expect_failure 'textconv does not kick in if there is a type change' '
+	echo -n frotz > file &&
+	git add file &&
+	git ls-files -s | sed -e s/100644/120000/ |
+		git update-index --index-info &&
+	git commit -m typechange &&
+	git show >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.typechange actual
+'
+
 test_done

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24  7:15                   ` Johannes Sixt
@ 2008-10-24 12:40                     ` Jeff King
  2008-10-24 13:51                     ` Jeff King
  1 sibling, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24 12:40 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git, Junio C Hamano, Matthieu Moy

On Fri, Oct 24, 2008 at 09:15:54AM +0200, Johannes Sixt wrote:

> What do we do when symlinks are involved? Pilot error? Or should we
> exclude them from textconv (and diff.*.command, for that matter)
> at all times? See a test case below.

Hrm, I hadn't really considered that. I would guess they should not be
excluded from diff.*.command; the external driver gets the mode, so it
is free to represent symlinks in whatever way it wants.

But clearly they are going to always be text, so there is no point in
textconv'ing them.

> +cat >expect.typechange <<'EOF'
> +Binary files a/file and /dev/null differ
> +diff --git a/file b/file
> +new file mode 120000
> +index ad8b3d2..67be421
> +--- /dev/null
> ++++ b/file
> +@@ -0,0 +1 @@
> ++frotz
> +\ No newline at end of file
> +EOF
> +

I find this output a bit confusing, since it actually breaks it into a
deletion and an addition. But I guess that is orthogonal to the textconv
issue, and intended. I don't think I've ever actually needed to diff a
symlink before.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24  7:15                   ` Johannes Sixt
  2008-10-24 12:40                     ` Jeff King
@ 2008-10-24 13:51                     ` Jeff King
  2008-10-24 14:01                       ` Johannes Sixt
  1 sibling, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-24 13:51 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git, Junio C Hamano, Matthieu Moy

On Fri, Oct 24, 2008 at 09:15:54AM +0200, Johannes Sixt wrote:

> +cat >expect.typechange <<'EOF'
> +Binary files a/file and /dev/null differ
> +diff --git a/file b/file
> +new file mode 120000
> +index ad8b3d2..67be421
> +--- /dev/null
> ++++ b/file
> +@@ -0,0 +1 @@
> ++frotz
> +\ No newline at end of file
> +EOF

Actually, I don't think this is right. The typechange has been broken
into two parts: the removal of the file contents and the addition of the
symlink. So the first part _should_ use the textconv, since it is just
comparing the file contents to /dev/null. But the second part should
not, since it is by definition just the text of the symlink. Ditto for
gitlinks, which have a special text representation. So how about this?

-- >8 --
only textconv regular files

We treat symlinks as text containing the results of the
symlink, so it doesn't make much sense to text-convert them.

Similarly gitlink components just end up as the text
"Subproject commit $sha1", which we should leave intact.

Note that a typechange may be broken into two parts: the
removal of the old part and the addition of the new. In that
case, we _do_ show the textconv for any part which is the
addition or removal of a file we would ordinarily textconv,
since it is purely acting on the file contents.

Signed-off-by: Jeff King <peff@peff.net>
---
 diff.c                   |    3 +++
 t/t4030-diff-textconv.sh |   28 ++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 0 deletions(-)

diff --git a/diff.c b/diff.c
index 52feba7..ce1317d 100644
--- a/diff.c
+++ b/diff.c
@@ -289,6 +289,9 @@ static int diff_filespec_textconv(struct diff_filespec *one)
 	if (!DIFF_FILE_VALID(one))
 		return 0;
 
+	if (!S_ISREG(one->mode))
+		return 0;
+
 	diff_filespec_load_driver(one);
 	if (!one->driver->textconv)
 		return 0;
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index e456746..a5cd99b 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -19,6 +19,22 @@ cat >expect.text <<'EOF'
 +1
 EOF
 
+cat >expect.typechange <<'EOF'
+--- a/file
++++ /dev/null
+@@ -1,2 +0,0 @@
+-0
+-1
+diff --git a/file b/file
+new file mode 120000
+index ad8b3d2..67be421
+--- /dev/null
++++ b/file
+@@ -0,0 +1 @@
++frotz
+\ No newline at end of file
+EOF
+
 cat >hexdump <<'EOF'
 #!/bin/sh
 perl -e '$/ = undef; $_ = <>; s/./ord($&)/ge; print $_' "$1"
@@ -75,4 +91,16 @@ test_expect_success 'format-patch produces binary' '
 	test_cmp expect.binary actual
 '
 
+# make a symlink the hard way that works on symlink-challenged file systems
+test_expect_failure 'textconv does not act on symlinks' '
+	echo -n frotz > file &&
+	git add file &&
+	git ls-files -s | sed -e s/100644/120000/ |
+		git update-index --index-info &&
+	git commit -m typechange &&
+	git show >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.typechange actual
+'
+
 test_done
-- 
1.6.0.3.523.ge05eb.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24 13:51                     ` Jeff King
@ 2008-10-24 14:01                       ` Johannes Sixt
  2008-10-24 14:08                         ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Johannes Sixt @ 2008-10-24 14:01 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano, Matthieu Moy

Jeff King schrieb:
> On Fri, Oct 24, 2008 at 09:15:54AM +0200, Johannes Sixt wrote:
> Actually, I don't think this is right. The typechange has been broken
> into two parts: the removal of the file contents and the addition of the
> symlink. So the first part _should_ use the textconv, since it is just
> comparing the file contents to /dev/null. But the second part should
> not, since it is by definition just the text of the symlink. Ditto for
> gitlinks, which have a special text representation. So how about this?

Your reasoning makes sense, of course. (I thought that forgoing textconv
in these cases would have a simpler implementation; but it can't be a lot
simpler than yours.)

> diff --git a/diff.c b/diff.c
> index 52feba7..ce1317d 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -289,6 +289,9 @@ static int diff_filespec_textconv(struct diff_filespec *one)
>  	if (!DIFF_FILE_VALID(one))
>  		return 0;
>  
> +	if (!S_ISREG(one->mode))
> +		return 0;
> +
>  	diff_filespec_load_driver(one);
>  	if (!one->driver->textconv)
>  		return 0;
...
> +test_expect_failure 'textconv does not act on symlinks' '

Can we trust your solution if you still have 'test_expect_failure' here? ;-)

-- Hannes

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24 14:01                       ` Johannes Sixt
@ 2008-10-24 14:08                         ` Jeff King
  0 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-24 14:08 UTC (permalink / raw)
  To: Johannes Sixt; +Cc: git, Junio C Hamano, Matthieu Moy

On Fri, Oct 24, 2008 at 04:01:59PM +0200, Johannes Sixt wrote:

> Your reasoning makes sense, of course. (I thought that forgoing textconv
> in these cases would have a simpler implementation; but it can't be a lot
> simpler than yours.)

Yeah, I think preventing it entirely for this case would be much more
complex, since you would have to realize much sooner that a textconv was
going to happen.

> > +test_expect_failure 'textconv does not act on symlinks' '
> Can we trust your solution if you still have 'test_expect_failure' here? ;-)

Heh. It helps if I commit before format-patch. ;) Fortunately, this was
the only change I missed out on sending.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24  2:53                 ` [PATCH 3/5] refactor userdiff textconv code Jeff King
  2008-10-24  7:15                   ` Johannes Sixt
@ 2008-10-24 21:12                   ` Junio C Hamano
  2008-10-24 22:50                     ` Jeff King
  1 sibling, 1 reply; 85+ messages in thread
From: Junio C Hamano @ 2008-10-24 21:12 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Johannes Sixt, Matthieu Moy

Jeff King <peff@peff.net> writes:

> I think this is much cleaner. I have a nagging worry that a
> text-converted filespec might live longer than I expect. Maybe it would
> make sense to free the filespec data after the diff has been generated
> (and then further populate_filespec calls would just restore the
> original data).

Either that or drop data_is_textconv and have two sets of <ptr,size> pair
in filespec, one for real data and another purely for diff text
generation.

But I have a very naïve question.

Why isn't this just primarily a patch to diff.c::builtin_diff() function?

That is, you let fill_mmfile() to fill the real data in mf1 and mf2 as
before, ask one/two if they have textconv, and if so, convert mf1 and mf2
in place, and let xdl work on it, like...


	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
		die("unable to read files to diff");

+	if (has_textconv(one))
+		apply_textconv(one, &mf1);
+	if (has_textconv(two))
+		apply_textconv(two, &mf2);

	if (!DIFF_OPT_TST(o, TEXT) &&
-	    (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) {
+	    ( (diff_filespec_is_binary(one) && !has_textconv(one)) ||
+	      (diff_filespec_is_binary(two) && !has_textconv(two)) )) {
		/* Quite common confusing case */
		...
	}
	else {
		/* Crazy xdl interfaces.. */
		const char *diffopts = getenv("GIT_DIFF_OPTS");
		...

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24 21:12                   ` Junio C Hamano
@ 2008-10-24 22:50                     ` Jeff King
  2008-10-24 22:56                       ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-24 22:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Sixt, Matthieu Moy

On Fri, Oct 24, 2008 at 02:12:17PM -0700, Junio C Hamano wrote:

> Either that or drop data_is_textconv and have two sets of <ptr,size> pair
> in filespec, one for real data and another purely for diff text
> generation.

I thought about that. My reasoning against it was two-fold:

 1. We don't want to keep two copies in memory unnecessarily. Of
    course, we could easily free the original, but just store the
    information in a different pointer to make sure they never get
    confused. So that is a non-issue.

 2. I had some vague notion that it is more convenient in the long run
    to do this to the filespec, since we can then transparently pass the
    munged filespec around and pretend like the converted text was the
    original content.

    However, I'm not sure exactly _where_ we would want to do this.
    The obvious places are for patch, for diffstat, or for whitespace
    checking. But all of those places use mmfile, so we can munge them
    in the same way. I haven't looked at using this with blame, but I do
    think "git blame --textconv foo.jpg" would be useful.

    (Actually not true. I did just look for 30 seconds at using this
     with blame, but blame doesn't seem to to use builtin_diff at all).

> That is, you let fill_mmfile() to fill the real data in mf1 and mf2 as
> before, ask one/two if they have textconv, and if so, convert mf1 and mf2
> in place, and let xdl work on it, like...

I think your example would work fine, if there is no other advantage to
having a transparently-munged diff_filespec (as above).

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24 22:50                     ` Jeff King
@ 2008-10-24 22:56                       ` Jeff King
  2008-10-25  0:48                         ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-24 22:56 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Sixt, Matthieu Moy

On Fri, Oct 24, 2008 at 06:50:01PM -0400, Jeff King wrote:

>     However, I'm not sure exactly _where_ we would want to do this.
>     The obvious places are for patch, for diffstat, or for whitespace
>     checking. But all of those places use mmfile, so we can munge them
>     in the same way. I haven't looked at using this with blame, but I do
>     think "git blame --textconv foo.jpg" would be useful.
> 
>     (Actually not true. I did just look for 30 seconds at using this
>      with blame, but blame doesn't seem to to use builtin_diff at all).

Ah, I see. It looks like we would have to munge fill_blob_sha1 in
builtin-blame.c. In which case totally splitting this out from
diff_filespec is even better, since we don't have one there.

So let me try to re-roll my series based on your suggestion, and then
I'll see if I can add "blame --textconv" on top.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 3/5] refactor userdiff textconv code
  2008-10-24 22:56                       ` Jeff King
@ 2008-10-25  0:48                         ` Jeff King
  2008-10-25  0:50                           ` [PATCH 1/7] diff: add missing static declaration Jeff King
                                             ` (6 more replies)
  0 siblings, 7 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Johannes Sixt, Matthieu Moy

On Fri, Oct 24, 2008 at 06:56:05PM -0400, Jeff King wrote:

> Ah, I see. It looks like we would have to munge fill_blob_sha1 in
> builtin-blame.c. In which case totally splitting this out from
> diff_filespec is even better, since we don't have one there.
> 
> So let me try to re-roll my series based on your suggestion, and then
> I'll see if I can add "blame --textconv" on top.

Actually, the blame thing is a little trickier than that. The textconv
stuff is still tied to a diff_filespec because it uses the diff
prepare_temp_file interface. So I will have to work around that if I
want to add blame support.

However, I re-did the existing patches according to your suggestions,
and I think the result is pretty reasonable. Six patches to follow.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [PATCH 1/7] diff: add missing static declaration
  2008-10-25  0:48                         ` Jeff King
@ 2008-10-25  0:50                           ` Jeff King
  2008-10-25  0:51                           ` [PATCH 2/7] add userdiff textconv tests Jeff King
                                             ` (5 subsequent siblings)
  6 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

This function isn't used outside of diff.c; the 'static' was
simply overlooked in the original writing.

Signed-off-by: Jeff King <peff@peff.net>
---
Oops, it is actually 7 patches. This one is the same as before.

 diff.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/diff.c b/diff.c
index e368fef..d1fd594 100644
--- a/diff.c
+++ b/diff.c
@@ -1282,7 +1282,7 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(file, two, one);
 }
 
-void diff_filespec_load_driver(struct diff_filespec *one)
+static void diff_filespec_load_driver(struct diff_filespec *one)
 {
 	if (!one->driver)
 		one->driver = userdiff_find_by_path(one->path);
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 2/7] add userdiff textconv tests
  2008-10-25  0:48                         ` Jeff King
  2008-10-25  0:50                           ` [PATCH 1/7] diff: add missing static declaration Jeff King
@ 2008-10-25  0:51                           ` Jeff King
  2008-10-25  0:52                           ` [PATCH 3/7] textconv: assume text-converted contents are not binary Jeff King
                                             ` (4 subsequent siblings)
  6 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:51 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

These tests provide a basic sanity check that textconv'd
files work. The tests try to describe how this configuration
_should_ work; thus some of the tests are marked to expect
failure.

In particular, we fail to actually textconv anything because
the 'diff.foo.binary' config option is not set, which will
be fixed in the next patch.

This also means that some "expect_failure" tests actually
seem to be fixed; in reality, this is just because textconv
is broken and its failure mode happens to make these tests
work.

Signed-off-by: Jeff King <peff@peff.net>
---
This brings in Johannes' typechange test, and adds a diffstat test
which we will fix along the way.

 t/t4030-diff-textconv.sh |  118 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 118 insertions(+), 0 deletions(-)
 create mode 100755 t/t4030-diff-textconv.sh

diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
new file mode 100755
index 0000000..1b09648
--- /dev/null
+++ b/t/t4030-diff-textconv.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+
+test_description='diff.*.textconv tests'
+. ./test-lib.sh
+
+find_diff() {
+	sed '1,/^index /d' | sed '/^-- $/,$d'
+}
+
+cat >expect.binary <<'EOF'
+Binary files a/file and b/file differ
+EOF
+
+cat >expect.text <<'EOF'
+--- a/file
++++ b/file
+@@ -1 +1,2 @@
+ 0
++1
+EOF
+
+cat >hexdump <<'EOF'
+#!/bin/sh
+perl -e '$/ = undef; $_ = <>; s/./ord($&)/ge; print $_' "$1"
+EOF
+chmod +x hexdump
+
+test_expect_success 'setup binary file with history' '
+	printf "\\0\\n" >file &&
+	git add file &&
+	git commit -m one &&
+	printf "\\1\\n" >>file &&
+	git add file &&
+	git commit -m two
+'
+
+test_expect_success 'file is considered binary by porcelain' '
+	git diff HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_success 'file is considered binary by plumbing' '
+	git diff-tree -p HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_success 'setup textconv filters' '
+	echo file diff=foo >.gitattributes &&
+	git config diff.foo.textconv "$PWD"/hexdump &&
+	git config diff.fail.textconv false
+'
+
+test_expect_failure 'diff produces text' '
+	git diff HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.text actual
+'
+
+test_expect_success 'diff-tree produces binary' '
+	git diff-tree -p HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_failure 'log produces text' '
+	git log -1 -p >log &&
+	find_diff <log >actual &&
+	test_cmp expect.text actual
+'
+
+test_expect_failure 'format-patch produces binary' '
+	git format-patch --no-binary --stdout HEAD^ >patch &&
+	find_diff <patch >actual &&
+	test_cmp expect.binary actual
+'
+
+cat >expect.stat <<'EOF'
+ file |  Bin 2 -> 4 bytes
+ 1 files changed, 0 insertions(+), 0 deletions(-)
+EOF
+test_expect_failure 'diffstat does not run textconv' '
+	echo file diff=fail >.gitattributes &&
+	git diff --stat HEAD^ HEAD >actual &&
+	test_cmp expect.stat actual
+'
+# restore working setup
+echo file diff=foo >.gitattributes
+
+cat >expect.typechange <<'EOF'
+--- a/file
++++ /dev/null
+@@ -1,2 +0,0 @@
+-0
+-1
+diff --git a/file b/file
+new file mode 120000
+index ad8b3d2..67be421
+--- /dev/null
++++ b/file
+@@ -0,0 +1 @@
++frotz
+\ No newline at end of file
+EOF
+# make a symlink the hard way that works on symlink-challenged file systems
+test_expect_failure 'textconv does not act on symlinks' '
+	echo -n frotz > file &&
+	git add file &&
+	git ls-files -s | sed -e s/100644/120000/ |
+		git update-index --index-info &&
+	git commit -m typechange &&
+	git show >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.typechange actual
+'
+
+test_done
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 3/7] textconv: assume text-converted contents are not binary
  2008-10-25  0:48                         ` Jeff King
  2008-10-25  0:50                           ` [PATCH 1/7] diff: add missing static declaration Jeff King
  2008-10-25  0:51                           ` [PATCH 2/7] add userdiff textconv tests Jeff King
@ 2008-10-25  0:52                           ` Jeff King
  2008-10-25  0:52                           ` [PATCH 4/7] textconv: don't convert for every operation Jeff King
                                             ` (3 subsequent siblings)
  6 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

Previously, we would use a 'textconv' filter when filling
the mmfile_t, but then still check the diff_filespec struct,
which was not modified at all by the text conversion,
whether it was binary. And in general, it was, since the
point of text conversion is to munge binary contents. This
meant that the user had to manually set "diff.foo.binary =
false" to convince the text conversion to actually happen.

This patch pulls the logic for "can we textconv this
diff_filespec?" into a separate function. We can then check
this to see if a filespec which is binary would have been
converted.

Signed-off-by: Jeff King <peff@peff.net>
---
This used to be part 1 of the big refactoring patch.

 diff.c                   |   18 ++++++++++++++----
 t/t4030-diff-textconv.sh |    4 ++--
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/diff.c b/diff.c
index d1fd594..8fad215 100644
--- a/diff.c
+++ b/diff.c
@@ -284,8 +284,18 @@ static void emit_rewrite_diff(const char *name_a,
 		copy_file_with_prefix(o->file, '+', two->data, two->size, new, reset);
 }
 
+static const char *get_textconv(struct diff_filespec *one)
+{
+	if (!DIFF_FILE_VALID(one))
+		return NULL;
+	diff_filespec_load_driver(one);
+	return one->driver->textconv;
+}
+
 static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 {
+	const char *textconv;
+
 	if (!DIFF_FILE_VALID(one)) {
 		mf->ptr = (char *)""; /* does not matter */
 		mf->size = 0;
@@ -294,10 +304,9 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	else if (diff_populate_filespec(one, 0))
 		return -1;
 
-	diff_filespec_load_driver(one);
-	if (one->driver->textconv) {
+	if ((textconv = get_textconv(one))) {
 		size_t size;
-		mf->ptr = run_textconv(one->driver->textconv, one, &size);
+		mf->ptr = run_textconv(textconv, one, &size);
 		if (!mf->ptr)
 			return -1;
 		mf->size = size;
@@ -1391,7 +1400,8 @@ static void builtin_diff(const char *name_a,
 		die("unable to read files to diff");
 
 	if (!DIFF_OPT_TST(o, TEXT) &&
-	    (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) {
+	    ( (diff_filespec_is_binary(one) && !get_textconv(one)) ||
+	      (diff_filespec_is_binary(two) && !get_textconv(two)) )) {
 		/* Quite common confusing case */
 		if (mf1.size == mf2.size &&
 		    !memcmp(mf1.ptr, mf2.ptr, mf1.size))
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 1b09648..af94e1a 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -52,7 +52,7 @@ test_expect_success 'setup textconv filters' '
 	git config diff.fail.textconv false
 '
 
-test_expect_failure 'diff produces text' '
+test_expect_success 'diff produces text' '
 	git diff HEAD^ HEAD >diff &&
 	find_diff <diff >actual &&
 	test_cmp expect.text actual
@@ -64,7 +64,7 @@ test_expect_success 'diff-tree produces binary' '
 	test_cmp expect.binary actual
 '
 
-test_expect_failure 'log produces text' '
+test_expect_success 'log produces text' '
 	git log -1 -p >log &&
 	find_diff <log >actual &&
 	test_cmp expect.text actual
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25  0:48                         ` Jeff King
                                             ` (2 preceding siblings ...)
  2008-10-25  0:52                           ` [PATCH 3/7] textconv: assume text-converted contents are not binary Jeff King
@ 2008-10-25  0:52                           ` Jeff King
  2008-10-25  5:41                             ` Junio C Hamano
  2008-10-25  0:54                           ` [PATCH 5/7] userdiff: require explicitly allowing textconv Jeff King
                                             ` (2 subsequent siblings)
  6 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

Since we do the text conversion in fill_mmfile, the
conversion was happening any time we fed data to xdiff,
including diffstat, whitespace checking, etc.

This patch makes it optional for each caller, and
conservatively chooses to turn it on only for actual patch
generation.

Signed-off-by: Jeff King <peff@peff.net>
---
And this was part 2 of the big refactoring patch.

 diff.c                   |   19 ++++++++++---------
 t/t4030-diff-textconv.sh |    2 +-
 2 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/diff.c b/diff.c
index 8fad215..fcdfd7b 100644
--- a/diff.c
+++ b/diff.c
@@ -292,7 +292,8 @@ static const char *get_textconv(struct diff_filespec *one)
 	return one->driver->textconv;
 }
 
-static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
+static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one,
+		int try_textconv)
 {
 	const char *textconv;
 
@@ -304,7 +305,7 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	else if (diff_populate_filespec(one, 0))
 		return -1;
 
-	if ((textconv = get_textconv(one))) {
+	if (try_textconv && (textconv = get_textconv(one))) {
 		size_t size;
 		mf->ptr = run_textconv(textconv, one, &size);
 		if (!mf->ptr)
@@ -1396,7 +1397,7 @@ static void builtin_diff(const char *name_a,
 		}
 	}
 
-	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
+	if (fill_mmfile(&mf1, one, 1) < 0 || fill_mmfile(&mf2, two, 1) < 0)
 		die("unable to read files to diff");
 
 	if (!DIFF_OPT_TST(o, TEXT) &&
@@ -1486,7 +1487,7 @@ static void builtin_diffstat(const char *name_a, const char *name_b,
 		data->added = count_lines(two->data, two->size);
 		goto free_and_return;
 	}
-	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
+	if (fill_mmfile(&mf1, one, 0) < 0 || fill_mmfile(&mf2, two, 0) < 0)
 		die("unable to read files to diff");
 
 	if (diff_filespec_is_binary(one) || diff_filespec_is_binary(two)) {
@@ -1528,7 +1529,7 @@ static void builtin_checkdiff(const char *name_a, const char *name_b,
 	data.o = o;
 	data.ws_rule = whitespace_rule(attr_path);
 
-	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
+	if (fill_mmfile(&mf1, one, 0) < 0 || fill_mmfile(&mf2, two, 0) < 0)
 		die("unable to read files to diff");
 
 	/*
@@ -2085,8 +2086,8 @@ static void run_diff(struct diff_filepair *p, struct diff_options *o)
 
 		if (DIFF_OPT_TST(o, BINARY)) {
 			mmfile_t mf;
-			if ((!fill_mmfile(&mf, one) && diff_filespec_is_binary(one)) ||
-			    (!fill_mmfile(&mf, two) && diff_filespec_is_binary(two)))
+			if ((!fill_mmfile(&mf, one, 0) && diff_filespec_is_binary(one)) ||
+			    (!fill_mmfile(&mf, two, 0) && diff_filespec_is_binary(two)))
 				abbrev = 40;
 		}
 		strbuf_addf(&msg, "index %.*s..%.*s",
@@ -2983,8 +2984,8 @@ static int diff_get_patch_id(struct diff_options *options, unsigned char *sha1)
 
 		diff_fill_sha1_info(p->one);
 		diff_fill_sha1_info(p->two);
-		if (fill_mmfile(&mf1, p->one) < 0 ||
-				fill_mmfile(&mf2, p->two) < 0)
+		if (fill_mmfile(&mf1, p->one, 0) < 0 ||
+				fill_mmfile(&mf2, p->two, 0) < 0)
 			return error("unable to read files to diff");
 
 		len1 = remove_space(p->one->path, strlen(p->one->path));
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index af94e1a..090a21d 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -80,7 +80,7 @@ cat >expect.stat <<'EOF'
  file |  Bin 2 -> 4 bytes
  1 files changed, 0 insertions(+), 0 deletions(-)
 EOF
-test_expect_failure 'diffstat does not run textconv' '
+test_expect_success 'diffstat does not run textconv' '
 	echo file diff=fail >.gitattributes &&
 	git diff --stat HEAD^ HEAD >actual &&
 	test_cmp expect.stat actual
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 5/7] userdiff: require explicitly allowing textconv
  2008-10-25  0:48                         ` Jeff King
                                             ` (3 preceding siblings ...)
  2008-10-25  0:52                           ` [PATCH 4/7] textconv: don't convert for every operation Jeff King
@ 2008-10-25  0:54                           ` Jeff King
  2008-10-25  0:54                           ` [PATCH 6/7] document the diff driver textconv feature Jeff King
  2008-10-25  0:55                           ` [PATCH 7/7] only textconv regular files Jeff King
  6 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

Diffs that have been produced with textconv almost certainly
cannot be applied, so we want to be careful not to generate
them in things like format-patch.

This introduces a new diff options, ALLOW_TEXTCONV, which
controls this behavior. It is off by default, but is
explicitly turned on for the "log" family of commands, as
well as the "diff" porcelain.

Because both text conversion and external diffing are
controlled by these diff options, we can get rid of the
"plumbing versus porcelain" distinction when reading the
config. This was an attempt to control the same thing, but
suffered from being too coarse-grained.

Signed-off-by: Jeff King <peff@peff.net>
---
Same idea as before, but adapted to the new calling convention for
fill_mmfile. It makes the "did we textconv this?" test a little bit
more hairy. We could perhaps just set a "we did a textconv" flag on the
mmfile, but I am also fine with it as-is (especially since there is only
one call-site).

 builtin-diff.c           |    1 +
 builtin-log.c            |    1 +
 diff.c                   |   27 ++++++++++++---------------
 diff.h                   |    1 +
 t/t4030-diff-textconv.sh |    2 +-
 userdiff.c               |   10 +---------
 userdiff.h               |    3 +--
 7 files changed, 18 insertions(+), 27 deletions(-)

diff --git a/builtin-diff.c b/builtin-diff.c
index 9c8c295..2de5834 100644
--- a/builtin-diff.c
+++ b/builtin-diff.c
@@ -300,6 +300,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
 	}
 	DIFF_OPT_SET(&rev.diffopt, ALLOW_EXTERNAL);
 	DIFF_OPT_SET(&rev.diffopt, RECURSIVE);
+	DIFF_OPT_SET(&rev.diffopt, ALLOW_TEXTCONV);
 
 	/*
 	 * If the user asked for our exit code then don't start a
diff --git a/builtin-log.c b/builtin-log.c
index a0944f7..75d698f 100644
--- a/builtin-log.c
+++ b/builtin-log.c
@@ -59,6 +59,7 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
 		} else
 			die("unrecognized argument: %s", arg);
 	}
+	DIFF_OPT_SET(&rev->diffopt, ALLOW_TEXTCONV);
 }
 
 /*
diff --git a/diff.c b/diff.c
index fcdfd7b..e3bb1ed 100644
--- a/diff.c
+++ b/diff.c
@@ -93,12 +93,6 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 	if (!strcmp(var, "diff.external"))
 		return git_config_string(&external_diff_cmd_cfg, var, value);
 
-	switch (userdiff_config_porcelain(var, value)) {
-		case 0: break;
-		case -1: return -1;
-		default: return 0;
-	}
-
 	return git_diff_basic_config(var, value, cb);
 }
 
@@ -109,6 +103,12 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	switch (userdiff_config(var, value)) {
+		case 0: break;
+		case -1: return -1;
+		default: return 0;
+	}
+
 	if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
 		int slot = parse_diff_color_slot(var, 11);
 		if (!value)
@@ -123,12 +123,6 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
-	switch (userdiff_config_basic(var, value)) {
-		case 0: break;
-		case -1: return -1;
-		default: return 0;
-	}
-
 	return git_color_default_config(var, value, cb);
 }
 
@@ -1397,12 +1391,15 @@ static void builtin_diff(const char *name_a,
 		}
 	}
 
-	if (fill_mmfile(&mf1, one, 1) < 0 || fill_mmfile(&mf2, two, 1) < 0)
+	if (fill_mmfile(&mf1, one, DIFF_OPT_TST(o, ALLOW_TEXTCONV)) < 0 ||
+	    fill_mmfile(&mf2, two, DIFF_OPT_TST(o, ALLOW_TEXTCONV)) < 0)
 		die("unable to read files to diff");
 
 	if (!DIFF_OPT_TST(o, TEXT) &&
-	    ( (diff_filespec_is_binary(one) && !get_textconv(one)) ||
-	      (diff_filespec_is_binary(two) && !get_textconv(two)) )) {
+	    ( (diff_filespec_is_binary(one) &&
+	       !(DIFF_OPT_TST(o, ALLOW_TEXTCONV) && get_textconv(one))) ||
+	      (diff_filespec_is_binary(two) &&
+	       !(DIFF_OPT_TST(o, ALLOW_TEXTCONV) && get_textconv(two))) )) {
 		/* Quite common confusing case */
 		if (mf1.size == mf2.size &&
 		    !memcmp(mf1.ptr, mf2.ptr, mf1.size))
diff --git a/diff.h b/diff.h
index a49d865..42582ed 100644
--- a/diff.h
+++ b/diff.h
@@ -65,6 +65,7 @@ typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
 #define DIFF_OPT_IGNORE_SUBMODULES   (1 << 18)
 #define DIFF_OPT_DIRSTAT_CUMULATIVE  (1 << 19)
 #define DIFF_OPT_DIRSTAT_BY_FILE     (1 << 20)
+#define DIFF_OPT_ALLOW_TEXTCONV      (1 << 21)
 #define DIFF_OPT_TST(opts, flag)    ((opts)->flags & DIFF_OPT_##flag)
 #define DIFF_OPT_SET(opts, flag)    ((opts)->flags |= DIFF_OPT_##flag)
 #define DIFF_OPT_CLR(opts, flag)    ((opts)->flags &= ~DIFF_OPT_##flag)
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 090a21d..1df48ae 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -70,7 +70,7 @@ test_expect_success 'log produces text' '
 	test_cmp expect.text actual
 '
 
-test_expect_failure 'format-patch produces binary' '
+test_expect_success 'format-patch produces binary' '
 	git format-patch --no-binary --stdout HEAD^ >patch &&
 	find_diff <patch >actual &&
 	test_cmp expect.binary actual
diff --git a/userdiff.c b/userdiff.c
index d95257a..3681062 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -120,7 +120,7 @@ static int parse_tristate(int *b, const char *k, const char *v)
 	return 1;
 }
 
-int userdiff_config_basic(const char *k, const char *v)
+int userdiff_config(const char *k, const char *v)
 {
 	struct userdiff_driver *drv;
 
@@ -130,14 +130,6 @@ int userdiff_config_basic(const char *k, const char *v)
 		return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
 	if ((drv = parse_driver(k, v, "binary")))
 		return parse_tristate(&drv->binary, k, v);
-
-	return 0;
-}
-
-int userdiff_config_porcelain(const char *k, const char *v)
-{
-	struct userdiff_driver *drv;
-
 	if ((drv = parse_driver(k, v, "command")))
 		return parse_string(&drv->external, k, v);
 	if ((drv = parse_driver(k, v, "textconv")))
diff --git a/userdiff.h b/userdiff.h
index f29c18f..ba29457 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -14,8 +14,7 @@ struct userdiff_driver {
 	const char *textconv;
 };
 
-int userdiff_config_basic(const char *k, const char *v);
-int userdiff_config_porcelain(const char *k, const char *v);
+int userdiff_config(const char *k, const char *v);
 struct userdiff_driver *userdiff_find_by_name(const char *name);
 struct userdiff_driver *userdiff_find_by_path(const char *path);
 
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 6/7] document the diff driver textconv feature
  2008-10-25  0:48                         ` Jeff King
                                             ` (4 preceding siblings ...)
  2008-10-25  0:54                           ` [PATCH 5/7] userdiff: require explicitly allowing textconv Jeff King
@ 2008-10-25  0:54                           ` Jeff King
  2008-10-25  0:55                           ` [PATCH 7/7] only textconv regular files Jeff King
  6 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:54 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

This patch also changes the term "custom diff driver" to
"external diff driver"; now that there are more facets of a
"custom driver" than just external diffing, it makes sense
to refer to the configuration of "diff.foo.*" as the "foo
diff driver", with "diff.foo.command" as the "external
driver for foo".

Signed-off-by: Jeff King <peff@peff.net>
---
Same as before.

 Documentation/gitattributes.txt |   66 +++++++++++++++++++++++++++++++--------
 1 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 2694559..314e2d3 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -213,10 +213,12 @@ with `crlf`, and then `ident` and fed to `filter`.
 Generating diff text
 ~~~~~~~~~~~~~~~~~~~~
 
-The attribute `diff` affects if 'git-diff' generates textual
-patch for the path or just says `Binary files differ`.  It also
-can affect what line is shown on the hunk header `@@ -k,l +n,m @@`
-line.
+The attribute `diff` affects how 'git' generates diffs for particular
+files. It can tell git whether to generate a textual patch for the path
+or to treat the path as a binary file.  It can also affect what line is
+shown on the hunk header `@@ -k,l +n,m @@` line, tell git to use an
+external command to generate the diff, or ask git to convert binary
+files to a text format before generating the diff.
 
 Set::
 
@@ -227,7 +229,8 @@ Set::
 Unset::
 
 	A path to which the `diff` attribute is unset will
-	generate `Binary files differ`.
+	generate `Binary files differ` (or a binary patch, if
+	binary patches are enabled).
 
 Unspecified::
 
@@ -238,21 +241,21 @@ Unspecified::
 
 String::
 
-	Diff is shown using the specified custom diff driver.
-	The driver program is given its input using the same
-	calling convention as used for GIT_EXTERNAL_DIFF
-	program.  This name is also used for custom hunk header
-	selection.
+	Diff is shown using the specified diff driver.  Each driver may
+	specify one or more options, as described in the following
+	section. The options for the diff driver "foo" are defined
+	by the configuration variables in the "diff.foo" section of the
+	git config file.
 
 
-Defining a custom diff driver
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Defining an external diff driver
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The definition of a diff driver is done in `gitconfig`, not
 `gitattributes` file, so strictly speaking this manual page is a
 wrong place to talk about it.  However...
 
-To define a custom diff driver `jcdiff`, add a section to your
+To define an external diff driver `jcdiff`, add a section to your
 `$GIT_DIR/config` file (or `$HOME/.gitconfig` file) like this:
 
 ----------------------------------------------------------------
@@ -328,6 +331,43 @@ patterns are available:
 - `tex` suitable for source code for LaTeX documents.
 
 
+Performing text diffs of binary files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes it is desirable to see the diff of a text-converted
+version of some binary files. For example, a word processor
+document can be converted to an ASCII text representation, and
+the diff of the text shown. Even though this conversion loses
+some information, the resulting diff is useful for human
+viewing (but cannot be applied directly).
+
+The `textconv` config option is used to define a program for
+performing such a conversion. The program should take a single
+argument, the name of a file to convert, and produce the
+resulting text on stdout.
+
+For example, to show the diff of the exif information of a
+file instead of the binary information (assuming you have the
+exif tool installed):
+
+------------------------
+[diff "jpg"]
+	textconv = exif
+------------------------
+
+NOTE: The text conversion is generally a one-way conversion;
+in this example, we lose the actual image contents and focus
+just on the text data. This means that diffs generated by
+textconv are _not_ suitable for applying. For this reason,
+only `git diff` and the `git log` family of commands (i.e.,
+log, whatchanged, show) will perform text conversion. `git
+format-patch` will never generate this output. If you want to
+send somebody a text-converted diff of a binary file (e.g.,
+because it quickly conveys the changes you have made), you
+should generate it separately and send it as a comment _in
+addition to_ the usual binary diff that you might send.
+
+
 Performing a three-way merge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH 7/7] only textconv regular files
  2008-10-25  0:48                         ` Jeff King
                                             ` (5 preceding siblings ...)
  2008-10-25  0:54                           ` [PATCH 6/7] document the diff driver textconv feature Jeff King
@ 2008-10-25  0:55                           ` Jeff King
  6 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25  0:55 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

We treat symlinks as text containing the results of the
symlink, so it doesn't make much sense to text-convert them.

Similarly gitlink components just end up as the text
"Subproject commit $sha1", which we should leave intact.

Note that a typechange may be broken into two parts: the
removal of the old part and the addition of the new. In that
case, we _do_ show the textconv for any part which is the
addition or removal of a file we would ordinarily textconv,
since it is purely acting on the file contents.

Signed-off-by: Jeff King <peff@peff.net>
---
Similar to before, except we already have the expect_failure test now.

 diff.c                   |    2 ++
 t/t4030-diff-textconv.sh |    2 +-
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/diff.c b/diff.c
index e3bb1ed..a87ce97 100644
--- a/diff.c
+++ b/diff.c
@@ -282,6 +282,8 @@ static const char *get_textconv(struct diff_filespec *one)
 {
 	if (!DIFF_FILE_VALID(one))
 		return NULL;
+	if (!S_ISREG(one->mode))
+		return NULL;
 	diff_filespec_load_driver(one);
 	return one->driver->textconv;
 }
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 1df48ae..3945731 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -104,7 +104,7 @@ index ad8b3d2..67be421
 \ No newline at end of file
 EOF
 # make a symlink the hard way that works on symlink-challenged file systems
-test_expect_failure 'textconv does not act on symlinks' '
+test_expect_success 'textconv does not act on symlinks' '
 	echo -n frotz > file &&
 	git add file &&
 	git ls-files -s | sed -e s/100644/120000/ |
-- 
1.6.0.3.523.g38597.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25  0:52                           ` [PATCH 4/7] textconv: don't convert for every operation Jeff King
@ 2008-10-25  5:41                             ` Junio C Hamano
  2008-10-25  7:19                               ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Junio C Hamano @ 2008-10-25  5:41 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

> -static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
> +static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one,
> +		int try_textconv)
>  {
>  	const char *textconv;
>  
> @@ -304,7 +305,7 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
>  	else if (diff_populate_filespec(one, 0))
>  		return -1;
>  
> -	if ((textconv = get_textconv(one))) {
> +	if (try_textconv && (textconv = get_textconv(one))) {
>  		size_t size;
>  		mf->ptr = run_textconv(textconv, one, &size);
>  		if (!mf->ptr)
> @@ -1396,7 +1397,7 @@ static void builtin_diff(const char *name_a,
>  		}
>  	}

Isn't this function "fill_mmfile()" and its callers leaky as a sieve?

The original fill_mmfile() was to only borrow one->data and users of
mmfile_t never had to worry about freeing what's in mf->ptr (and freeing
them would have been actively wrong).

I am also somewhat worried about the performance impact of running
get_textconv() to the same filespec many times when no textconv is
defined, which is the normal case we should optimize for.  It appears that
diff_filespec_load_driver() is optimized for a wrong case (i.e. "we
already know this needs a custom driver and we know which one").

I am inclined to suggest reverting the whole series (including the ones
that are already in 'master') and start over from scratch, limiting the
run_textconv() to only inside diff.c::builtin_diff() (in the else {} block
where "Crazy xcl interfaces.." comment appears).

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25  5:41                             ` Junio C Hamano
@ 2008-10-25  7:19                               ` Jeff King
  2008-10-25 18:32                                 ` Junio C Hamano
  0 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-25  7:19 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Fri, Oct 24, 2008 at 10:41:09PM -0700, Junio C Hamano wrote:

> Isn't this function "fill_mmfile()" and its callers leaky as a sieve?

Yes, it is (for the textconv case; the other is unaffected). That was
another reason I had attached the data to the diff_filespec, but
obviously I forgot about that when re-rolling the patch.

> I am also somewhat worried about the performance impact of running
> get_textconv() to the same filespec many times when no textconv is
> defined, which is the normal case we should optimize for.  It appears that
> diff_filespec_load_driver() is optimized for a wrong case (i.e. "we
> already know this needs a custom driver and we know which one").

No, it is the same as before. We always end up with a driver at the end
of the function, so further calls will be no-ops. So we do exactly one
attribute lookup per filespec, caching the result.

> I am inclined to suggest reverting the whole series (including the ones
> that are already in 'master') and start over from scratch, limiting the
> run_textconv() to only inside diff.c::builtin_diff() (in the else {} block
> where "Crazy xcl interfaces.." comment appears).

I am not terribly opposed to that (I was quite surprised to see the
original make it to 'next', let alone 'master'). OTOH, the real reason
to do so would be to keep a clean history, and it is already too late
for that.

I think it makes sense to figure out _what_ needs fixed first, because
it might be somewhat minor. So far I see:

  - leak from fill_mmfile; this definitely needs fixed. The quick fix is
    minor (free if we did a textconv). A more involved fix is to pull it
    out of fill_mmfile entirely and put the code directly into
    builtin_diff, which would be part of a re-roll of this latest
    series.  But see below.

  - Keep fill_mmfile allocation semantics clear.  I was trying to keep
    it simple for other fill_mmfile callers to opt-in to textconv, even
    if they chose to do it by some user-controlled mechanism instead of
    by default (e.g., diff.TextconvDiffstat or something). But maybe
    that is not of value to us. Again, that is a re-roll of this series.

  - performance considerations with driver loading. I believe this is a
    non-issue. So either you are reading the code wrong, or I am not
    understanding your concern correctly (or _I_ am reading the code
    wrong, of course).

Others?

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25  7:19                               ` Jeff King
@ 2008-10-25 18:32                                 ` Junio C Hamano
  2008-10-25 19:35                                   ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Junio C Hamano @ 2008-10-25 18:32 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

>> I am also somewhat worried about the performance impact of running
>> get_textconv() to the same filespec many times when no textconv is
>> defined, which is the normal case we should optimize for.  It appears that
>> diff_filespec_load_driver() is optimized for a wrong case (i.e. "we
>> already know this needs a custom driver and we know which one").
>
> No, it is the same as before. We always end up with a driver at the end
> of the function, so further calls will be no-ops. So we do exactly one
> attribute lookup per filespec, caching the result.

Ah, I missed that driver named "default".  Sorry for the noise.

> I think it makes sense to figure out _what_ needs fixed first, because
> it might be somewhat minor. So far I see:
>
>   - leak from fill_mmfile; this definitely needs fixed. The quick fix is
>     minor (free if we did a textconv). A more involved fix is to pull it
>     out of fill_mmfile entirely and put the code directly into
>     builtin_diff, which would be part of a re-roll of this latest
>     series.  But see below.
>
>   - Keep fill_mmfile allocation semantics clear.  I was trying to keep
>     it simple for other fill_mmfile callers to opt-in to textconv, even
>     if they chose to do it by some user-controlled mechanism instead of
>     by default (e.g., diff.TextconvDiffstat or something). But maybe
>     that is not of value to us. Again, that is a re-roll of this series.

It would be either:

 (1) non-textconv users call fill_mmfile(&mf, ..., 0), use mf and return
     without clean-up as before, while textconv users do:

 	fill_mmfile(&mf, ..., 1);
        use mf;
        if (mf->ptr != one->data)
		clean up mf->ptr;
        return;

or

 (2) non-textconv users are unchanged from v1.6.0, while textconv users
     do:

	const char *textconv = get_textconv(...);
	fill_mmfile(&mf, ...); /* no change to fill_mmfile() */
	if (textconv)
        	munge_mmfile(&mf);
	use mf;
        if (textconv)
        	cleanup_mmfile(&mf);

The end result may not be that much different, but I find the latter
easier to follow for three reasons:

 * we expect that majority of the users of fill_mmfile() are non textconv
   users.  I'd feel safer to keep their codepath the same as v1.6.0;

 * fill_mmfile() semantics is the same as long before -- it just gives it
   a borrowed pointer;

 * the code that makes a change to mmfile that requires clean-up does so
   explicitly, and that is followed by an explicit clean-up, both
   contained in the same function;

>   - performance considerations with driver loading. I believe this is a
>     non-issue. So either you are reading the code wrong,...

Yes I was.  Thanks and sorry.

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25 18:32                                 ` Junio C Hamano
@ 2008-10-25 19:35                                   ` Jeff King
  2008-10-25 23:35                                     ` Junio C Hamano
                                                       ` (2 more replies)
  0 siblings, 3 replies; 85+ messages in thread
From: Jeff King @ 2008-10-25 19:35 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Sat, Oct 25, 2008 at 11:32:20AM -0700, Junio C Hamano wrote:

> The end result may not be that much different, but I find the latter
> easier to follow for three reasons:

Your reasoning here is sound. My initial thought was to make it a
one-liner change to "turn on" text conversion in other spots. But

  a) it didn't turn out that way, since you still have to keep track of
     the "did I textconv" when determining binary-ness, and whether to
     free()

  b) we don't actually want to do it anywhere else (except perhaps, as I
     mentioned, in blame, but that is a totally different interface
     anyway)

I will re-roll my latest series according to your recommendation
(unless you are set on reverting the first part; if so, please tell me
asap so I can work under that assumption).

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25 19:35                                   ` Jeff King
@ 2008-10-25 23:35                                     ` Junio C Hamano
  2008-10-25 23:48                                     ` Junio C Hamano
  2008-10-26  4:38                                     ` Jeff King
  2 siblings, 0 replies; 85+ messages in thread
From: Junio C Hamano @ 2008-10-25 23:35 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

> I will re-roll my latest series according to your recommendation
> (unless you are set on reverting the first part; if so, please tell me
> asap so I can work under that assumption).

No, reverting and reapplying is more work than incremental updates.
Please refine on top of what's already in.

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25 19:35                                   ` Jeff King
  2008-10-25 23:35                                     ` Junio C Hamano
@ 2008-10-25 23:48                                     ` Junio C Hamano
  2008-10-26  4:52                                       ` Jeff King
  2008-10-26  4:38                                     ` Jeff King
  2 siblings, 1 reply; 85+ messages in thread
From: Junio C Hamano @ 2008-10-25 23:48 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Another place that would benefit from ALLOW_TEXTCONV is the function
wt_status_print_verbose() in wt-status.c, where we generate preview diff
for "git commit -v".  The output from that function is purely for human
consumption and does not have to be applicable.

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25 19:35                                   ` Jeff King
  2008-10-25 23:35                                     ` Junio C Hamano
  2008-10-25 23:48                                     ` Junio C Hamano
@ 2008-10-26  4:38                                     ` Jeff King
  2008-10-26  4:41                                       ` [PATCH v3 1/8] diff: add missing static declaration Jeff King
                                                         ` (7 more replies)
  2 siblings, 8 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:38 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Sat, Oct 25, 2008 at 03:35:54PM -0400, Jeff King wrote:

> I will re-roll my latest series according to your recommendation
> (unless you are set on reverting the first part; if so, please tell me
> asap so I can work under that assumption).

And here it is. 8 patches this time, and I tried to reorder with
"obviously correct" at the beginning and "I'm not so sure" at the end.

     1/8: diff: add missing static declaration
     2/8: document the diff driver textconv feature
     3/8: add userdiff textconv tests
     4/8: refactor userdiff textconv code
     5/8: userdiff: require explicitly allowing textconv
     6/8: only textconv regular files
     7/8: wt-status: load diff ui config
     8/8: enable textconv for diff in verbose status/commit

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* [PATCH v3 1/8] diff: add missing static declaration
  2008-10-26  4:38                                     ` Jeff King
@ 2008-10-26  4:41                                       ` Jeff King
  2008-10-26  4:41                                       ` [PATCH v3 2/8] document the diff driver textconv feature Jeff King
                                                         ` (6 subsequent siblings)
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

This function isn't used outside of diff.c; the 'static' was
simply overlooked in the original writing.

Signed-off-by: Jeff King <peff@peff.net>
---
Same as before.

 diff.c |    2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/diff.c b/diff.c
index e368fef..d1fd594 100644
--- a/diff.c
+++ b/diff.c
@@ -1282,7 +1282,7 @@ static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two)
 	emit_binary_diff_body(file, two, one);
 }
 
-void diff_filespec_load_driver(struct diff_filespec *one)
+static void diff_filespec_load_driver(struct diff_filespec *one)
 {
 	if (!one->driver)
 		one->driver = userdiff_find_by_path(one->path);
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 2/8] document the diff driver textconv feature
  2008-10-26  4:38                                     ` Jeff King
  2008-10-26  4:41                                       ` [PATCH v3 1/8] diff: add missing static declaration Jeff King
@ 2008-10-26  4:41                                       ` Jeff King
  2008-10-26  4:42                                       ` [PATCH v3 3/8] add userdiff textconv tests Jeff King
                                                         ` (5 subsequent siblings)
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:41 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

This patch also changes the term "custom diff driver" to
"external diff driver"; now that there are more facets of a
"custom driver" than just external diffing, it makes sense
to refer to the configuration of "diff.foo.*" as the "foo
diff driver", with "diff.foo.command" as the "external
driver for foo".

Signed-off-by: Jeff King <peff@peff.net>
---
Same as before, but re-ordered to beginning since it is
non-controversial.

 Documentation/gitattributes.txt |   66 +++++++++++++++++++++++++++++++--------
 1 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/Documentation/gitattributes.txt b/Documentation/gitattributes.txt
index 2694559..314e2d3 100644
--- a/Documentation/gitattributes.txt
+++ b/Documentation/gitattributes.txt
@@ -213,10 +213,12 @@ with `crlf`, and then `ident` and fed to `filter`.
 Generating diff text
 ~~~~~~~~~~~~~~~~~~~~
 
-The attribute `diff` affects if 'git-diff' generates textual
-patch for the path or just says `Binary files differ`.  It also
-can affect what line is shown on the hunk header `@@ -k,l +n,m @@`
-line.
+The attribute `diff` affects how 'git' generates diffs for particular
+files. It can tell git whether to generate a textual patch for the path
+or to treat the path as a binary file.  It can also affect what line is
+shown on the hunk header `@@ -k,l +n,m @@` line, tell git to use an
+external command to generate the diff, or ask git to convert binary
+files to a text format before generating the diff.
 
 Set::
 
@@ -227,7 +229,8 @@ Set::
 Unset::
 
 	A path to which the `diff` attribute is unset will
-	generate `Binary files differ`.
+	generate `Binary files differ` (or a binary patch, if
+	binary patches are enabled).
 
 Unspecified::
 
@@ -238,21 +241,21 @@ Unspecified::
 
 String::
 
-	Diff is shown using the specified custom diff driver.
-	The driver program is given its input using the same
-	calling convention as used for GIT_EXTERNAL_DIFF
-	program.  This name is also used for custom hunk header
-	selection.
+	Diff is shown using the specified diff driver.  Each driver may
+	specify one or more options, as described in the following
+	section. The options for the diff driver "foo" are defined
+	by the configuration variables in the "diff.foo" section of the
+	git config file.
 
 
-Defining a custom diff driver
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+Defining an external diff driver
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The definition of a diff driver is done in `gitconfig`, not
 `gitattributes` file, so strictly speaking this manual page is a
 wrong place to talk about it.  However...
 
-To define a custom diff driver `jcdiff`, add a section to your
+To define an external diff driver `jcdiff`, add a section to your
 `$GIT_DIR/config` file (or `$HOME/.gitconfig` file) like this:
 
 ----------------------------------------------------------------
@@ -328,6 +331,43 @@ patterns are available:
 - `tex` suitable for source code for LaTeX documents.
 
 
+Performing text diffs of binary files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Sometimes it is desirable to see the diff of a text-converted
+version of some binary files. For example, a word processor
+document can be converted to an ASCII text representation, and
+the diff of the text shown. Even though this conversion loses
+some information, the resulting diff is useful for human
+viewing (but cannot be applied directly).
+
+The `textconv` config option is used to define a program for
+performing such a conversion. The program should take a single
+argument, the name of a file to convert, and produce the
+resulting text on stdout.
+
+For example, to show the diff of the exif information of a
+file instead of the binary information (assuming you have the
+exif tool installed):
+
+------------------------
+[diff "jpg"]
+	textconv = exif
+------------------------
+
+NOTE: The text conversion is generally a one-way conversion;
+in this example, we lose the actual image contents and focus
+just on the text data. This means that diffs generated by
+textconv are _not_ suitable for applying. For this reason,
+only `git diff` and the `git log` family of commands (i.e.,
+log, whatchanged, show) will perform text conversion. `git
+format-patch` will never generate this output. If you want to
+send somebody a text-converted diff of a binary file (e.g.,
+because it quickly conveys the changes you have made), you
+should generate it separately and send it as a comment _in
+addition to_ the usual binary diff that you might send.
+
+
 Performing a three-way merge
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 3/8] add userdiff textconv tests
  2008-10-26  4:38                                     ` Jeff King
  2008-10-26  4:41                                       ` [PATCH v3 1/8] diff: add missing static declaration Jeff King
  2008-10-26  4:41                                       ` [PATCH v3 2/8] document the diff driver textconv feature Jeff King
@ 2008-10-26  4:42                                       ` Jeff King
  2008-10-26  4:44                                       ` [PATCH v3 4/8] refactor userdiff textconv code Jeff King
                                                         ` (4 subsequent siblings)
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:42 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

These tests provide a basic sanity check that textconv'd
files work. The tests try to describe how this configuration
_should_ work; thus some of the tests are marked to expect
failure.

In particular, we fail to actually textconv anything because
the 'diff.foo.binary' config option is not set, which will
be fixed in the next patch.

This also means that some "expect_failure" tests actually
seem to be fixed; in reality, this is just because textconv
is broken and its failure mode happens to make these tests
work.

Signed-off-by: Jeff King <peff@peff.net>
---
Same as before.

 t/t4030-diff-textconv.sh |  118 ++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 118 insertions(+), 0 deletions(-)
 create mode 100755 t/t4030-diff-textconv.sh

diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
new file mode 100755
index 0000000..1b09648
--- /dev/null
+++ b/t/t4030-diff-textconv.sh
@@ -0,0 +1,118 @@
+#!/bin/sh
+
+test_description='diff.*.textconv tests'
+. ./test-lib.sh
+
+find_diff() {
+	sed '1,/^index /d' | sed '/^-- $/,$d'
+}
+
+cat >expect.binary <<'EOF'
+Binary files a/file and b/file differ
+EOF
+
+cat >expect.text <<'EOF'
+--- a/file
++++ b/file
+@@ -1 +1,2 @@
+ 0
++1
+EOF
+
+cat >hexdump <<'EOF'
+#!/bin/sh
+perl -e '$/ = undef; $_ = <>; s/./ord($&)/ge; print $_' "$1"
+EOF
+chmod +x hexdump
+
+test_expect_success 'setup binary file with history' '
+	printf "\\0\\n" >file &&
+	git add file &&
+	git commit -m one &&
+	printf "\\1\\n" >>file &&
+	git add file &&
+	git commit -m two
+'
+
+test_expect_success 'file is considered binary by porcelain' '
+	git diff HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_success 'file is considered binary by plumbing' '
+	git diff-tree -p HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_success 'setup textconv filters' '
+	echo file diff=foo >.gitattributes &&
+	git config diff.foo.textconv "$PWD"/hexdump &&
+	git config diff.fail.textconv false
+'
+
+test_expect_failure 'diff produces text' '
+	git diff HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.text actual
+'
+
+test_expect_success 'diff-tree produces binary' '
+	git diff-tree -p HEAD^ HEAD >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.binary actual
+'
+
+test_expect_failure 'log produces text' '
+	git log -1 -p >log &&
+	find_diff <log >actual &&
+	test_cmp expect.text actual
+'
+
+test_expect_failure 'format-patch produces binary' '
+	git format-patch --no-binary --stdout HEAD^ >patch &&
+	find_diff <patch >actual &&
+	test_cmp expect.binary actual
+'
+
+cat >expect.stat <<'EOF'
+ file |  Bin 2 -> 4 bytes
+ 1 files changed, 0 insertions(+), 0 deletions(-)
+EOF
+test_expect_failure 'diffstat does not run textconv' '
+	echo file diff=fail >.gitattributes &&
+	git diff --stat HEAD^ HEAD >actual &&
+	test_cmp expect.stat actual
+'
+# restore working setup
+echo file diff=foo >.gitattributes
+
+cat >expect.typechange <<'EOF'
+--- a/file
++++ /dev/null
+@@ -1,2 +0,0 @@
+-0
+-1
+diff --git a/file b/file
+new file mode 120000
+index ad8b3d2..67be421
+--- /dev/null
++++ b/file
+@@ -0,0 +1 @@
++frotz
+\ No newline at end of file
+EOF
+# make a symlink the hard way that works on symlink-challenged file systems
+test_expect_failure 'textconv does not act on symlinks' '
+	echo -n frotz > file &&
+	git add file &&
+	git ls-files -s | sed -e s/100644/120000/ |
+		git update-index --index-info &&
+	git commit -m typechange &&
+	git show >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.typechange actual
+'
+
+test_done
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 4/8] refactor userdiff textconv code
  2008-10-26  4:38                                     ` Jeff King
                                                         ` (2 preceding siblings ...)
  2008-10-26  4:42                                       ` [PATCH v3 3/8] add userdiff textconv tests Jeff King
@ 2008-10-26  4:44                                       ` Jeff King
  2008-10-26  4:45                                       ` [PATCH v3 5/8] userdiff: require explicitly allowing textconv Jeff King
                                                         ` (3 subsequent siblings)
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:44 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

The original implementation of textconv put the conversion
into fill_mmfile. This was a bad idea for a number of
reasons:

 - it made the semantics of fill_mmfile unclear. In some
   cases, it was allocating data (if a text conversion
   occurred), and in some cases not (if we could use the
   data directly from the filespec). But the caller had
   no idea which had happened, and so didn't know whether
   the memory should be freed

 - similarly, the caller had no idea if a text conversion
   had occurred, and so didn't know whether the contents
   should be treated as binary or not. This meant that we
   incorrectly guessed that text-converted content was
   binary and didn't actually show it (unless the user
   overrode us with "diff.foo.binary = false", which then
   created problems in plumbing where the text conversion
   did _not_ occur)

 - not all callers of fill_mmfile want the text contents. In
   particular, we don't really want diffstat, whitespace
   checks, patch id generation, etc, to look at the
   converted contents.

This patch pulls the conversion code directly into
builtin_diff, so that we only see the conversion when
generating an actual patch. We also then know whether we are
doing a conversion, so we can check the binary-ness and free
the data from the mmfile appropriately (the previous version
leaked quite badly when text conversion was used)

Signed-off-by: Jeff King <peff@peff.net>
---
This patch is the interesting one. I think your suggestion turned out
fairly clean (and this reverts fill_mmfile back to its state before the
original textconv series).

 diff.c                   |   48 +++++++++++++++++++++++++++++++++------------
 t/t4030-diff-textconv.sh |    6 ++--
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/diff.c b/diff.c
index d1fd594..6f01595 100644
--- a/diff.c
+++ b/diff.c
@@ -294,18 +294,8 @@ static int fill_mmfile(mmfile_t *mf, struct diff_filespec *one)
 	else if (diff_populate_filespec(one, 0))
 		return -1;
 
-	diff_filespec_load_driver(one);
-	if (one->driver->textconv) {
-		size_t size;
-		mf->ptr = run_textconv(one->driver->textconv, one, &size);
-		if (!mf->ptr)
-			return -1;
-		mf->size = size;
-	}
-	else {
-		mf->ptr = one->data;
-		mf->size = one->size;
-	}
+	mf->ptr = one->data;
+	mf->size = one->size;
 	return 0;
 }
 
@@ -1323,6 +1313,14 @@ void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const
 		options->b_prefix = b;
 }
 
+static const char *get_textconv(struct diff_filespec *one)
+{
+	if (!DIFF_FILE_VALID(one))
+		return NULL;
+	diff_filespec_load_driver(one);
+	return one->driver->textconv;
+}
+
 static void builtin_diff(const char *name_a,
 			 const char *name_b,
 			 struct diff_filespec *one,
@@ -1337,6 +1335,7 @@ static void builtin_diff(const char *name_a,
 	const char *set = diff_get_color_opt(o, DIFF_METAINFO);
 	const char *reset = diff_get_color_opt(o, DIFF_RESET);
 	const char *a_prefix, *b_prefix;
+	const char *textconv_one, *textconv_two;
 
 	diff_set_mnemonic_prefix(o, "a/", "b/");
 	if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
@@ -1390,8 +1389,12 @@ static void builtin_diff(const char *name_a,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
+	textconv_one = get_textconv(one);
+	textconv_two = get_textconv(two);
+
 	if (!DIFF_OPT_TST(o, TEXT) &&
-	    (diff_filespec_is_binary(one) || diff_filespec_is_binary(two))) {
+	    ( (diff_filespec_is_binary(one) && !textconv_one) ||
+	      (diff_filespec_is_binary(two) && !textconv_two) )) {
 		/* Quite common confusing case */
 		if (mf1.size == mf2.size &&
 		    !memcmp(mf1.ptr, mf2.ptr, mf1.size))
@@ -1412,6 +1415,21 @@ static void builtin_diff(const char *name_a,
 		struct emit_callback ecbdata;
 		const struct userdiff_funcname *pe;
 
+		if (textconv_one) {
+			size_t size;
+			mf1.ptr = run_textconv(textconv_one, one, &size);
+			if (!mf1.ptr)
+				die("unable to read files to diff");
+			mf1.size = size;
+		}
+		if (textconv_two) {
+			size_t size;
+			mf2.ptr = run_textconv(textconv_two, two, &size);
+			if (!mf2.ptr)
+				die("unable to read files to diff");
+			mf2.size = size;
+		}
+
 		pe = diff_funcname_pattern(one);
 		if (!pe)
 			pe = diff_funcname_pattern(two);
@@ -1443,6 +1461,10 @@ static void builtin_diff(const char *name_a,
 			      &xpp, &xecfg, &ecb);
 		if (DIFF_OPT_TST(o, COLOR_DIFF_WORDS))
 			free_diff_words_data(&ecbdata);
+		if (textconv_one)
+			free(mf1.ptr);
+		if (textconv_two)
+			free(mf2.ptr);
 	}
 
  free_ab_and_return:
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 1b09648..090a21d 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -52,7 +52,7 @@ test_expect_success 'setup textconv filters' '
 	git config diff.fail.textconv false
 '
 
-test_expect_failure 'diff produces text' '
+test_expect_success 'diff produces text' '
 	git diff HEAD^ HEAD >diff &&
 	find_diff <diff >actual &&
 	test_cmp expect.text actual
@@ -64,7 +64,7 @@ test_expect_success 'diff-tree produces binary' '
 	test_cmp expect.binary actual
 '
 
-test_expect_failure 'log produces text' '
+test_expect_success 'log produces text' '
 	git log -1 -p >log &&
 	find_diff <log >actual &&
 	test_cmp expect.text actual
@@ -80,7 +80,7 @@ cat >expect.stat <<'EOF'
  file |  Bin 2 -> 4 bytes
  1 files changed, 0 insertions(+), 0 deletions(-)
 EOF
-test_expect_failure 'diffstat does not run textconv' '
+test_expect_success 'diffstat does not run textconv' '
 	echo file diff=fail >.gitattributes &&
 	git diff --stat HEAD^ HEAD >actual &&
 	test_cmp expect.stat actual
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 5/8] userdiff: require explicitly allowing textconv
  2008-10-26  4:38                                     ` Jeff King
                                                         ` (3 preceding siblings ...)
  2008-10-26  4:44                                       ` [PATCH v3 4/8] refactor userdiff textconv code Jeff King
@ 2008-10-26  4:45                                       ` Jeff King
  2008-10-26  4:46                                       ` [PATCH v3 6/8] only textconv regular files Jeff King
                                                         ` (2 subsequent siblings)
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:45 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

Diffs that have been produced with textconv almost certainly
cannot be applied, so we want to be careful not to generate
them in things like format-patch.

This introduces a new diff options, ALLOW_TEXTCONV, which
controls this behavior. It is off by default, but is
explicitly turned on for the "log" family of commands, as
well as the "diff" porcelain (but not diff-* plumbing).

Because both text conversion and external diffing are
controlled by these diff options, we can get rid of the
"plumbing versus porcelain" distinction when reading the
config. This was an attempt to control the same thing, but
suffered from being too coarse-grained.

Signed-off-by: Jeff King <peff@peff.net>
---
The new changes in 4/8 make this even cleaner than before.

 builtin-diff.c           |    1 +
 builtin-log.c            |    1 +
 diff.c                   |   26 +++++++++++---------------
 diff.h                   |    1 +
 t/t4030-diff-textconv.sh |    2 +-
 userdiff.c               |   10 +---------
 userdiff.h               |    3 +--
 7 files changed, 17 insertions(+), 27 deletions(-)

diff --git a/builtin-diff.c b/builtin-diff.c
index 9c8c295..2de5834 100644
--- a/builtin-diff.c
+++ b/builtin-diff.c
@@ -300,6 +300,7 @@ int cmd_diff(int argc, const char **argv, const char *prefix)
 	}
 	DIFF_OPT_SET(&rev.diffopt, ALLOW_EXTERNAL);
 	DIFF_OPT_SET(&rev.diffopt, RECURSIVE);
+	DIFF_OPT_SET(&rev.diffopt, ALLOW_TEXTCONV);
 
 	/*
 	 * If the user asked for our exit code then don't start a
diff --git a/builtin-log.c b/builtin-log.c
index a0944f7..75d698f 100644
--- a/builtin-log.c
+++ b/builtin-log.c
@@ -59,6 +59,7 @@ static void cmd_log_init(int argc, const char **argv, const char *prefix,
 		} else
 			die("unrecognized argument: %s", arg);
 	}
+	DIFF_OPT_SET(&rev->diffopt, ALLOW_TEXTCONV);
 }
 
 /*
diff --git a/diff.c b/diff.c
index 6f01595..608223a 100644
--- a/diff.c
+++ b/diff.c
@@ -93,12 +93,6 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 	if (!strcmp(var, "diff.external"))
 		return git_config_string(&external_diff_cmd_cfg, var, value);
 
-	switch (userdiff_config_porcelain(var, value)) {
-		case 0: break;
-		case -1: return -1;
-		default: return 0;
-	}
-
 	return git_diff_basic_config(var, value, cb);
 }
 
@@ -109,6 +103,12 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	switch (userdiff_config(var, value)) {
+		case 0: break;
+		case -1: return -1;
+		default: return 0;
+	}
+
 	if (!prefixcmp(var, "diff.color.") || !prefixcmp(var, "color.diff.")) {
 		int slot = parse_diff_color_slot(var, 11);
 		if (!value)
@@ -123,12 +123,6 @@ int git_diff_basic_config(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
-	switch (userdiff_config_basic(var, value)) {
-		case 0: break;
-		case -1: return -1;
-		default: return 0;
-	}
-
 	return git_color_default_config(var, value, cb);
 }
 
@@ -1335,7 +1329,7 @@ static void builtin_diff(const char *name_a,
 	const char *set = diff_get_color_opt(o, DIFF_METAINFO);
 	const char *reset = diff_get_color_opt(o, DIFF_RESET);
 	const char *a_prefix, *b_prefix;
-	const char *textconv_one, *textconv_two;
+	const char *textconv_one = NULL, *textconv_two = NULL;
 
 	diff_set_mnemonic_prefix(o, "a/", "b/");
 	if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
@@ -1389,8 +1383,10 @@ static void builtin_diff(const char *name_a,
 	if (fill_mmfile(&mf1, one) < 0 || fill_mmfile(&mf2, two) < 0)
 		die("unable to read files to diff");
 
-	textconv_one = get_textconv(one);
-	textconv_two = get_textconv(two);
+	if (DIFF_OPT_TST(o, ALLOW_TEXTCONV)) {
+		textconv_one = get_textconv(one);
+		textconv_two = get_textconv(two);
+	}
 
 	if (!DIFF_OPT_TST(o, TEXT) &&
 	    ( (diff_filespec_is_binary(one) && !textconv_one) ||
diff --git a/diff.h b/diff.h
index a49d865..42582ed 100644
--- a/diff.h
+++ b/diff.h
@@ -65,6 +65,7 @@ typedef void (*diff_format_fn_t)(struct diff_queue_struct *q,
 #define DIFF_OPT_IGNORE_SUBMODULES   (1 << 18)
 #define DIFF_OPT_DIRSTAT_CUMULATIVE  (1 << 19)
 #define DIFF_OPT_DIRSTAT_BY_FILE     (1 << 20)
+#define DIFF_OPT_ALLOW_TEXTCONV      (1 << 21)
 #define DIFF_OPT_TST(opts, flag)    ((opts)->flags & DIFF_OPT_##flag)
 #define DIFF_OPT_SET(opts, flag)    ((opts)->flags |= DIFF_OPT_##flag)
 #define DIFF_OPT_CLR(opts, flag)    ((opts)->flags &= ~DIFF_OPT_##flag)
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 090a21d..1df48ae 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -70,7 +70,7 @@ test_expect_success 'log produces text' '
 	test_cmp expect.text actual
 '
 
-test_expect_failure 'format-patch produces binary' '
+test_expect_success 'format-patch produces binary' '
 	git format-patch --no-binary --stdout HEAD^ >patch &&
 	find_diff <patch >actual &&
 	test_cmp expect.binary actual
diff --git a/userdiff.c b/userdiff.c
index d95257a..3681062 100644
--- a/userdiff.c
+++ b/userdiff.c
@@ -120,7 +120,7 @@ static int parse_tristate(int *b, const char *k, const char *v)
 	return 1;
 }
 
-int userdiff_config_basic(const char *k, const char *v)
+int userdiff_config(const char *k, const char *v)
 {
 	struct userdiff_driver *drv;
 
@@ -130,14 +130,6 @@ int userdiff_config_basic(const char *k, const char *v)
 		return parse_funcname(&drv->funcname, k, v, REG_EXTENDED);
 	if ((drv = parse_driver(k, v, "binary")))
 		return parse_tristate(&drv->binary, k, v);
-
-	return 0;
-}
-
-int userdiff_config_porcelain(const char *k, const char *v)
-{
-	struct userdiff_driver *drv;
-
 	if ((drv = parse_driver(k, v, "command")))
 		return parse_string(&drv->external, k, v);
 	if ((drv = parse_driver(k, v, "textconv")))
diff --git a/userdiff.h b/userdiff.h
index f29c18f..ba29457 100644
--- a/userdiff.h
+++ b/userdiff.h
@@ -14,8 +14,7 @@ struct userdiff_driver {
 	const char *textconv;
 };
 
-int userdiff_config_basic(const char *k, const char *v);
-int userdiff_config_porcelain(const char *k, const char *v);
+int userdiff_config(const char *k, const char *v);
 struct userdiff_driver *userdiff_find_by_name(const char *name);
 struct userdiff_driver *userdiff_find_by_path(const char *path);
 
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 6/8] only textconv regular files
  2008-10-26  4:38                                     ` Jeff King
                                                         ` (4 preceding siblings ...)
  2008-10-26  4:45                                       ` [PATCH v3 5/8] userdiff: require explicitly allowing textconv Jeff King
@ 2008-10-26  4:46                                       ` Jeff King
  2008-10-26  4:49                                       ` [PATCH v3 7/8] wt-status: load diff ui config Jeff King
  2008-10-26  4:50                                       ` [PATCH v3 8/8] enable textconv for diff in verbose status/commit Jeff King
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:46 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

We treat symlinks as text containing the results of the
symlink, so it doesn't make much sense to text-convert them.

Similarly gitlink components just end up as the text
"Subproject commit $sha1", which we should leave intact.

Note that a typechange may be broken into two parts: the
removal of the old part and the addition of the new. In that
case, we _do_ show the textconv for any part which is the
addition or removal of a file we would ordinarily textconv,
since it is purely acting on the file contents.

Signed-off-by: Jeff King <peff@peff.net>
---
Same as before.

 diff.c                   |    2 ++
 t/t4030-diff-textconv.sh |    2 +-
 2 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/diff.c b/diff.c
index 608223a..23d454e 100644
--- a/diff.c
+++ b/diff.c
@@ -1311,6 +1311,8 @@ static const char *get_textconv(struct diff_filespec *one)
 {
 	if (!DIFF_FILE_VALID(one))
 		return NULL;
+	if (!S_ISREG(one->mode))
+		return NULL;
 	diff_filespec_load_driver(one);
 	return one->driver->textconv;
 }
diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 1df48ae..3945731 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -104,7 +104,7 @@ index ad8b3d2..67be421
 \ No newline at end of file
 EOF
 # make a symlink the hard way that works on symlink-challenged file systems
-test_expect_failure 'textconv does not act on symlinks' '
+test_expect_success 'textconv does not act on symlinks' '
 	echo -n frotz > file &&
 	git add file &&
 	git ls-files -s | sed -e s/100644/120000/ |
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 7/8] wt-status: load diff ui config
  2008-10-26  4:38                                     ` Jeff King
                                                         ` (5 preceding siblings ...)
  2008-10-26  4:46                                       ` [PATCH v3 6/8] only textconv regular files Jeff King
@ 2008-10-26  4:49                                       ` Jeff King
  2008-10-27  5:30                                         ` Junio C Hamano
  2008-10-26  4:50                                       ` [PATCH v3 8/8] enable textconv for diff in verbose status/commit Jeff King
  7 siblings, 1 reply; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:49 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

When "git status -v" shows a diff, we did not respect the
user's usual diff preferences at all. Loading just
git_diff_basic_config would give us things like rename
limits and diff drivers. But it makes even more sense to
load git_diff_ui_config, which gives us colorization if the
user has requested it.

Note that we need to take special care to cancel
colorization when writing to the commit template file, as
described in the code comments.

Signed-off-by: Jeff King <peff@peff.net>
---
This is necessary to do textconvs in the "status -v" diff (which will
come in the next patch).

But it makes me a little nervous. On one hand, I think it is definitely
the right thing for "status -v" to respect user options. But we do
several _other_ diffs in addition, and those are more "plumbing" diffs.
I think they should probably at least have diff_basic_config (e.g., for
rename limits). But we are applying the diff_ui_config options to all
diffs. Looking over the available options, I _think_ there are no nasty
surprises. But you never know.

 t/t7502-commit.sh |    8 ++++++++
 wt-status.c       |   10 +++++++++-
 2 files changed, 17 insertions(+), 1 deletions(-)

diff --git a/t/t7502-commit.sh b/t/t7502-commit.sh
index 3eb9fae..ad42c78 100755
--- a/t/t7502-commit.sh
+++ b/t/t7502-commit.sh
@@ -89,6 +89,14 @@ test_expect_success 'verbose' '
 
 '
 
+test_expect_success 'verbose respects diff config' '
+
+	git config color.diff always &&
+	git status -v >actual &&
+	grep "\[1mdiff --git" actual &&
+	git config --unset color.diff
+'
+
 test_expect_success 'cleanup commit messages (verbatim,-t)' '
 
 	echo >>negative &&
diff --git a/wt-status.c b/wt-status.c
index c3a9cab..54d2f58 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -303,6 +303,14 @@ static void wt_status_print_verbose(struct wt_status *s)
 	rev.diffopt.detect_rename = 1;
 	rev.diffopt.file = s->fp;
 	rev.diffopt.close_file = 0;
+	/*
+	 * If we're not going to stdout, then we definitely don't
+	 * want color, since we are going to the commit message
+	 * file (and even the "auto" setting won't work, since it
+	 * will have checked isatty on stdout).
+	 */
+	if (s->fp != stdout)
+		DIFF_OPT_CLR(&rev.diffopt, COLOR_DIFF);
 	run_diff_index(&rev, 1);
 }
 
@@ -422,5 +430,5 @@ int git_status_config(const char *k, const char *v, void *cb)
 			return error("Invalid untracked files mode '%s'", v);
 		return 0;
 	}
-	return git_color_default_config(k, v, cb);
+	return git_diff_ui_config(k, v, cb);
 }
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* [PATCH v3 8/8] enable textconv for diff in verbose status/commit
  2008-10-26  4:38                                     ` Jeff King
                                                         ` (6 preceding siblings ...)
  2008-10-26  4:49                                       ` [PATCH v3 7/8] wt-status: load diff ui config Jeff King
@ 2008-10-26  4:50                                       ` Jeff King
  7 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:50 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

This diff is meant for human consumption, so it makes sense
to apply text conversion here, as we would for the regular
diff porcelain.

Signed-off-by: Jeff King <peff@peff.net>
---
Pretty straightforward, but depends on whether we find 7/8 acceptable.

 t/t4030-diff-textconv.sh |    8 ++++++++
 wt-status.c              |    1 +
 2 files changed, 9 insertions(+), 0 deletions(-)

diff --git a/t/t4030-diff-textconv.sh b/t/t4030-diff-textconv.sh
index 3945731..09967f5 100755
--- a/t/t4030-diff-textconv.sh
+++ b/t/t4030-diff-textconv.sh
@@ -76,6 +76,14 @@ test_expect_success 'format-patch produces binary' '
 	test_cmp expect.binary actual
 '
 
+test_expect_success 'status -v produces text' '
+	git reset --soft HEAD^ &&
+	git status -v >diff &&
+	find_diff <diff >actual &&
+	test_cmp expect.text actual &&
+	git reset --soft HEAD@{1}
+'
+
 cat >expect.stat <<'EOF'
  file |  Bin 2 -> 4 bytes
  1 files changed, 0 insertions(+), 0 deletions(-)
diff --git a/wt-status.c b/wt-status.c
index 54d2f58..6a7645e 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -301,6 +301,7 @@ static void wt_status_print_verbose(struct wt_status *s)
 	setup_revisions(0, NULL, &rev, s->reference);
 	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
 	rev.diffopt.detect_rename = 1;
+	DIFF_OPT_SET(&rev.diffopt, ALLOW_TEXTCONV);
 	rev.diffopt.file = s->fp;
 	rev.diffopt.close_file = 0;
 	/*
-- 
1.6.0.3.524.ge8b2e.dirty

^ permalink raw reply related	[flat|nested] 85+ messages in thread

* Re: [PATCH 4/7] textconv: don't convert for every operation
  2008-10-25 23:48                                     ` Junio C Hamano
@ 2008-10-26  4:52                                       ` Jeff King
  0 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-26  4:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Sat, Oct 25, 2008 at 04:48:19PM -0700, Junio C Hamano wrote:

> Another place that would benefit from ALLOW_TEXTCONV is the function
> wt_status_print_verbose() in wt-status.c, where we generate preview diff
> for "git commit -v".  The output from that function is purely for human
> consumption and does not have to be applicable.

I agree, but it turned out not to be as straightforward as I had hoped,
since status doesn't even parse diff.* config.  See patches 7 and 8 in
the series I just posted.

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH v3 7/8] wt-status: load diff ui config
  2008-10-26  4:49                                       ` [PATCH v3 7/8] wt-status: load diff ui config Jeff King
@ 2008-10-27  5:30                                         ` Junio C Hamano
  2008-10-27  8:23                                           ` Jeff King
  0 siblings, 1 reply; 85+ messages in thread
From: Junio C Hamano @ 2008-10-27  5:30 UTC (permalink / raw)
  To: Jeff King; +Cc: Johannes Sixt, Matthieu Moy, git

Jeff King <peff@peff.net> writes:

> But it makes me a little nervous. On one hand, I think it is definitely
> the right thing for "status -v" to respect user options. But we do
> several _other_ diffs in addition, and those are more "plumbing" diffs.
> I think they should probably at least have diff_basic_config (e.g., for
> rename limits). But we are applying the diff_ui_config options to all
> diffs. Looking over the available options, I _think_ there are no nasty
> surprises. But you never know.

Up to 6/8 are indisputably good changes.  The next one means well, and
this one is a requisite step for it, but I agree that this feels somewhat
risky.

^ permalink raw reply	[flat|nested] 85+ messages in thread

* Re: [PATCH v3 7/8] wt-status: load diff ui config
  2008-10-27  5:30                                         ` Junio C Hamano
@ 2008-10-27  8:23                                           ` Jeff King
  0 siblings, 0 replies; 85+ messages in thread
From: Jeff King @ 2008-10-27  8:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Johannes Sixt, Matthieu Moy, git

On Sun, Oct 26, 2008 at 10:30:48PM -0700, Junio C Hamano wrote:

> > But it makes me a little nervous. On one hand, I think it is definitely
> > the right thing for "status -v" to respect user options. But we do
> > several _other_ diffs in addition, and those are more "plumbing" diffs.
> > I think they should probably at least have diff_basic_config (e.g., for
> > rename limits). But we are applying the diff_ui_config options to all
> > diffs. Looking over the available options, I _think_ there are no nasty
> > surprises. But you never know.
> 
> Up to 6/8 are indisputably good changes.  The next one means well, and
> this one is a requisite step for it, but I agree that this feels somewhat
> risky.

I have to wonder that nobody ever complained about the lack of diff
config here before. If I ever used "git status -v", I would certainly
have been turned off by the lack of color. But maybe everybody is using
"git commit -v" and their editor is colorizing it. Or maybe they just
aren't using color (the only other noticeable thing would be rename
options).

-Peff

^ permalink raw reply	[flat|nested] 85+ messages in thread

end of thread, other threads:[~2008-10-27  8:25 UTC | newest]

Thread overview: 85+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-09-28  2:06 Implementation of a "textconv" filter for easy custom diff Matthieu Moy
2008-09-28  2:06 ` [PATCH] Facility to have multiple kinds of drivers for diff Matthieu Moy
2008-09-28  2:06   ` [PATCH] Implement run_command_to_buf (spawn a process and reads its stdout) Matthieu Moy
2008-09-28  2:06     ` [PATCH] Implement a textconv filter for "git diff" Matthieu Moy
2008-09-28  2:06       ` [PATCH] Document the textconv filter Matthieu Moy
2008-09-28  2:06         ` [PATCH] Add a basic test for " Matthieu Moy
2008-09-28 11:07         ` [PATCH] Document " Johannes Sixt
2008-09-28 12:29           ` Matthieu Moy
2008-09-28  4:15       ` [PATCH] Implement a textconv filter for "git diff" Jeff King
2008-09-28 10:00         ` Matthieu Moy
2008-09-28 16:12           ` Jeff King
2008-09-28  4:10 ` Implementation of a "textconv" filter for easy custom diff Jeff King
2008-09-28  9:57   ` Matthieu Moy
2008-09-28 16:11     ` Jeff King
2008-09-30 15:19       ` Matthieu Moy
2008-09-30 16:45         ` Jeff King
2008-10-05 21:41           ` [PATCH 0/4] diff text conversion filter Jeff King
2008-10-05 21:42             ` [PATCH 1/4] t4012: use test_cmp instead of cmp Jeff King
2008-10-05 21:43             ` [PATCH 2/4] diff: unify external diff and funcname parsing code Jeff King
2008-10-05 21:43             ` [PATCH 3/4] diff: introduce diff.<driver>.binary Jeff King
2008-10-07 15:17               ` Johannes Sixt
2008-10-07 15:35                 ` Jeff King
2008-10-07 15:54                   ` Johannes Sixt
2008-10-12  5:24                   ` Junio C Hamano
2008-10-13  1:23                     ` Jeff King
2008-10-13  4:00                       ` Junio C Hamano
2008-10-13  4:15                         ` Jeff King
2008-10-13  6:10                           ` Johannes Sixt
2008-10-13 13:54                           ` Junio C Hamano
2008-10-13  8:12                         ` Matthieu Moy
2008-10-24  2:46               ` Jeff King
2008-10-24  2:48                 ` [PATCH 1/5] diff: add missing static declaration Jeff King
2008-10-24  2:50                 ` [PATCH 2/5] add userdiff textconv tests Jeff King
2008-10-24  2:53                 ` [PATCH 3/5] refactor userdiff textconv code Jeff King
2008-10-24  7:15                   ` Johannes Sixt
2008-10-24 12:40                     ` Jeff King
2008-10-24 13:51                     ` Jeff King
2008-10-24 14:01                       ` Johannes Sixt
2008-10-24 14:08                         ` Jeff King
2008-10-24 21:12                   ` Junio C Hamano
2008-10-24 22:50                     ` Jeff King
2008-10-24 22:56                       ` Jeff King
2008-10-25  0:48                         ` Jeff King
2008-10-25  0:50                           ` [PATCH 1/7] diff: add missing static declaration Jeff King
2008-10-25  0:51                           ` [PATCH 2/7] add userdiff textconv tests Jeff King
2008-10-25  0:52                           ` [PATCH 3/7] textconv: assume text-converted contents are not binary Jeff King
2008-10-25  0:52                           ` [PATCH 4/7] textconv: don't convert for every operation Jeff King
2008-10-25  5:41                             ` Junio C Hamano
2008-10-25  7:19                               ` Jeff King
2008-10-25 18:32                                 ` Junio C Hamano
2008-10-25 19:35                                   ` Jeff King
2008-10-25 23:35                                     ` Junio C Hamano
2008-10-25 23:48                                     ` Junio C Hamano
2008-10-26  4:52                                       ` Jeff King
2008-10-26  4:38                                     ` Jeff King
2008-10-26  4:41                                       ` [PATCH v3 1/8] diff: add missing static declaration Jeff King
2008-10-26  4:41                                       ` [PATCH v3 2/8] document the diff driver textconv feature Jeff King
2008-10-26  4:42                                       ` [PATCH v3 3/8] add userdiff textconv tests Jeff King
2008-10-26  4:44                                       ` [PATCH v3 4/8] refactor userdiff textconv code Jeff King
2008-10-26  4:45                                       ` [PATCH v3 5/8] userdiff: require explicitly allowing textconv Jeff King
2008-10-26  4:46                                       ` [PATCH v3 6/8] only textconv regular files Jeff King
2008-10-26  4:49                                       ` [PATCH v3 7/8] wt-status: load diff ui config Jeff King
2008-10-27  5:30                                         ` Junio C Hamano
2008-10-27  8:23                                           ` Jeff King
2008-10-26  4:50                                       ` [PATCH v3 8/8] enable textconv for diff in verbose status/commit Jeff King
2008-10-25  0:54                           ` [PATCH 5/7] userdiff: require explicitly allowing textconv Jeff King
2008-10-25  0:54                           ` [PATCH 6/7] document the diff driver textconv feature Jeff King
2008-10-25  0:55                           ` [PATCH 7/7] only textconv regular files Jeff King
2008-10-24  2:55                 ` [PATCH 4/5] userdiff: require explicitly allowing textconv Jeff King
2008-10-24  7:04                   ` Johannes Sixt
2008-10-24  2:56                 ` [PATCH 5/5] document the diff driver textconv feature Jeff King
2008-10-24  7:02                 ` [PATCH 3/4] diff: introduce diff.<driver>.binary Johannes Sixt
2008-10-05 21:43             ` [PATCH 4/4] diff: add filter for converting binary to text Jeff King
2008-10-05 22:03             ` [PATCH 0/4] diff text conversion filter Jakub Narebski
2008-10-06  6:29             ` Johannes Sixt
2008-10-06  6:52               ` Jeff King
2008-10-06  8:55                 ` Johannes Sixt
2008-10-06 15:15               ` Matthieu Moy
2008-10-07  1:20                 ` Jeff King
2008-10-07  5:52                   ` Johannes Sixt
2008-10-07  6:00                     ` Jeff King
2008-10-07  6:15                       ` Matthieu Moy
2008-10-07 15:46                         ` Jeff King
2008-10-07 16:15                           ` Johannes Sixt
2008-10-13  1:29                             ` Jeff King

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).