All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v2 0/3] textconv support for blame
@ 2010-06-07 14:41 Axel Bonnet
  2010-06-07 15:23 ` [PATCH v2 1/3] textconv: make the API public Axel Bonnet
  0 siblings, 1 reply; 17+ messages in thread
From: Axel Bonnet @ 2010-06-07 14:41 UTC (permalink / raw)
  To: git; +Cc: Axel Bonnet

This is a patch series to implement textconv support for git blame.
As textconv support has already been added to git diff, so we use textconv methods of diff.
Here are the different changes:
- make the diff textconv API public
- add diff_options to blame (--textconv and --no-textconv)
- perform textconv when we meet an object with textconv driver
- t8006-blame-textconv.sh tests conversion works

Axel Bonnet (3):
  textconv: make the API public
  textconv: support for blame
  t/t8006: test textconv support for blame

 builtin/blame.c           |   82 +++++++++++++++++++++++++++++++++++++--------
 diff.c                    |   12 ++----
 diff.h                    |    8 ++++
 t/t8006-blame-textconv.sh |   80 +++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+), 22 deletions(-)
 create mode 100755 t/t8006-blame-textconv.sh

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 1/3] textconv: make the API public
  2010-06-07 14:41 [PATCH v2 0/3] textconv support for blame Axel Bonnet
@ 2010-06-07 15:23 ` Axel Bonnet
  2010-06-07 15:23   ` [PATCH v2 2/3] textconv: support for blame Axel Bonnet
  0 siblings, 1 reply; 17+ messages in thread
From: Axel Bonnet @ 2010-06-07 15:23 UTC (permalink / raw)
  To: git; +Cc: Axel Bonnet, Clément Poulain, Diane Gasselin

The textconv functionality allows one to convert a file into text before
running diff. But this functionality can be useful to other features
such as blame.

Signed-off-by: Axel Bonnet <axel.bonnet@ensimag.imag.fr>
Signed-off-by: Clément Poulain <clement.poulain@ensimag.imag.fr>
Signed-off-by: Diane Gasselin <diane.gasselin@ensimag.imag.fr>
---
 diff.c |   12 ++++--------
 diff.h |    8 ++++++++
 2 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index 494f560..b4a830f 100644
--- a/diff.c
+++ b/diff.c
@@ -43,10 +43,6 @@ static char diff_colors[][COLOR_MAXLEN] = {
 	GIT_COLOR_NORMAL,	/* FUNCINFO */
 };
 
-static void diff_filespec_load_driver(struct diff_filespec *one);
-static size_t fill_textconv(struct userdiff_driver *driver,
-			    struct diff_filespec *df, char **outbuf);
-
 static int parse_diff_color_slot(const char *var, int ofs)
 {
 	if (!strcasecmp(var+ofs, "plain"))
@@ -1629,7 +1625,7 @@ void diff_set_mnemonic_prefix(struct diff_options *options, const char *a, const
 		options->b_prefix = b;
 }
 
-static struct userdiff_driver *get_textconv(struct diff_filespec *one)
+struct userdiff_driver *get_textconv(struct diff_filespec *one)
 {
 	if (!DIFF_FILE_VALID(one))
 		return NULL;
@@ -4002,9 +3998,9 @@ static char *run_textconv(const char *pgm, struct diff_filespec *spec,
 	return strbuf_detach(&buf, outsize);
 }
 
-static size_t fill_textconv(struct userdiff_driver *driver,
-			    struct diff_filespec *df,
-			    char **outbuf)
+size_t fill_textconv(struct userdiff_driver *driver,
+		     struct diff_filespec *df,
+		     char **outbuf)
 {
 	size_t size;
 
diff --git a/diff.h b/diff.h
index 9ace08c..2a0e36d 100644
--- a/diff.h
+++ b/diff.h
@@ -9,6 +9,8 @@
 struct rev_info;
 struct diff_options;
 struct diff_queue_struct;
+struct diff_filespec;
+struct userdiff_driver;
 
 typedef void (*change_fn_t)(struct diff_options *options,
 		 unsigned old_mode, unsigned new_mode,
@@ -287,4 +289,10 @@ extern void diff_no_index(struct rev_info *, int, const char **, int, const char
 
 extern int index_differs_from(const char *def, int diff_flags);
 
+extern size_t fill_textconv(struct userdiff_driver *driver,
+			    struct diff_filespec *df,
+			    char **outbuf);
+
+extern struct userdiff_driver *get_textconv(struct diff_filespec *one);
+
 #endif /* DIFF_H */
-- 
1.6.6.7.ga5fe3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 2/3] textconv: support for blame
  2010-06-07 15:23 ` [PATCH v2 1/3] textconv: make the API public Axel Bonnet
@ 2010-06-07 15:23   ` Axel Bonnet
  2010-06-07 15:23     ` [PATCH v2 3/3] t/t8006: test textconv " Axel Bonnet
                       ` (2 more replies)
  0 siblings, 3 replies; 17+ messages in thread
From: Axel Bonnet @ 2010-06-07 15:23 UTC (permalink / raw)
  To: git; +Cc: Axel Bonnet, Clément Poulain, Diane Gasselin

This patches enables to perform textconv with blame if a textconv driver is
available for the file.

The main task is performed by the textconv_object function which prepares
diff_filespec and if possible converts the file using diff textconv API.
Only regular files are converted, so the mode of diff_filespec is faked.

Textconv conversion is enabled by default (equivalent to the option
--textconv), since blaming binary files is useless in most cases.
The option --no-textconv is used to disable textconv conversion.

The declarations of several functions are modified to give access to a
diff_options, in order to know whether the textconv option is activated or not.

Signed-off-by: Axel Bonnet <axel.bonnet@ensimag.imag.fr>
Signed-off-by: Clément Poulain <clement.poulain@ensimag.imag.fr>
Signed-off-by: Diane Gasselin <diane.gasselin@ensimag.imag.fr>
---
 builtin/blame.c |   82 +++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 68 insertions(+), 14 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index fc15863..f831e3a 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -20,6 +20,7 @@
 #include "mailmap.h"
 #include "parse-options.h"
 #include "utf8.h"
+#include "userdiff.h"
 
 static char blame_usage[] = "git blame [options] [rev-opts] [rev] [--] file";
 
@@ -86,16 +87,49 @@ struct origin {
 };
 
 /*
+ * Prepare diff_filespec and convert it using diff textconv API
+ * if the textconv driver exists.
+ * Return 1 if the conversion succeeds, 0 otherwise.
+ */
+static int textconv_object(const char *path,
+			   const unsigned char *sha1,
+			   char **buf,
+			   size_t *buf_size)
+{
+	struct diff_filespec *df;
+	struct userdiff_driver *textconv;
+
+	df = alloc_filespec(path);
+	fill_filespec(df, sha1, S_IFREG | 0664);
+	textconv = get_textconv(df);
+	if (!textconv) {
+		free_filespec(df);
+		return 0;
+	}
+
+	*buf_size = fill_textconv(textconv, df, buf);
+	free_filespec(df);
+	return 1;
+}
+
+/*
  * Given an origin, prepare mmfile_t structure to be used by the
  * diff machinery
  */
-static void fill_origin_blob(struct origin *o, mmfile_t *file)
+static void fill_origin_blob(struct diff_options *opt,
+			     struct origin *o, mmfile_t *file)
 {
 	if (!o->file.ptr) {
 		enum object_type type;
 		num_read_blob++;
-		file->ptr = read_sha1_file(o->blob_sha1, &type,
-					   (unsigned long *)(&(file->size)));
+
+		if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
+		    textconv_object(o->path, o->blob_sha1, &file->ptr,
+				    (size_t *) &file->size))
+			;
+		else
+			file->ptr = read_sha1_file(o->blob_sha1, &type,
+						   (unsigned long *)(&(file->size)));
 		if (!file->ptr)
 			die("Cannot read blob %s for path %s",
 			    sha1_to_hex(o->blob_sha1),
@@ -282,7 +316,6 @@ static struct origin *get_origin(struct scoreboard *sb,
 static int fill_blob_sha1(struct origin *origin)
 {
 	unsigned mode;
-
 	if (!is_null_sha1(origin->blob_sha1))
 		return 0;
 	if (get_tree_entry(origin->commit->object.sha1,
@@ -741,8 +774,8 @@ static int pass_blame_to_parent(struct scoreboard *sb,
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	fill_origin_blob(parent, &file_p);
-	fill_origin_blob(target, &file_o);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
+	fill_origin_blob(&sb->revs->diffopt, target, &file_o);
 	num_get_patch++;
 
 	memset(&xpp, 0, sizeof(xpp));
@@ -922,7 +955,7 @@ static int find_move_in_parent(struct scoreboard *sb,
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	fill_origin_blob(parent, &file_p);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
 	if (!file_p.ptr)
 		return 0;
 
@@ -1063,7 +1096,7 @@ static int find_copy_in_parent(struct scoreboard *sb,
 
 			norigin = get_origin(sb, parent, p->one->path);
 			hashcpy(norigin->blob_sha1, p->one->sha1);
-			fill_origin_blob(norigin, &file_p);
+			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p);
 			if (!file_p.ptr)
 				continue;
 
@@ -1983,6 +2016,13 @@ static int git_blame_config(const char *var, const char *value, void *cb)
 		blame_date_mode = parse_date_format(value);
 		return 0;
 	}
+
+	switch (userdiff_config(var, value)) {
+		case 0: break;
+		case -1: return -1;
+		default: return 0;
+	}
+
 	return git_default_config(var, value, cb);
 }
 
@@ -1990,7 +2030,9 @@ static int git_blame_config(const char *var, const char *value, void *cb)
  * Prepare a dummy commit that represents the work tree (or staged) item.
  * Note that annotating work tree item never works in the reverse.
  */
-static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
+static struct commit *fake_working_tree_commit(struct diff_options *opt,
+					       const char *path,
+					       const char *contents_from)
 {
 	struct commit *commit;
 	struct origin *origin;
@@ -2030,10 +2072,14 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
 			read_from = path;
 		}
 		mode = canon_mode(st.st_mode);
+
 		switch (st.st_mode & S_IFMT) {
 		case S_IFREG:
-			if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
-				die_errno("cannot open or read '%s'", read_from);
+			if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
+			    textconv_object(read_from, null_sha1, &buf.buf, &buf.len))
+				;
+			else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
+				 die_errno("cannot open or read '%s'", read_from);
 			break;
 		case S_IFLNK:
 			if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
@@ -2248,6 +2294,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	git_config(git_blame_config, NULL);
 	init_revisions(&revs, NULL);
 	revs.date_mode = blame_date_mode;
+	DIFF_OPT_SET(&revs.diffopt, ALLOW_TEXTCONV);
 
 	save_commit_buffer = 0;
 	dashdash_pos = 0;
@@ -2384,7 +2431,8 @@ parse_done:
 		 * or "--contents".
 		 */
 		setup_work_tree();
-		sb.final = fake_working_tree_commit(path, contents_from);
+		sb.final = fake_working_tree_commit(&sb.revs->diffopt,
+						    path, contents_from);
 		add_pending_object(&revs, &(sb.final->object), ":");
 	}
 	else if (contents_from)
@@ -2411,8 +2459,14 @@ parse_done:
 		if (fill_blob_sha1(o))
 			die("no such path %s in %s", path, final_commit_name);
 
-		sb.final_buf = read_sha1_file(o->blob_sha1, &type,
-					      &sb.final_buf_size);
+		if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) &&
+		    textconv_object(path, o->blob_sha1, (char **) &sb.final_buf,
+				    (size_t *) &sb.final_buf_size))
+			;
+		else
+			sb.final_buf = read_sha1_file(o->blob_sha1, &type,
+						      &sb.final_buf_size);
+
 		if (!sb.final_buf)
 			die("Cannot read blob %s for path %s",
 			    sha1_to_hex(o->blob_sha1),
-- 
1.6.6.7.ga5fe3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* [PATCH v2 3/3] t/t8006: test textconv support for blame
  2010-06-07 15:23   ` [PATCH v2 2/3] textconv: support for blame Axel Bonnet
@ 2010-06-07 15:23     ` Axel Bonnet
  2010-06-11 23:52       ` Junio C Hamano
  2010-06-11 23:52     ` [PATCH v2 2/3] textconv: " Junio C Hamano
  2010-06-14 20:40     ` Junio C Hamano
  2 siblings, 1 reply; 17+ messages in thread
From: Axel Bonnet @ 2010-06-07 15:23 UTC (permalink / raw)
  To: git; +Cc: Axel Bonnet, Clément Poulain, Diane Gasselin

Test the correct functionning of textconv with blame <file> and blame HEAD^ <file>.
Test the case when no driver is specified.

Signed-off-by: Axel Bonnet <axel.bonnet@ensimag.imag.fr>
Signed-off-by: Clément Poulain <clement.poulain@ensimag.imag.fr>
Signed-off-by: Diane Gasselin <diane.gasselin@ensimag.imag.fr>
---
 t/t8006-blame-textconv.sh |   80 +++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 80 insertions(+), 0 deletions(-)
 create mode 100755 t/t8006-blame-textconv.sh

diff --git a/t/t8006-blame-textconv.sh b/t/t8006-blame-textconv.sh
new file mode 100755
index 0000000..db51d4c
--- /dev/null
+++ b/t/t8006-blame-textconv.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+
+test_description='git blame textconv support'
+. ./test-lib.sh
+
+find_blame() {
+	sed -e 's/^.*(/(/g'
+}
+
+cat >helper <<'EOF'
+#!/bin/sh
+sed 's/^/converted: /' "$@"
+EOF
+chmod +x helper
+
+test_expect_success 'setup ' '
+	echo test 1 >one.bin &&
+	echo test number 2 >two.bin &&
+	git add . &&
+	GIT_AUTHOR_NAME=Number1 git commit -a -m First --date="2010-01-01 18:00:00" &&
+	echo test 1 version 2 >one.bin &&
+	echo test number 2 version 2 >>two.bin &&
+	GIT_AUTHOR_NAME=Number2 git commit -a -m Second --date="2010-01-01 20:00:00"
+'
+
+cat >expected <<EOF
+(Number2 2010-01-01 20:00:00 +0000 1) test 1 version 2
+EOF
+
+test_expect_success 'no filter specified' '
+	git blame one.bin >blame &&
+	find_blame Number2 <blame >result &&
+	test_cmp expected result
+'
+
+test_expect_success 'setup textconv filters' '
+	echo "*.bin diff=test" >.gitattributes &&
+	git config diff.test.textconv ./helper &&
+	git config diff.test.cachetextconv false
+'
+
+test_expect_success 'blame with --no-textconv' '
+	git blame --no-textconv one.bin >blame &&
+	find_blame <blame> result &&
+	test_cmp expected result
+'
+
+cat >expected <<EOF
+(Number2 2010-01-01 20:00:00 +0000 1) converted: test 1 version 2
+EOF
+
+test_expect_success 'basic blame on last commit' '
+	git blame one.bin >blame &&
+	find_blame  <blame >result &&
+	test_cmp expected result
+'
+
+cat >expected <<EOF
+(Number1 2010-01-01 18:00:00 +0000 1) converted: test number 2
+(Number2 2010-01-01 20:00:00 +0000 2) converted: test number 2 version 2
+EOF
+
+test_expect_success 'blame --textconv going through revisions' '
+	git blame --textconv two.bin >blame &&
+	find_blame <blame >result &&
+	test_cmp expected result
+'
+
+test_expect_success 'make a new commit' '
+	echo "test number 2 version 3" >>two.bin &&
+	GIT_AUTHOR_NAME=Number3 git commit -a -m Third --date="2010-01-01 22:00:00"
+'
+
+test_expect_success 'blame from previous revision' '
+	git blame HEAD^ two.bin >blame &&
+	find_blame <blame >result &&
+	test_cmp expected result
+'
+
+test_done
-- 
1.6.6.7.ga5fe3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-07 15:23   ` [PATCH v2 2/3] textconv: support for blame Axel Bonnet
  2010-06-07 15:23     ` [PATCH v2 3/3] t/t8006: test textconv " Axel Bonnet
@ 2010-06-11 23:52     ` Junio C Hamano
  2010-06-12  4:11       ` Jeff King
  2010-06-14 20:40     ` Junio C Hamano
  2 siblings, 1 reply; 17+ messages in thread
From: Junio C Hamano @ 2010-06-11 23:52 UTC (permalink / raw)
  To: Axel Bonnet; +Cc: git, Clément Poulain, Diane Gasselin

Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:

> +	switch (userdiff_config(var, value)) {
> +		case 0: break;
> +		case -1: return -1;
> +		default: return 0;
> +	}

Style:

	switch (userdiff_config(var, value)) {
	case 0:
		break;
	case -1:
        	return -1;
	default:
        	return 0;
	}

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] t/t8006: test textconv support for blame
  2010-06-07 15:23     ` [PATCH v2 3/3] t/t8006: test textconv " Axel Bonnet
@ 2010-06-11 23:52       ` Junio C Hamano
  2010-06-14  7:52         ` Diane Gasselin
  0 siblings, 1 reply; 17+ messages in thread
From: Junio C Hamano @ 2010-06-11 23:52 UTC (permalink / raw)
  To: Axel Bonnet; +Cc: git, Clément Poulain, Diane Gasselin

Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:

> Test the correct functionning of textconv with blame <file> and blame HEAD^ <file>.
> Test the case when no driver is specified.

Good to see tests for both positive and negative cases.  Too many people
forget the latter.

> +find_blame() {
> +	sed -e 's/^.*(/(/g'
> +}

Two issues:

 - No need for "g" as your pattern is anchored at the left;

 - As ".*" is greedy, you will eat a lot more than what you expect when
   the line in the blamed contents happen to have '(' on it.

I'd rewrite it as:

    sed -e 's/^[^(]*//'

Will queue all three patches, with this fix and a style fix for 2/3; no
need to resend.

Thanks.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-11 23:52     ` [PATCH v2 2/3] textconv: " Junio C Hamano
@ 2010-06-12  4:11       ` Jeff King
  0 siblings, 0 replies; 17+ messages in thread
From: Jeff King @ 2010-06-12  4:11 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Axel Bonnet, git, Clément Poulain, Diane Gasselin

On Fri, Jun 11, 2010 at 04:52:19PM -0700, Junio C Hamano wrote:

> Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:
> 
> > +	switch (userdiff_config(var, value)) {
> > +		case 0: break;
> > +		case -1: return -1;
> > +		default: return 0;
> > +	}
> 
> Style:
> 
> 	switch (userdiff_config(var, value)) {
> 	case 0:
> 		break;
> 	case -1:
>         	return -1;
> 	default:
>         	return 0;
> 	}

This is cut-and-paste from some of my code in git_diff_basic_config. I
dunno if it is worth style-fixing that one, too.

-Peff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 3/3] t/t8006: test textconv support for blame
  2010-06-11 23:52       ` Junio C Hamano
@ 2010-06-14  7:52         ` Diane Gasselin
  0 siblings, 0 replies; 17+ messages in thread
From: Diane Gasselin @ 2010-06-14  7:52 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Axel Bonnet, git, Clément Poulain

Le 12 juin 2010 01:52, Junio C Hamano <gitster@pobox.com> a écrit :
> Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:
>
>> Test the correct functionning of textconv with blame <file> and blame HEAD^ <file>.
>> Test the case when no driver is specified.
>
> Good to see tests for both positive and negative cases.  Too many people
> forget the latter.
>
>> +find_blame() {
>> +     sed -e 's/^.*(/(/g'
>> +}
>
> Two issues:
>
>  - No need for "g" as your pattern is anchored at the left;
>
>  - As ".*" is greedy, you will eat a lot more than what you expect when
>   the line in the blamed contents happen to have '(' on it.
>
> I'd rewrite it as:
>
>    sed -e 's/^[^(]*//'
>
> Will queue all three patches, with this fix and a style fix for 2/3; no
> need to resend.
>
> Thanks.
>
Thanks. And thanks for fixing.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-07 15:23   ` [PATCH v2 2/3] textconv: support for blame Axel Bonnet
  2010-06-07 15:23     ` [PATCH v2 3/3] t/t8006: test textconv " Axel Bonnet
  2010-06-11 23:52     ` [PATCH v2 2/3] textconv: " Junio C Hamano
@ 2010-06-14 20:40     ` Junio C Hamano
  2010-06-15  9:29       ` Clément Poulain
  2010-06-15 13:58       ` Axel Bonnet
  2 siblings, 2 replies; 17+ messages in thread
From: Junio C Hamano @ 2010-06-14 20:40 UTC (permalink / raw)
  To: Axel Bonnet; +Cc: git, Clément Poulain, Diane Gasselin

Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:

> @@ -86,16 +87,49 @@ struct origin {
> ...
> +static void fill_origin_blob(struct diff_options *opt,
> +			     struct origin *o, mmfile_t *file)
>  {
>  	if (!o->file.ptr) {
>  		enum object_type type;
>  		num_read_blob++;
> -		file->ptr = read_sha1_file(o->blob_sha1, &type,
> -					   (unsigned long *)(&(file->size)));
> +
> +		if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
> +		    textconv_object(o->path, o->blob_sha1, &file->ptr,
> +				    (size_t *) &file->size))

This cast is not correct, as there is no guarantee that your size_t and
typeof(mmfile_t.size) are compatible.  Depending on the gcc version, you
would get "dereferencing type-punned pointer will break strict-aliasing
rules" error.

The same issue exists in Clément's patch to builtin/cat-file.c.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-14 20:40     ` Junio C Hamano
@ 2010-06-15  9:29       ` Clément Poulain
  2010-06-15  9:54         ` Jeff King
  2010-06-15 15:00         ` Junio C Hamano
  2010-06-15 13:58       ` Axel Bonnet
  1 sibling, 2 replies; 17+ messages in thread
From: Clément Poulain @ 2010-06-15  9:29 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Mon, 14 Jun 2010 13:40:21 -0700, Junio C Hamano <gitster@pobox.com>
wrote:
> Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:
> 
>> @@ -86,16 +87,49 @@ struct origin {
>> ...
>> +static void fill_origin_blob(struct diff_options *opt,
>> +			     struct origin *o, mmfile_t *file)
>>  {
>>  	if (!o->file.ptr) {
>>  		enum object_type type;
>>  		num_read_blob++;
>> -		file->ptr = read_sha1_file(o->blob_sha1, &type,
>> -					   (unsigned long *)(&(file->size)));
>> +
>> +		if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
>> +		    textconv_object(o->path, o->blob_sha1, &file->ptr,
>> +				    (size_t *) &file->size))
> 
> This cast is not correct, as there is no guarantee that your size_t and
> typeof(mmfile_t.size) are compatible.  Depending on the gcc version, you
> would get "dereferencing type-punned pointer will break strict-aliasing
> rules" error.
> 
> The same issue exists in Clément's patch to builtin/cat-file.c.

We did this way because we found a similar cast in prep_temp_blob(),
diff.c:

	if (convert_to_working_tree(path,
			(const char *)blob, (size_t)size, &buf)) {

where size is an unsigned long.
Is it the same issue ? Or is it different because it's not a pointer cast?

Otherwise, we thought of reversing the conversion. That is to say, instead
of casting "long *" in "size_t *" when calling textconv_object(), is it
better to cast size_t in "unsigned long" in textconv_object():

	*buf_size = (unsigned long) fill_textconv(textconv, df, buf); ?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-15  9:29       ` Clément Poulain
@ 2010-06-15  9:54         ` Jeff King
  2010-06-15 10:32           ` bonneta
       [not found]           ` <aad13a73928536f87879ef7284d6cc75@ensimag.fr>
  2010-06-15 15:00         ` Junio C Hamano
  1 sibling, 2 replies; 17+ messages in thread
From: Jeff King @ 2010-06-15  9:54 UTC (permalink / raw)
  To: Clément Poulain; +Cc: Junio C Hamano, git

On Tue, Jun 15, 2010 at 11:29:57AM +0200, Clément Poulain wrote:

> > The same issue exists in Clément's patch to builtin/cat-file.c.
> 
> We did this way because we found a similar cast in prep_temp_blob(),
> diff.c:
> 
> 	if (convert_to_working_tree(path,
> 			(const char *)blob, (size_t)size, &buf)) {
> 
> where size is an unsigned long.
> Is it the same issue ? Or is it different because it's not a pointer cast?

Right. The compiler will handle conversion between integer types during
assignment itself, converting representations as necessary (in fact,
that cast looks useless to me, as implicit conversions are allowed in
C). The only problem is dereferencing a pointer to X as something other
than X.

> Otherwise, we thought of reversing the conversion. That is to say, instead
> of casting "long *" in "size_t *" when calling textconv_object(), is it
> better to cast size_t in "unsigned long" in textconv_object():
> 
> 	*buf_size = (unsigned long) fill_textconv(textconv, df, buf); ?

You shouldn't even have to cast there, for the same reason as above.
That is why I wrote fill_textconv to return the size parameter, rather
than writing to a passed-in pointer. It avoids the annoying
size_t / unsigned long casting caused by different usage (in an ideal
world, all of our sizes would be the same type, but the strbuf and diff
code obviously differ).

-Peff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-15  9:54         ` Jeff King
@ 2010-06-15 10:32           ` bonneta
  2010-06-15 10:51             ` Matthieu Moy
       [not found]           ` <aad13a73928536f87879ef7284d6cc75@ensimag.fr>
  1 sibling, 1 reply; 17+ messages in thread
From: bonneta @ 2010-06-15 10:32 UTC (permalink / raw)
  To: git

On Tue, 15 Jun 2010 05:54:53 -0400, Jeff King <peff@peff.net> wrote:
> On Tue, Jun 15, 2010 at 11:29:57AM +0200, Clément Poulain wrote:
> 
>> > The same issue exists in Clément's patch to builtin/cat-file.c.
>> 
>> We did this way because we found a similar cast in prep_temp_blob(),
>> diff.c:
>> 
>> 	if (convert_to_working_tree(path,
>> 			(const char *)blob, (size_t)size, &buf)) {
>> 
>> where size is an unsigned long.
>> Is it the same issue ? Or is it different because it's not a pointer
>> cast?
> 
> Right. The compiler will handle conversion between integer types during
> assignment itself, converting representations as necessary (in fact,
> that cast looks useless to me, as implicit conversions are allowed in
> C). The only problem is dereferencing a pointer to X as something other
> than X.
> 
>> Otherwise, we thought of reversing the conversion. That is to say,
>> instead
>> of casting "long *" in "size_t *" when calling textconv_object(), is it
>> better to cast size_t in "unsigned long" in textconv_object():
>> 
>> 	*buf_size = (unsigned long) fill_textconv(textconv, df, buf); ?
> 
> You shouldn't even have to cast there, for the same reason as above.
> That is why I wrote fill_textconv to return the size parameter, rather
> than writing to a passed-in pointer. It avoids the annoying
> size_t / unsigned long casting caused by different usage (in an ideal
> world, all of our sizes would be the same type, but the strbuf and diff
> code obviously differ).

Thanks for your answer.

We have changed the declaration of textconv_object() to:

static int textconv_object(const char *path,
                           const unsigned char *sha1,
                           char **buf,
                           unsigned long *buf_size)

And now we can do:
*buf_size = fill_textconv(textconv, df, buf);
without any cast.

But we have to do:
textconv_object(read_from, null_sha1, &buf.buf, (unsigned long *)
&buf.len))
where buf.len is size_t.

Is that ok?
Our gcc doesn't report any strict-aliasing problem, so we don't know if it
is better than the initial version or not...

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-15 10:32           ` bonneta
@ 2010-06-15 10:51             ` Matthieu Moy
  0 siblings, 0 replies; 17+ messages in thread
From: Matthieu Moy @ 2010-06-15 10:51 UTC (permalink / raw)
  To: bonneta; +Cc: git

bonneta <bonneta@ensimag.fr> writes:

> But we have to do:
> textconv_object(read_from, null_sha1, &buf.buf, (unsigned long *)
> &buf.len))
> where buf.len is size_t.
>
> Is that ok?

I don't think it fixes the problem. You're assuming sizeof(unsigned
long) == sizeof(size_t), otherwise, textconv_object will write the
incorrect number of bytes at the given adress.

If you have to use this pass-by-adress, you want

size_t buf_len; /* textconv_object needs a last parameter of type
                   (size_t *) */
textconv_object(..., &buf_len); /* <-- no cast here */
buf.len = buf_len; /* This is a cast, but not a pointer cast. The
                      compiler will do the actual conversion if
                      needed (while pointer casts are just a matter of
                      typing, the generate no code). */

-- 
Matthieu Moy
http://www-verimag.imag.fr/~moy/

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
       [not found]           ` <aad13a73928536f87879ef7284d6cc75@ensimag.fr>
@ 2010-06-15 11:07             ` Jeff King
  2010-06-15 12:13               ` bonneta
  0 siblings, 1 reply; 17+ messages in thread
From: Jeff King @ 2010-06-15 11:07 UTC (permalink / raw)
  To: bonneta; +Cc: git

[resending to cc git@vger]

On Tue, Jun 15, 2010 at 12:29:43PM +0200, bonneta wrote:

> We have changed the declaration of textconv_object() to:
> 
> static int textconv_object(const char *path,
>                            const unsigned char *sha1,
>                            char **buf,
>                            unsigned long *buf_size)
> 
> And now we can do:
> *buf_size = fill_textconv(textconv, df, buf);
> without any cast.

I assume you mean dropping the final buf_size parameter from that
declaration, which is what your usage example has. I would return either
an "unsigned long" or a size_t rather than an int. We are dealing with
potential whole-file sizes, so it is better to use at least as large a
data type as other parts of the code (we still may run into truncation
problems, but at least you are not making things any worse).

> But we have to do:
> textconv_object(read_from, null_sha1, &buf.buf, (unsigned long *)
> &buf.len))
> where buf.len is size_t.
> 
> Is that ok?

No, that has the same problem. Imagine a big endian machine with a
32-bit unsigned long and a 64-bit size_t. You would write into the first
32 bits of buf.len, which are the high bits, giving you a ridiculously
large answer.

The only portable way in C to convert between types is by assignment. So
you have to do:

  unsigned long foo;
  textconv_object(read_from, null_sha1, &buf.buf, &foo);
  buf.len = foo;

But now I'm confused. That matches the declaration you gave in the first
part of your email, but not the usage example.

-Peff

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-15 11:07             ` Jeff King
@ 2010-06-15 12:13               ` bonneta
  0 siblings, 0 replies; 17+ messages in thread
From: bonneta @ 2010-06-15 12:13 UTC (permalink / raw)
  To: Jeff King; +Cc: git

On Tue, 15 Jun 2010 07:07:10 -0400, Jeff King <peff@peff.net> wrote:
> [resending to cc git@vger]
> 
> On Tue, Jun 15, 2010 at 12:29:43PM +0200, bonneta wrote:
> 
>> We have changed the declaration of textconv_object() to:
>> 
>> static int textconv_object(const char *path,
>>                            const unsigned char *sha1,
>>                            char **buf,
>>                            unsigned long *buf_size)
>> 
>> And now we can do:
>> *buf_size = fill_textconv(textconv, df, buf);
>> without any cast.
> 
> I assume you mean dropping the final buf_size parameter from that
> declaration, which is what your usage example has. I would return either
> an "unsigned long" or a size_t rather than an int. We are dealing with
> potential whole-file sizes, so it is better to use at least as large a
> data type as other parts of the code (we still may run into truncation
> problems, but at least you are not making things any worse).
> 
>> But we have to do:
>> textconv_object(read_from, null_sha1, &buf.buf, (unsigned long *)
>> &buf.len))
>> where buf.len is size_t.
>> 
>> Is that ok?
> 
> No, that has the same problem. Imagine a big endian machine with a
> 32-bit unsigned long and a 64-bit size_t. You would write into the first
> 32 bits of buf.len, which are the high bits, giving you a ridiculously
> large answer.
> 
> The only portable way in C to convert between types is by assignment. So
> you have to do:
> 
>   unsigned long foo;
>   textconv_object(read_from, null_sha1, &buf.buf, &foo);
>   buf.len = foo;
> 
> But now I'm confused. That matches the declaration you gave in the first
> part of your email, but not the usage example.

We now understand what we have to do, thank you.
We are currently fixing this patch.
Do we have to resend only this patch or the whole series?

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2 2/3] textconv: support for blame
  2010-06-14 20:40     ` Junio C Hamano
  2010-06-15  9:29       ` Clément Poulain
@ 2010-06-15 13:58       ` Axel Bonnet
  1 sibling, 0 replies; 17+ messages in thread
From: Axel Bonnet @ 2010-06-15 13:58 UTC (permalink / raw)
  To: git; +Cc: Axel Bonnet, Clément Poulain, Diane Gasselin

This patches enables to perform textconv with blame if a textconv driver is
available for the file.

The main task is performed by the textconv_object function which prepares
diff_filespec and if possible converts the file using diff textconv API.
Only regular files are converted, so the mode of diff_filespec is faked.

Textconv conversion is enabled by default (equivalent to the option
--textconv), since blaming binary files is useless in most cases.
The option --no-textconv is used to disable textconv conversion.

The declarations of several functions are modified to give access to a
diff_options, in order to know whether the textconv option is activated or not.

Signed-off-by: Axel Bonnet <axel.bonnet@ensimag.imag.fr>
Signed-off-by: Clément Poulain <clement.poulain@ensimag.imag.fr>
Signed-off-by: Diane Gasselin <diane.gasselin@ensimag.imag.fr>
---

The problem with cast between size_t and unsigned long is fixed.
The style problem with the case is fixed.

 builtin/blame.c |   86 ++++++++++++++++++++++++++++++++++++++++++++++---------
 1 files changed, 72 insertions(+), 14 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 8506286..5b61067 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -20,6 +20,7 @@
 #include "mailmap.h"
 #include "parse-options.h"
 #include "utf8.h"
+#include "userdiff.h"
 
 static char blame_usage[] = "git blame [options] [rev-opts] [rev] [--] file";
 
@@ -86,16 +87,49 @@ struct origin {
 };
 
 /*
+ * Prepare diff_filespec and convert it using diff textconv API
+ * if the textconv driver exists.
+ * Return 1 if the conversion succeeds, 0 otherwise.
+ */
+static int textconv_object(const char *path,
+			   const unsigned char *sha1,
+			   char **buf,
+			   unsigned long *buf_size)
+{
+	struct diff_filespec *df;
+	struct userdiff_driver *textconv;
+
+	df = alloc_filespec(path);
+	fill_filespec(df, sha1, S_IFREG | 0664);
+	textconv = get_textconv(df);
+	if (!textconv) {
+		free_filespec(df);
+		return 0;
+	}
+
+	*buf_size = fill_textconv(textconv, df, buf);
+	free_filespec(df);
+	return 1;
+}
+
+/*
  * Given an origin, prepare mmfile_t structure to be used by the
  * diff machinery
  */
-static void fill_origin_blob(struct origin *o, mmfile_t *file)
+static void fill_origin_blob(struct diff_options *opt,
+			     struct origin *o, mmfile_t *file)
 {
 	if (!o->file.ptr) {
 		enum object_type type;
 		num_read_blob++;
-		file->ptr = read_sha1_file(o->blob_sha1, &type,
-					   (unsigned long *)(&(file->size)));
+
+		if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
+		    textconv_object(o->path, o->blob_sha1, &file->ptr,
+				    (unsigned long *)(&file->size)))
+			;
+		else
+			file->ptr = read_sha1_file(o->blob_sha1, &type,
+						   (unsigned long *)(&(file->size)));
 		if (!file->ptr)
 			die("Cannot read blob %s for path %s",
 			    sha1_to_hex(o->blob_sha1),
@@ -282,7 +316,6 @@ static struct origin *get_origin(struct scoreboard *sb,
 static int fill_blob_sha1(struct origin *origin)
 {
 	unsigned mode;
-
 	if (!is_null_sha1(origin->blob_sha1))
 		return 0;
 	if (get_tree_entry(origin->commit->object.sha1,
@@ -741,8 +774,8 @@ static int pass_blame_to_parent(struct scoreboard *sb,
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	fill_origin_blob(parent, &file_p);
-	fill_origin_blob(target, &file_o);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
+	fill_origin_blob(&sb->revs->diffopt, target, &file_o);
 	num_get_patch++;
 
 	memset(&xpp, 0, sizeof(xpp));
@@ -922,7 +955,7 @@ static int find_move_in_parent(struct scoreboard *sb,
 	if (last_in_target < 0)
 		return 1; /* nothing remains for this target */
 
-	fill_origin_blob(parent, &file_p);
+	fill_origin_blob(&sb->revs->diffopt, parent, &file_p);
 	if (!file_p.ptr)
 		return 0;
 
@@ -1063,7 +1096,7 @@ static int find_copy_in_parent(struct scoreboard *sb,
 
 			norigin = get_origin(sb, parent, p->one->path);
 			hashcpy(norigin->blob_sha1, p->one->sha1);
-			fill_origin_blob(norigin, &file_p);
+			fill_origin_blob(&sb->revs->diffopt, norigin, &file_p);
 			if (!file_p.ptr)
 				continue;
 
@@ -1983,6 +2016,16 @@ static int git_blame_config(const char *var, const char *value, void *cb)
 		blame_date_mode = parse_date_format(value);
 		return 0;
 	}
+
+	switch (userdiff_config(var, value)) {
+	case 0:
+		break;
+	case -1:
+		return -1;
+	default:
+		return 0;
+	}
+
 	return git_default_config(var, value, cb);
 }
 
@@ -1990,7 +2033,9 @@ static int git_blame_config(const char *var, const char *value, void *cb)
  * Prepare a dummy commit that represents the work tree (or staged) item.
  * Note that annotating work tree item never works in the reverse.
  */
-static struct commit *fake_working_tree_commit(const char *path, const char *contents_from)
+static struct commit *fake_working_tree_commit(struct diff_options *opt,
+					       const char *path,
+					       const char *contents_from)
 {
 	struct commit *commit;
 	struct origin *origin;
@@ -2018,6 +2063,7 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
 	if (!contents_from || strcmp("-", contents_from)) {
 		struct stat st;
 		const char *read_from;
+		unsigned long buf_len;
 
 		if (contents_from) {
 			if (stat(contents_from, &st) < 0)
@@ -2030,10 +2076,14 @@ static struct commit *fake_working_tree_commit(const char *path, const char *con
 			read_from = path;
 		}
 		mode = canon_mode(st.st_mode);
+
 		switch (st.st_mode & S_IFMT) {
 		case S_IFREG:
-			if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
-				die_errno("cannot open or read '%s'", read_from);
+			if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
+			    textconv_object(read_from, null_sha1, &buf.buf, &buf_len))
+				buf.len = buf_len;
+			else if (strbuf_read_file(&buf, read_from, st.st_size) != st.st_size)
+				 die_errno("cannot open or read '%s'", read_from);
 			break;
 		case S_IFLNK:
 			if (strbuf_readlink(&buf, read_from, st.st_size) < 0)
@@ -2248,6 +2298,7 @@ int cmd_blame(int argc, const char **argv, const char *prefix)
 	git_config(git_blame_config, NULL);
 	init_revisions(&revs, NULL);
 	revs.date_mode = blame_date_mode;
+	DIFF_OPT_SET(&revs.diffopt, ALLOW_TEXTCONV);
 
 	save_commit_buffer = 0;
 	dashdash_pos = 0;
@@ -2384,7 +2435,8 @@ parse_done:
 		 * or "--contents".
 		 */
 		setup_work_tree();
-		sb.final = fake_working_tree_commit(path, contents_from);
+		sb.final = fake_working_tree_commit(&sb.revs->diffopt,
+						    path, contents_from);
 		add_pending_object(&revs, &(sb.final->object), ":");
 	}
 	else if (contents_from)
@@ -2411,8 +2463,14 @@ parse_done:
 		if (fill_blob_sha1(o))
 			die("no such path %s in %s", path, final_commit_name);
 
-		sb.final_buf = read_sha1_file(o->blob_sha1, &type,
-					      &sb.final_buf_size);
+		if (DIFF_OPT_TST(&sb.revs->diffopt, ALLOW_TEXTCONV) &&
+		    textconv_object(path, o->blob_sha1, (char **) &sb.final_buf,
+				    &sb.final_buf_size))
+			;
+		else
+			sb.final_buf = read_sha1_file(o->blob_sha1, &type,
+						      &sb.final_buf_size);
+
 		if (!sb.final_buf)
 			die("Cannot read blob %s for path %s",
 			    sha1_to_hex(o->blob_sha1),
-- 
1.6.6.7.ga5fe3

^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2 2/3] textconv: support for blame
  2010-06-15  9:29       ` Clément Poulain
  2010-06-15  9:54         ` Jeff King
@ 2010-06-15 15:00         ` Junio C Hamano
  1 sibling, 0 replies; 17+ messages in thread
From: Junio C Hamano @ 2010-06-15 15:00 UTC (permalink / raw)
  To: clement.poulain; +Cc: git

Clément Poulain <clement.poulain@ensimag.imag.fr> writes:

> On Mon, 14 Jun 2010 13:40:21 -0700, Junio C Hamano <gitster@pobox.com>
> wrote:
>> Axel Bonnet <axel.bonnet@ensimag.imag.fr> writes:
>> 
>>> @@ -86,16 +87,49 @@ struct origin {
>>> ...
>>> +static void fill_origin_blob(struct diff_options *opt,
>>> +			     struct origin *o, mmfile_t *file)
>>>  {
>>>  	if (!o->file.ptr) {
>>>  		enum object_type type;
>>>  		num_read_blob++;
>>> -		file->ptr = read_sha1_file(o->blob_sha1, &type,
>>> -					   (unsigned long *)(&(file->size)));
>>> +
>>> +		if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
>>> +		    textconv_object(o->path, o->blob_sha1, &file->ptr,
>>> +				    (size_t *) &file->size))
>> 
>> This cast is not correct, as there is no guarantee that your size_t and
>> typeof(mmfile_t.size) are compatible.  Depending on the gcc version, you
>> would get "dereferencing type-punned pointer will break strict-aliasing
>> rules" error.
>> 
>> The same issue exists in Clément's patch to builtin/cat-file.c.
>
> We did this way because we found a similar cast in prep_temp_blob(),
> diff.c:
>
> 	if (convert_to_working_tree(path,
> 			(const char *)blob, (size_t)size, &buf)) {
>
> where size is an unsigned long.

That is a completely different kind of cast that is sane.  The function
takes a _value_ of type size_t.

Your cast is "This function wants to store a value to a _memory location_
that is supposed to hold a value of type size_t, and I know &(file->size)
is not such a location (it is to hold a value of type 'unsigned long');
please pretend that these two distinct pointers are compatible", which is
a big no-no.

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2010-06-15 15:00 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-07 14:41 [PATCH v2 0/3] textconv support for blame Axel Bonnet
2010-06-07 15:23 ` [PATCH v2 1/3] textconv: make the API public Axel Bonnet
2010-06-07 15:23   ` [PATCH v2 2/3] textconv: support for blame Axel Bonnet
2010-06-07 15:23     ` [PATCH v2 3/3] t/t8006: test textconv " Axel Bonnet
2010-06-11 23:52       ` Junio C Hamano
2010-06-14  7:52         ` Diane Gasselin
2010-06-11 23:52     ` [PATCH v2 2/3] textconv: " Junio C Hamano
2010-06-12  4:11       ` Jeff King
2010-06-14 20:40     ` Junio C Hamano
2010-06-15  9:29       ` Clément Poulain
2010-06-15  9:54         ` Jeff King
2010-06-15 10:32           ` bonneta
2010-06-15 10:51             ` Matthieu Moy
     [not found]           ` <aad13a73928536f87879ef7284d6cc75@ensimag.fr>
2010-06-15 11:07             ` Jeff King
2010-06-15 12:13               ` bonneta
2010-06-15 15:00         ` Junio C Hamano
2010-06-15 13:58       ` Axel Bonnet

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.