All of lore.kernel.org
 help / color / mirror / Atom feed
* [RFC/PATCH 0/2] Color moved code differently
@ 2016-09-03  3:31 Stefan Beller
  2016-09-03  3:31 ` [PATCH 1/2] diff.c: emit duplicate lines with a different color Stefan Beller
                   ` (3 more replies)
  0 siblings, 4 replies; 13+ messages in thread
From: Stefan Beller @ 2016-09-03  3:31 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

When moving code (e.g. a function is moved to another part of the file or
to a different file), the review process is different than reviewing new
code. When reviewing moved code we are only interested in the diff as
where there are differences in the moved code, e.g. namespace changes.

However the inner part of these moved texts should not change.
To aid a developer reviewing such code, we'll color pure moved stuff
differently.

A line is colored differently if that line and the surroundign 2 lines
appear as-is in the opposite part of the diff.

Example:
http://i.imgur.com/ay84q0q.png

Or apply these patches and 
    git show e28eae3184b26d3cf3293e69403babb5c575342c
    git show bc9204d4ef6e0672389fdfb0d398fa9a39dba3d5
    git show 8465541e8ce8eaf16e66ab847086779768c18f2d

The code quality sucks though, hence RFC.

Thanks,
Stefan

Stefan Beller (2):
  diff.c: emit duplicate lines with a different color
  WIP xdiff: markup duplicates differently

 diff.c        |  26 ++++++++++++
 diff.h        |   4 +-
 xdiff/xdiff.h |   1 +
 xdiff/xemit.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 156 insertions(+), 3 deletions(-)

-- 
2.10.0.rc2.23.gf336a1a.dirty


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 1/2] diff.c: emit duplicate lines with a different color
  2016-09-03  3:31 [RFC/PATCH 0/2] Color moved code differently Stefan Beller
@ 2016-09-03  3:31 ` Stefan Beller
  2016-09-03  3:31 ` [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently Stefan Beller
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 13+ messages in thread
From: Stefan Beller @ 2016-09-03  3:31 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

Color is WIP, I just make space for a different case.

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 diff.c | 26 ++++++++++++++++++++++++++
 diff.h |  4 +++-
 2 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index 534c12e..57d435c 100644
--- a/diff.c
+++ b/diff.c
@@ -52,6 +52,8 @@ static char diff_colors[][COLOR_MAXLEN] = {
 	GIT_COLOR_YELLOW,	/* COMMIT */
 	GIT_COLOR_BG_RED,	/* WHITESPACE */
 	GIT_COLOR_NORMAL,	/* FUNCINFO */
+	GIT_COLOR_BLUE,		/* NEW DUPLICATE */
+	GIT_COLOR_CYAN,		/* OLD DUPLICATE */
 };
 
 static int parse_diff_color_slot(const char *var)
@@ -541,6 +543,14 @@ static void emit_add_line(const char *reset,
 			  DIFF_FILE_NEW, WSEH_NEW, '+');
 }
 
+static void emit_add_line_dup(const char *reset,
+			  struct emit_callback *ecbdata,
+			  const char *line, int len)
+{
+	emit_line_checked(reset, ecbdata, line, len,
+			  DIFF_FILE_DUPLICATE_NEW, WSEH_NEW, '+');
+}
+
 static void emit_del_line(const char *reset,
 			  struct emit_callback *ecbdata,
 			  const char *line, int len)
@@ -549,6 +559,14 @@ static void emit_del_line(const char *reset,
 			  DIFF_FILE_OLD, WSEH_OLD, '-');
 }
 
+static void emit_del_line_dup(const char *reset,
+			  struct emit_callback *ecbdata,
+			  const char *line, int len)
+{
+	emit_line_checked(reset, ecbdata, line, len,
+			  DIFF_FILE_DUPLICATE_OLD, WSEH_OLD, '-');
+}
+
 static void emit_context_line(const char *reset,
 			      struct emit_callback *ecbdata,
 			      const char *line, int len)
@@ -1300,6 +1318,10 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
 	}
 
 	switch (line[0]) {
+	case '*':
+		ecbdata->lno_in_postimage++;
+		emit_add_line_dup(reset, ecbdata, line + 1, len - 1);
+		break;
 	case '+':
 		ecbdata->lno_in_postimage++;
 		emit_add_line(reset, ecbdata, line + 1, len - 1);
@@ -1308,6 +1330,10 @@ static void fn_out_consume(void *priv, char *line, unsigned long len)
 		ecbdata->lno_in_preimage++;
 		emit_del_line(reset, ecbdata, line + 1, len - 1);
 		break;
+	case '~':
+		ecbdata->lno_in_preimage++;
+		emit_del_line_dup(reset, ecbdata, line + 1, len - 1);
+		break;
 	case ' ':
 		ecbdata->lno_in_postimage++;
 		ecbdata->lno_in_preimage++;
diff --git a/diff.h b/diff.h
index 7883729..d500f0e 100644
--- a/diff.h
+++ b/diff.h
@@ -189,7 +189,9 @@ enum color_diff {
 	DIFF_FILE_NEW = 5,
 	DIFF_COMMIT = 6,
 	DIFF_WHITESPACE = 7,
-	DIFF_FUNCINFO = 8
+	DIFF_FUNCINFO = 8,
+	DIFF_FILE_DUPLICATE_NEW = 9,
+	DIFF_FILE_DUPLICATE_OLD = 10
 };
 const char *diff_get_color(int diff_use_color, enum color_diff ix);
 #define diff_get_color_opt(o, ix) \
-- 
2.10.0.rc2.23.gf336a1a.dirty


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently
  2016-09-03  3:31 [RFC/PATCH 0/2] Color moved code differently Stefan Beller
  2016-09-03  3:31 ` [PATCH 1/2] diff.c: emit duplicate lines with a different color Stefan Beller
@ 2016-09-03  3:31 ` Stefan Beller
  2016-09-03 12:25   ` Jakub Narębski
  2016-09-03  7:00 ` [RFC/PATCH 0/2] Color moved code differently Junio C Hamano
  2016-09-04  9:57 ` Jacob Keller
  3 siblings, 1 reply; 13+ messages in thread
From: Stefan Beller @ 2016-09-03  3:31 UTC (permalink / raw)
  To: git; +Cc: Stefan Beller

When moving code (e.g. a function is moved to another part of the file or
to a different file), the review process is different than reviewing new
code. When reviewing moved code we are only interested in the diff as
where there are differences in the moved code, e.g. namespace changes.

However the inner part of these moved texts should not change.
To aid a developer reviewing such code, emit it with a different prefix
than the usual +,- to indicate it is overlapping code.

Examples from recent history:
    git show e28eae3184b26d3cf3293e69403babb5c575342c
    git show bc9204d4ef6e0672389fdfb0d398fa9a39dba3d5
    git show 8465541e8ce8eaf16e66ab847086779768c18f2d

This doesn't work yet, but we should make this patch series work
to ignore white space changes:
9d1ca1dac0ebfd6e17d73e33b2d173926c139c2d

Signed-off-by: Stefan Beller <sbeller@google.com>
---
 xdiff/xdiff.h |   1 +
 xdiff/xemit.c | 128 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 127 insertions(+), 2 deletions(-)

diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 7423f77..0744e01 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -45,6 +45,7 @@ extern "C" {
 
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
+#define XDL_EMIT_DUPLICATE (1 << 3)
 
 #define XDL_MMB_READONLY (1 << 0)
 
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index b52b4b9..4abafae 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -22,6 +22,9 @@
 
 #include "xinclude.h"
 
+#include "git-compat-util.h"
+#include "hashmap.h"
+
 static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec) {
 
 	*rec = xdf->recs[ri]->ptr;
@@ -158,12 +161,133 @@ static int is_empty_rec(xdfile_t *xdf, long ri)
 	return !len;
 }
 
+struct hashmap *duplicates_added;
+struct hashmap *duplicates_removed;
+
+struct dup_entry {
+	struct hashmap_entry ent;
+	xdfile_t *xdf;
+	long index;
+};
+
+static int dup_entry_cmp(const struct dup_entry *a,
+			   const struct dup_entry *b,
+			   const void *unused)
+{
+	int d = XDL_MIN(strcspn(a->xdf->recs[a->index]->ptr, "\n"),
+				strcspn(b->xdf->recs[b->index]->ptr, "\n"));
+
+	if (!strncmp(a->xdf->recs[a->index]->ptr,
+			b->xdf->recs[b->index]->ptr,
+			d))
+		return 0;
+	return 1;
+}
+
+struct dup_entry *prepare_entry(xdfile_t *xdf, long ri)
+{
+	long range_start = XDL_MAX(0, ri - 2);
+	long range_end = XDL_MIN(xdf->nrec, ri + 2);
+	long hash = 0;
+	int i;
+	struct dup_entry *ret = xmalloc(sizeof(*ret));
+
+	for (i = range_start; i < range_end; i++)
+		hash ^= memhash(xdf->recs[i]->ptr, xdf->recs[i]->size);
+
+	ret->ent.hash = hash;
+	ret->xdf = xdf;
+	ret->index = ri;
+	return ret;
+}
+
+int add_removal(xdfile_t *xdf, long ri)
+{
+	hashmap_add(duplicates_removed, prepare_entry(xdf, ri));
+	return 0;
+}
+
+int add_addition(xdfile_t *xdf, long ri)
+{
+	hashmap_add(duplicates_added, prepare_entry(xdf, ri));
+	return 0;
+}
+
+int xdl_markup_duplicates(xdfenv_t *xe, xdchange_t *xscr,
+			  xdemitconf_t const *xecfg)
+{
+	long s1, s2;
+	xdchange_t *xch, *xche;
+
+	duplicates_added = xmalloc(sizeof(*duplicates_added));
+	duplicates_removed = xmalloc(sizeof(*duplicates_removed));
+	hashmap_init(duplicates_added, (hashmap_cmp_fn)dup_entry_cmp, 0);
+	hashmap_init(duplicates_removed, (hashmap_cmp_fn)dup_entry_cmp, 0);
+
+	for (xch = xscr; xch; xch = xche->next) {
+		xche = xdl_get_hunk(&xch, xecfg);
+		if (!xch)
+			break;
+
+		for (s1 = xch->i1, s2 = xch->i2;; xch = xch->next) {
+
+			/*
+			 * Removes lines from the first file.
+			 */
+			for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
+				if (add_removal(&xe->xdf1, s1) < 0)
+					return -1;
+
+			/*
+			 * Adds lines from the second file.
+			 */
+			for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
+				if (add_addition(&xe->xdf2, s2) < 0)
+					return -1;
+
+			if (xch == xche)
+				break;
+			s1 = xch->i1 + xch->chg1;
+			s2 = xch->i2 + xch->chg2;
+		}
+	}
+	return 0;
+}
+
+static int xdl_check_and_emit_record(xdfile_t *xdf, long ri,
+				     char *pre, xdemitcb_t *ecb,
+				     int duplicate_handling)
+{
+	const char *hacked_pre = pre;
+
+	if (duplicate_handling) {
+		struct dup_entry *keydata = prepare_entry(xdf, ri);
+
+		if (*pre == '+' &&
+		    hashmap_get(duplicates_removed, keydata, keydata))
+				hacked_pre = "*";
+
+		if (*pre == '-' &&
+		    hashmap_get(duplicates_added, keydata, keydata))
+				hacked_pre = "~";
+		free(keydata);
+	}
+
+	return xdl_emit_record(xdf, ri, hacked_pre, ecb);
+}
+
 int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 		  xdemitconf_t const *xecfg) {
 	long s1, s2, e1, e2, lctx;
 	xdchange_t *xch, *xche;
 	long funclineprev = -1;
 	struct func_line func_line = { 0 };
+	int duplicate_handling = 0;
+
+	/* for testing I added a `|| 1` */
+	duplicate_handling = xecfg->flags & XDL_EMIT_DUPLICATE || 1;
+	if (duplicate_handling)
+		xdl_markup_duplicates(xe, xscr, xecfg);
 
 	for (xch = xscr; xch; xch = xche->next) {
 		xche = xdl_get_hunk(&xch, xecfg);
@@ -279,14 +403,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb,
 			 * Removes lines from the first file.
 			 */
 			for (s1 = xch->i1; s1 < xch->i1 + xch->chg1; s1++)
-				if (xdl_emit_record(&xe->xdf1, s1, "-", ecb) < 0)
+				if (xdl_check_and_emit_record(&xe->xdf1, s1, "-", ecb, duplicate_handling) < 0)
 					return -1;
 
 			/*
 			 * Adds lines from the second file.
 			 */
 			for (s2 = xch->i2; s2 < xch->i2 + xch->chg2; s2++)
-				if (xdl_emit_record(&xe->xdf2, s2, "+", ecb) < 0)
+				if (xdl_check_and_emit_record(&xe->xdf2, s2, "+", ecb, duplicate_handling) < 0)
 					return -1;
 
 			if (xch == xche)
-- 
2.10.0.rc2.23.gf336a1a.dirty


^ permalink raw reply related	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 0/2] Color moved code differently
  2016-09-03  3:31 [RFC/PATCH 0/2] Color moved code differently Stefan Beller
  2016-09-03  3:31 ` [PATCH 1/2] diff.c: emit duplicate lines with a different color Stefan Beller
  2016-09-03  3:31 ` [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently Stefan Beller
@ 2016-09-03  7:00 ` Junio C Hamano
  2016-09-04  5:23   ` Stefan Beller
  2016-09-04  9:57 ` Jacob Keller
  3 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2016-09-03  7:00 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git

Stefan Beller <sbeller@google.com> writes:

> A line is colored differently if that line and the surroundign 2 lines
> appear as-is in the opposite part of the diff.
>
> Example:
> http://i.imgur.com/ay84q0q.png
>
> Or apply these patches and 
>     git show e28eae3184b26d3cf3293e69403babb5c575342c
>     git show bc9204d4ef6e0672389fdfb0d398fa9a39dba3d5
>     git show 8465541e8ce8eaf16e66ab847086779768c18f2d

I like this as a concept.  Two quick comments are

 * On 1/2, you would also need to teach diff-color-slot the
   correspondence between the name used by configuration and the
   enum used as the index into the diff_colors[] array.  I think
   these are not "DUPLICATE", but "MOVE", so I'd suggest renaming
   dup-new and dup-old to some words or phrases that have "MOVED"
   and "TO" or "FROM" in them (e.g. "DIFF_MOVED_FROM",
   "DIFF_MOVED_TO").

 * On 2/2, doing it at xdiff.c level may be limiting this good idea
   to flourish to its full potential, as the interface is fed only
   one diff_filepair at a time.  All the examples you pointed at
   above have line movement within a single path because of this
   design limitation.  I do not think 2/2 would serve as a small but
   good first step to build on top of to enhance the feature to
   notice line movements across files and the design (not the
   implementation) needs rethinking.

The idea has a potential to help reviewing inter-file movement of
lines in 3b0c4200 ("apply: move libified code from builtin/apply.c
to apply.{c,h}", 2016-08-08).  You can see what was _changed_ in the
part that has been moved across files with "show -B -M", and
sometimes that is useful, but at the same time, you cannot see what
was moved without changing, which often is necessary to understand
the changes and notice things like "you moved this across files
without changing, but this and that you did not change need to be
adjusted".

The coloring of "these are moved verbatim" in the style this series
gives would be very helpful for reviewing such a change.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently
  2016-09-03  3:31 ` [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently Stefan Beller
@ 2016-09-03 12:25   ` Jakub Narębski
  2016-09-04  5:31     ` Stefan Beller
  2016-09-04  6:48     ` Junio C Hamano
  0 siblings, 2 replies; 13+ messages in thread
From: Jakub Narębski @ 2016-09-03 12:25 UTC (permalink / raw)
  To: Stefan Beller, git

W dniu 03.09.2016 o 05:31, Stefan Beller pisze:

> When moving code (e.g. a function is moved to another part of the file or
> to a different file), the review process is different than reviewing new
> code. When reviewing moved code we are only interested in the diff as
> where there are differences in the moved code, e.g. namespace changes.
> 
> However the inner part of these moved texts should not change.
> To aid a developer reviewing such code, emit it with a different prefix
> than the usual +,- to indicate it is overlapping code.

What would be this different prefix?


Side note: I wonder if the cousin of unified diff, namely context diff[1],
is something that we can and should support.

[1]: https://www.gnu.org/software/diffutils/manual/html_node/Context-Format.html
     https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Context.html

*** lao	2002-02-21 23:30:39.942229878 -0800
--- tzu	2002-02-21 23:30:50.442260588 -0800
***************
*** 1,7 ****
- The Way that can be told of is not the eternal Way;
- The name that can be named is not the eternal name.
  The Nameless is the origin of Heaven and Earth;
! The Named is the mother of all things.
  Therefore let there always be non-being,
    so we may see their subtlety,
  And let there always be being,
--- 1,6 ----
  The Nameless is the origin of Heaven and Earth;
! The named is the mother of all things.
! 
  Therefore let there always be non-being,
    so we may see their subtlety,
  And let there always be being,
***************
*** 9,11 ****
--- 8,13 ----
  The two are the same,
  But after they are produced,
    they have different names.
+ They both may be called deep and profound.
+ Deeper and more profound,
+ The door of all subtleties!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 0/2] Color moved code differently
  2016-09-03  7:00 ` [RFC/PATCH 0/2] Color moved code differently Junio C Hamano
@ 2016-09-04  5:23   ` Stefan Beller
  2016-09-04  6:41     ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Stefan Beller @ 2016-09-04  5:23 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sat, Sep 3, 2016 at 12:00 AM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>> A line is colored differently if that line and the surroundign 2 lines
>> appear as-is in the opposite part of the diff.
>>
>> Example:
>> http://i.imgur.com/ay84q0q.png
>>
>> Or apply these patches and
>>     git show e28eae3184b26d3cf3293e69403babb5c575342c
>>     git show bc9204d4ef6e0672389fdfb0d398fa9a39dba3d5
>>     git show 8465541e8ce8eaf16e66ab847086779768c18f2d
>
> I like this as a concept.  Two quick comments are

Great!

>
>  * On 1/2, you would also need to teach diff-color-slot the
>    correspondence between the name used by configuration and the
>    enum used as the index into the diff_colors[] array.

So I would need to add code to diff_parse_color_slot just below the
definition of the struct.

>    I think
>    these are not "DUPLICATE", but "MOVE", so I'd suggest renaming
>    dup-new and dup-old to some words or phrases that have "MOVED"
>    and "TO" or "FROM" in them (e.g. "DIFF_MOVED_FROM",
>    "DIFF_MOVED_TO").

Ok, sounds sensible.

>
>  * On 2/2, doing it at xdiff.c level may be limiting this good idea
>    to flourish to its full potential, as the interface is fed only
>    one diff_filepair at a time.

I realized that after I implemented it. I agree we would want to have
it function cross file.

So from my current understanding of the code,
* diffcore_std would call a new function diffcore_detect_moved(void)
   just before diffcore_apply_filter is called.
* The new function diffcore_detect_moved would then check if the
   diff is a valid textual diff (i.e. real files, not submodules, but
   deletion/creation of one file is allowed)
   If so we generate the diff internally and as in 2/2 would
   hash all added/removed lines with context and store it.
* Instead checking for a different symbol in fn_out_consume, we consult
   the hashmap whether we want to color the line as a "moved" line.

>     All the examples you pointed at
>    above have line movement within a single path because of this
>    design limitation.  I do not think 2/2 would serve as a small but
>    good first step to build on top of to enhance the feature to
>    notice line movements across files and the design (not the
>    implementation) needs rethinking.

After reading the code more closely I agree. I initially put it there
to see if it is feasible or just messing up the diff. And as I have
a bit of knowledge of the xdl internals due to the first version of
the diff slider heuristic, I went with that.

From a cursory look at the history, you seemed to be very involved in
the implementation of diff.c, so I'd appreciate strong guidance on
the design level.

>
> The idea has a potential to help reviewing inter-file movement of
> lines in 3b0c4200 ("apply: move libified code from builtin/apply.c
> to apply.{c,h}", 2016-08-08).  You can see what was _changed_ in the
> part that has been moved across files with "show -B -M", and
> sometimes that is useful, but at the same time, you cannot see what
> was moved without changing, which often is necessary to understand
> the changes and notice things like "you moved this across files
> without changing, but this and that you did not change need to be
> adjusted".
>
> The coloring of "these are moved verbatim" in the style this series
> gives would be very helpful for reviewing such a change.
>

Right, that is what triggered me to implement it.
Another such case that I reviewed was
https://git.eclipse.org/r/#/c/78645/
and I felt very insecure about reviewing thousands lines of code
that was "just moved".

Thanks,
Stefan

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently
  2016-09-03 12:25   ` Jakub Narębski
@ 2016-09-04  5:31     ` Stefan Beller
  2016-09-04 10:35       ` Jakub Narębski
  2016-09-04  6:48     ` Junio C Hamano
  1 sibling, 1 reply; 13+ messages in thread
From: Stefan Beller @ 2016-09-04  5:31 UTC (permalink / raw)
  To: Jakub Narębski; +Cc: git

On Sat, Sep 3, 2016 at 5:25 AM, Jakub Narębski <jnareb@gmail.com> wrote:
> W dniu 03.09.2016 o 05:31, Stefan Beller pisze:
>
>> When moving code (e.g. a function is moved to another part of the file or
>> to a different file), the review process is different than reviewing new
>> code. When reviewing moved code we are only interested in the diff as
>> where there are differences in the moved code, e.g. namespace changes.
>>
>> However the inner part of these moved texts should not change.
>> To aid a developer reviewing such code, emit it with a different prefix
>> than the usual +,- to indicate it is overlapping code.
>
> What would be this different prefix?

I will discard the part of the different prefix as the design of 2/2
will change.



>
>
> Side note: I wonder if the cousin of unified diff, namely context diff[1],
> is something that we can and should support.



>
> [1]: https://www.gnu.org/software/diffutils/manual/html_node/Context-Format.html
>      https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Context.html
>
> *** lao 2002-02-21 23:30:39.942229878 -0800
> --- tzu 2002-02-21 23:30:50.442260588 -0800
> ***************
> *** 1,7 ****
> - The Way that can be told of is not the eternal Way;
> - The name that can be named is not the eternal name.
>   The Nameless is the origin of Heaven and Earth;
> ! The Named is the mother of all things.
>   Therefore let there always be non-being,
>     so we may see their subtlety,
>   And let there always be being,
> --- 1,6 ----
>   The Nameless is the origin of Heaven and Earth;
> ! The named is the mother of all things.
> !

So the line moved here?
Is it intentional that the line differs though?
(capitalisation of 'named")
Not sure I can read this diff correctly.

I think for this small side project I'd rather want
to 'just' support colors of moved code;)

>   Therefore let there always be non-being,
>     so we may see their subtlety,
>   And let there always be being,
> ***************
> *** 9,11 ****
> --- 8,13 ----
>   The two are the same,
>   But after they are produced,
>     they have different names.
> + They both may be called deep and profound.
> + Deeper and more profound,
> + The door of all subtleties!

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 0/2] Color moved code differently
  2016-09-04  5:23   ` Stefan Beller
@ 2016-09-04  6:41     ` Junio C Hamano
  2016-09-04  8:28       ` Stefan Beller
  2016-09-04 22:19       ` Junio C Hamano
  0 siblings, 2 replies; 13+ messages in thread
From: Junio C Hamano @ 2016-09-04  6:41 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git

Stefan Beller <sbeller@google.com> writes:

>>  * On 2/2, doing it at xdiff.c level may be limiting this good idea
>>    to flourish to its full potential, as the interface is fed only
>>    one diff_filepair at a time.
>
> I realized that after I implemented it. I agree we would want to have
> it function cross file.
>
> So from my current understanding of the code,
> * diffcore_std would call a new function diffcore_detect_moved(void)
>    just before diffcore_apply_filter is called.
> * The new function diffcore_detect_moved would then check if the
>    diff is a valid textual diff (i.e. real files, not submodules, but
>    deletion/creation of one file is allowed)
>    If so we generate the diff internally and as in 2/2 would
>    hash all added/removed lines with context and store it.

I do not think you should step outside diff_flush().  Only when
producing textual diff, you would have to run the textual diff
twice by going over the q twice:

 * The first pass would run diff_flush_patch(), which would call
   into xdiff the usual way, but the callback from xdiff would
   capture the removed lines and the added lines without making any
   output.

 * The second pass would run diff_flush_patch(), but the callback
   from xdiff would be called with additional information, namely,
   the removed and the added lines captured in the first pass.

 * I suspect that the fn_out_consume() function that is used for a
   normal case (i.e. when we are not doing this more expensive
   "moved to/moved from" coloring) can be used for the second pass
   above (i.e. the "priv" aka "ecbdata" may need to be extended so
   that it can tell which mode of operation it is asked to perform),
   but if there is not enough similarity between the second pass of
   this "moved from/moved to" mode and the normal mode of output, it
   is also OK to have two different callback functions, i.e. the
   original one to be used in the normal mode, the second one that
   knows the "these are moved without modification" coloring.  The
   callback for the first pass is sufficiently different and I think
   it is better to invent a new callback function to be used in the
   first pass, instead of reusing fn_out_consume().

   The fn_out_consume() function working in the "second pass of
   moved from/moved to mode" would inspect line[] and see if it is
   an added or a removed line, and then:

   - if it is an added line, and it appears as a removed line
     elsewhere in the patchset (you obtained the information in the
     first pass), you show it as "this was moved from elsewhere".

   - if it is a removed line, and it appears as an added line
     elsewhere in the patchset (you obtained the information in the
     first pass), you show it as "this was moved to elsewhere".

Or something like that.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently
  2016-09-03 12:25   ` Jakub Narębski
  2016-09-04  5:31     ` Stefan Beller
@ 2016-09-04  6:48     ` Junio C Hamano
  1 sibling, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2016-09-04  6:48 UTC (permalink / raw)
  To: Jakub Narębski; +Cc: Stefan Beller, git

Jakub Narębski <jnareb@gmail.com> writes:

> Side note: I wonder if the cousin of unified diff, namely context diff[1],
> is something that we can and should support.

Yes, the lack of support for the copied context (instead of the
unified context) diff format has bugged me over the years.  Reading
copied context diff format however is rapidly becoming a lost art,
unfortunately, partly due to our popularity, and the need for the
format has become a lessor issue.

I wonder if a pair of a pre-processor (on the input side, before
running 'git apply') and a post-processor (on the output side,
munging the output from 'git show/diff') would be sufficient.

In any case, I agree with you that it has nothing to do with what
Stefan is doing here.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 0/2] Color moved code differently
  2016-09-04  6:41     ` Junio C Hamano
@ 2016-09-04  8:28       ` Stefan Beller
  2016-09-04 22:19       ` Junio C Hamano
  1 sibling, 0 replies; 13+ messages in thread
From: Stefan Beller @ 2016-09-04  8:28 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git

On Sat, Sep 3, 2016 at 11:41 PM, Junio C Hamano <gitster@pobox.com> wrote:
> Stefan Beller <sbeller@google.com> writes:
>
>>>  * On 2/2, doing it at xdiff.c level may be limiting this good idea
>>>    to flourish to its full potential, as the interface is fed only
>>>    one diff_filepair at a time.
>>
>> I realized that after I implemented it. I agree we would want to have
>> it function cross file.
>>
>> So from my current understanding of the code,
>> * diffcore_std would call a new function diffcore_detect_moved(void)
>>    just before diffcore_apply_filter is called.
>> * The new function diffcore_detect_moved would then check if the
>>    diff is a valid textual diff (i.e. real files, not submodules, but
>>    deletion/creation of one file is allowed)
>>    If so we generate the diff internally and as in 2/2 would
>>    hash all added/removed lines with context and store it.
>
> I do not think you should step outside diff_flush().

After looking further into it, I think we need to step outside
or at the very least deep down in the xdl cellar from there,
because we need the context around the line both in the
pre and after image.


>  Only when
> producing textual diff, you would have to run the textual diff
> twice by going over the q twice:
>
>  * The first pass would run diff_flush_patch(), which would call
>    into xdiff the usual way, but the callback from xdiff would
>    capture the removed lines and the added lines without making any
>    output.

The callback doesn't have the context around one line as easily
accessible as the xdl emit function, so rather we'd pass in a flag,
i.e. the hashmap where to store the added/removed lines or NULL.

Both passes (the first to find and the second enhanced pass that
outputs the lines) need to have access to the context of the respective
file.

>
>  * The second pass would run diff_flush_patch(), but the callback
>    from xdiff would be called with additional information, namely,
>    the removed and the added lines captured in the first pass.
>
>  * I suspect that the fn_out_consume() function that is used for a
>    normal case (i.e. when we are not doing this more expensive
>    "moved to/moved from" coloring) can be used for the second pass
>    above (i.e. the "priv" aka "ecbdata" may need to be extended so
>    that it can tell which mode of operation it is asked to perform),
>    but if there is not enough similarity between the second pass of
>    this "moved from/moved to" mode and the normal mode of output, it
>    is also OK to have two different callback functions, i.e. the
>    original one to be used in the normal mode, the second one that
>    knows the "these are moved without modification" coloring.  The
>    callback for the first pass is sufficiently different and I think
>    it is better to invent a new callback function to be used in the
>    first pass, instead of reusing fn_out_consume().

The callback doesn't have easy accessible data IMHO,
e.g. we may get these lines in the callback:

(function context not required:)
---8<---
 {
        int i;
        for (i = 0; i < GIT_SHA1_RAWSZ; i++) {
-               unsigned int val;
-               /*
-                * hex[1]=='\0' is caught when val is checked below,
-                * but if hex[0] is NUL we have to avoid reading
-                * past the end of the string:
-                */
-               if (!hex[0])
-                       return -1;
-               val = (hexval(hex[0]) << 4) | hexval(hex[1]);
-               if (val & ~0xff)
+               int val = hex2chr(hex);
+               if (val < 0)
                        return -1;
                *sha1++ = val;
                hex += 2;
---8<---

And for the first + line (int val = hex2chr(hex);) we need
to hash that line and 2 (made up threshold) before and after:

---8<---
        int i;
        for (i = 0; i < GIT_SHA1_RAWSZ; i++) {
               int val = hex2chr(hex);
               if (val < 0)
                        return -1;
---8<---

and this is hard to reconstruct from the above diff fed into the
callback. So we have to do it in the xdl emit phase.

So I think the wrong part of the initial patch 2/2 is the too
late recording of the moved lines.
So rather whenever we call diff_flush_patch(), we have to first
call diff_prepare_moved_line_detection() first for all file pairs.

>
>    The fn_out_consume() function working in the "second pass of
>    moved from/moved to mode" would inspect line[] and see if it is
>    an added or a removed line, and then:
>
>    - if it is an added line, and it appears as a removed line
>      elsewhere in the patchset (you obtained the information in the
>      first pass), you show it as "this was moved from elsewhere".
>
>    - if it is a removed line, and it appears as an added line
>      elsewhere in the patchset (you obtained the information in the
>      first pass), you show it as "this was moved to elsewhere".
>
> Or something like that.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 0/2] Color moved code differently
  2016-09-03  3:31 [RFC/PATCH 0/2] Color moved code differently Stefan Beller
                   ` (2 preceding siblings ...)
  2016-09-03  7:00 ` [RFC/PATCH 0/2] Color moved code differently Junio C Hamano
@ 2016-09-04  9:57 ` Jacob Keller
  3 siblings, 0 replies; 13+ messages in thread
From: Jacob Keller @ 2016-09-04  9:57 UTC (permalink / raw)
  To: Stefan Beller; +Cc: Git mailing list

On Fri, Sep 2, 2016 at 8:31 PM, Stefan Beller <sbeller@google.com> wrote:
> When moving code (e.g. a function is moved to another part of the file or
> to a different file), the review process is different than reviewing new
> code. When reviewing moved code we are only interested in the diff as
> where there are differences in the moved code, e.g. namespace changes.
>
> However the inner part of these moved texts should not change.
> To aid a developer reviewing such code, we'll color pure moved stuff
> differently.
>
> A line is colored differently if that line and the surroundign 2 lines
> appear as-is in the opposite part of the diff.
>
> Example:
> http://i.imgur.com/ay84q0q.png
>

In the example, the first and last lines of duplicate copies don't get
colored differently, and that threw  me off. I feel like that was
maybe not intentional? If it was, can you explain why?

Thanks,
Jake

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently
  2016-09-04  5:31     ` Stefan Beller
@ 2016-09-04 10:35       ` Jakub Narębski
  0 siblings, 0 replies; 13+ messages in thread
From: Jakub Narębski @ 2016-09-04 10:35 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git

W dniu 04.09.2016 o 07:31, Stefan Beller pisze:
> On Sat, Sep 3, 2016 at 5:25 AM, Jakub Narębski <jnareb@gmail.com> wrote:
>> W dniu 03.09.2016 o 05:31, Stefan Beller pisze:
>>
>>> When moving code (e.g. a function is moved to another part of the file or
>>> to a different file), the review process is different than reviewing new
>>> code. When reviewing moved code we are only interested in the diff as
>>> where there are differences in the moved code, e.g. namespace changes.
>>>
>>> However the inner part of these moved texts should not change.
>>> To aid a developer reviewing such code, emit it with a different prefix
>>> than the usual +,- to indicate it is overlapping code.
>>
>> What would be this different prefix?
> 
> I will discard the part of the different prefix as the design of 2/2
> will change.

It would be nice to have at least an option of using different prefix
(or pair of prefixes), as not always it is possible to use color to
markup duplicates.

P.S. BTW. does this work with word-diff?

Best regards,
-- 
Jakub Narębski


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [RFC/PATCH 0/2] Color moved code differently
  2016-09-04  6:41     ` Junio C Hamano
  2016-09-04  8:28       ` Stefan Beller
@ 2016-09-04 22:19       ` Junio C Hamano
  1 sibling, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2016-09-04 22:19 UTC (permalink / raw)
  To: Stefan Beller; +Cc: git

Junio C Hamano <gitster@pobox.com> writes:

> I do not think you should step outside diff_flush().  Only when
> producing textual diff, you would have to run the textual diff
> twice by going over the q twice:
>
>  * The first pass would run diff_flush_patch(), which would call
>    into xdiff the usual way, but the callback from xdiff would
>    capture the removed lines and the added lines without making any
>    output.
>
>  * The second pass would run diff_flush_patch(), but the callback
>    from xdiff would be called with additional information, namely,
>    the removed and the added lines captured in the first pass.
> ...
>    The fn_out_consume() function working in the "second pass of
>    moved from/moved to mode" would inspect line[] and see if it is
>    an added or a removed line, and then:
>
>    - if it is an added line, and it appears as a removed line
>      elsewhere in the patchset (you obtained the information in the
>      first pass), you show it as "this was moved from elsewhere".
>
>    - if it is a removed line, and it appears as an added line
>      elsewhere in the patchset (you obtained the information in the
>      first pass), you show it as "this was moved to elsewhere".
>
> Or something like that.

Actually, the first pass above ends up queuing majority of the diff
output in-core _anyway_, so it would make much more sense not to
rely on the reproducibility of xdiff machinery by not calling it
twice.  Instead, I would imagine that the first pass can queue all
what it receives in the callback, then with the whole-patch picture
at hand, you can call updated fn_out_consume() yourself (instead of
letting xdiff machinery to call you), and that would become the
"second pass".

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2016-09-04 22:20 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-09-03  3:31 [RFC/PATCH 0/2] Color moved code differently Stefan Beller
2016-09-03  3:31 ` [PATCH 1/2] diff.c: emit duplicate lines with a different color Stefan Beller
2016-09-03  3:31 ` [RFC/PATCH 2/2] WIP xdiff: markup duplicates differently Stefan Beller
2016-09-03 12:25   ` Jakub Narębski
2016-09-04  5:31     ` Stefan Beller
2016-09-04 10:35       ` Jakub Narębski
2016-09-04  6:48     ` Junio C Hamano
2016-09-03  7:00 ` [RFC/PATCH 0/2] Color moved code differently Junio C Hamano
2016-09-04  5:23   ` Stefan Beller
2016-09-04  6:41     ` Junio C Hamano
2016-09-04  8:28       ` Stefan Beller
2016-09-04 22:19       ` Junio C Hamano
2016-09-04  9:57 ` Jacob Keller

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.