All of lore.kernel.org
 help / color / mirror / Atom feed
From: Stefan Beller <sbeller@google.com>
To: gitster@pobox.com, git@vger.kernel.org, jacob.keller@gmail.com,
	peff@peff.net
Cc: Stefan Beller <sbeller@google.com>,
	Jacob Keller <jacob.e.keller@intel.com>
Subject: [PATCH 2/2] xdiff: implement empty line chunk heuristic
Date: Tue, 19 Apr 2016 08:21:30 -0700	[thread overview]
Message-ID: <1461079290-6523-3-git-send-email-sbeller@google.com> (raw)
In-Reply-To: <1461079290-6523-1-git-send-email-sbeller@google.com>

In order to produce the smallest possible diff and combine several diff
hunks together, we implement a heuristic from GNU Diff which moves diff
hunks forward as far as possible when we find common context above and
below a diff hunk. This sometimes produces less readable diffs when
writing C, Shell, or other programming languages, ie:

...
 /*
+ *
+ *
+ */
+
+/*
...

instead of the more readable equivalent of

...
+/*
+ *
+ *
+ */
+
 /*
...

Implement the following heuristic to (optionally) produce the desired
output.

  If there are diff chunks which can be shifted around, shift each hunk
  such that the last common empty line is below the chunk with the rest
  of the context above.

This heuristic appears to resolve the above example and several other
common issues without producing significantly weird results. However, as
with any heuristic it is not really known whether this will always be
more optimal. Thus, it can be disabled via diff.compactionHeuristic.

Signed-off-by: Stefan Beller <sbeller@google.com>
Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: Stefan Beller <sbeller@google.com>
---
 Documentation/diff-config.txt  |  5 +++++
 Documentation/diff-options.txt |  6 ++++++
 diff.c                         | 11 +++++++++++
 xdiff/xdiff.h                  |  2 ++
 xdiff/xdiffi.c                 | 26 ++++++++++++++++++++++++++
 5 files changed, 50 insertions(+)

diff --git a/Documentation/diff-config.txt b/Documentation/diff-config.txt
index 6eaa452..9bf3e92 100644
--- a/Documentation/diff-config.txt
+++ b/Documentation/diff-config.txt
@@ -166,6 +166,11 @@ diff.tool::
 
 include::mergetools-diff.txt[]
 
+diff.compactionHeuristic::
+	Set this option to enable an experimental heuristic that
+	shifts the hunk boundary in an attempt to make the resulting
+	patch easier to read.
+
 diff.algorithm::
 	Choose a diff algorithm.  The variants are as follows:
 +
diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 3ad6404..b513023 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -63,6 +63,12 @@ ifndef::git-format-patch[]
 	Synonym for `-p --raw`.
 endif::git-format-patch[]
 
+--compaction-heuristic::
+--no-compaction-heuristic::
+	These are to help debugging and tuning an experimental
+	heuristic that shifts the hunk boundary in an attempt to
+	make the resulting patch easier to read.
+
 --minimal::
 	Spend extra time to make sure the smallest possible
 	diff is produced.
diff --git a/diff.c b/diff.c
index f62b7f7..05ca3ce 100644
--- a/diff.c
+++ b/diff.c
@@ -25,6 +25,7 @@
 #endif
 
 static int diff_detect_rename_default;
+static int diff_compaction_heuristic = 1;
 static int diff_rename_limit_default = 400;
 static int diff_suppress_blank_empty;
 static int diff_use_color_default = -1;
@@ -181,6 +182,10 @@ int git_diff_ui_config(const char *var, const char *value, void *cb)
 	}
 	if (!strcmp(var, "diff.renames")) {
 		diff_detect_rename_default = git_config_rename(var, value);
+		return 0;
+	}
+	if (!strcmp(var, "diff.compactionheuristic")) {
+		diff_compaction_heuristic = git_config_bool(var, value);
 		return 0;
 	}
 	if (!strcmp(var, "diff.autorefreshindex")) {
@@ -3235,6 +3240,8 @@ void diff_setup(struct diff_options *options)
 	options->use_color = diff_use_color_default;
 	options->detect_rename = diff_detect_rename_default;
 	options->xdl_opts |= diff_algorithm;
+	if (diff_compaction_heuristic)
+		DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
 
 	options->orderfile = diff_order_file_cfg;
 
@@ -3712,6 +3719,10 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 		DIFF_XDL_SET(options, IGNORE_WHITESPACE_AT_EOL);
 	else if (!strcmp(arg, "--ignore-blank-lines"))
 		DIFF_XDL_SET(options, IGNORE_BLANK_LINES);
+	else if (!strcmp(arg, "--compaction-heuristic"))
+		DIFF_XDL_SET(options, COMPACTION_HEURISTIC);
+	else if (!strcmp(arg, "--no-compaction-heuristic"))
+		DIFF_XDL_CLR(options, COMPACTION_HEURISTIC);
 	else if (!strcmp(arg, "--patience"))
 		options->xdl_opts = DIFF_WITH_ALG(options, PATIENCE_DIFF);
 	else if (!strcmp(arg, "--histogram"))
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index c033991..d1dbb27 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -41,6 +41,8 @@ extern "C" {
 
 #define XDF_IGNORE_BLANK_LINES (1 << 7)
 
+#define XDF_COMPACTION_HEURISTIC (1 << 8)
+
 #define XDL_EMIT_FUNCNAMES (1 << 0)
 #define XDL_EMIT_COMMON (1 << 1)
 #define XDL_EMIT_FUNCCONTEXT (1 << 2)
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 748eeb9..b3c6848 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -400,6 +400,11 @@ static xdchange_t *xdl_add_change(xdchange_t *xscr, long i1, long i2, long chg1,
 }
 
 
+static int is_blank_line(xrecord_t **recs, long ix, long flags)
+{
+	return xdl_blankline(recs[ix]->ptr, recs[ix]->size, flags);
+}
+
 static int recs_match(xrecord_t **recs, long ixs, long ix, long flags)
 {
 	return (recs[ixs]->ha == recs[ix]->ha &&
@@ -411,6 +416,7 @@ static int recs_match(xrecord_t **recs, long ixs, long ix, long flags)
 int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 	long ix, ixo, ixs, ixref, grpsiz, nrec = xdf->nrec;
 	char *rchg = xdf->rchg, *rchgo = xdfo->rchg;
+	unsigned int blank_lines;
 	xrecord_t **recs = xdf->recs;
 
 	/*
@@ -444,6 +450,7 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 
 		do {
 			grpsiz = ix - ixs;
+			blank_lines = 0;
 
 			/*
 			 * If the line before the current change group, is equal to
@@ -478,6 +485,8 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 			 * the group.
 			 */
 			while (ix < nrec && recs_match(recs, ixs, ix, flags)) {
+				blank_lines += is_blank_line(recs, ix, flags);
+
 				rchg[ixs++] = 0;
 				rchg[ix++] = 1;
 
@@ -503,6 +512,23 @@ int xdl_change_compact(xdfile_t *xdf, xdfile_t *xdfo, long flags) {
 			rchg[--ixs] = 1;
 			rchg[--ix] = 0;
 			while (rchgo[--ixo]);
+		}
+
+		/*
+		 * If a group can be moved back and forth, see if there is a
+		 * blank line in the moving space. If there is a blank line,
+		 * make sure the last blank line is the end of the group.
+		 *
+		 * As we already shifted the group forward as far as possible
+		 * in the earlier loop, we need to shift it back only if at all.
+		 */
+		if ((flags & XDF_COMPACTION_HEURISTIC) && blank_lines) {
+			while (ixs > 0 &&
+			       !is_blank_line(recs, ix - 1, flags) &&
+			       recs_match(recs, ixs - 1, ix - 1, flags)) {
+				rchg[--ixs] = 1;
+				rchg[--ix] = 0;
+			}
 		}
 	}
 
-- 
2.4.11.2.g96ed4e5.dirty

  parent reply	other threads:[~2016-04-19 15:21 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-04-19 15:21 [PATCHv5 0/2] xdiff: implement empty line chunk heuristic Stefan Beller
2016-04-19 15:21 ` [PATCH 1/2] xdiff: add recs_match helper function Stefan Beller
2016-04-19 15:21 ` Stefan Beller [this message]
     [not found]   ` <CA+P7+xoqn3fxEZGn02ST1XV-2UpQGr3iwV-37R8pakFJy_9n0w@mail.gmail.com>
2016-04-20  4:18     ` [PATCH 2/2] xdiff: implement empty line chunk heuristic Jeff King
2016-04-20  4:37       ` Jeff King
2016-04-20  4:37       ` Stefan Beller
2016-04-29 20:29       ` Junio C Hamano
2016-04-29 20:59         ` Jacob Keller
2016-04-29 22:18           ` Junio C Hamano
2016-04-29 22:35             ` Stefan Beller
2016-04-29 22:39               ` Keller, Jacob E
2016-04-29 22:44                 ` Stefan Beller
2016-04-29 22:48                   ` Keller, Jacob E
2016-05-02 17:40                     ` Junio C Hamano
2016-05-02 17:45                       ` Stefan Beller
2016-05-02 18:02                       ` Jeff King
2016-05-03 17:55                         ` Jacob Keller
2016-04-30  3:06               ` Jeff King
2016-04-19 16:23 ` [PATCHv5 0/2] " Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2016-04-18 21:12 [PATCH 0/2 v4] " Stefan Beller
2016-04-18 21:12 ` [PATCH 2/2] " Stefan Beller
2016-04-18 22:04   ` Jacob Keller
2016-04-18 22:24     ` Junio C Hamano
2016-04-19  5:03   ` Jeff King
2016-04-19  6:47     ` Stefan Beller
2016-04-19  7:00       ` Jeff King
2016-04-19  7:05         ` Stefan Beller
2016-04-19 15:17     ` Stefan Beller
2016-04-19 17:06       ` Jeff King
2016-04-19 23:02         ` Jacob Keller
2016-04-19 23:07           ` Junio C Hamano
2016-04-20 13:12             ` Michael S. Tsirkin
2016-04-20 16:09               ` Junio C Hamano
2016-04-20 16:17                 ` Jeff King
2016-04-20  6:00         ` Junio C Hamano
2016-04-19 16:51     ` Junio C Hamano
2016-04-15 23:01 [RFC PATCH, WAS: "weird diff output?" v3a 0/2] implement shortest line diff " Stefan Beller
2016-04-15 23:01 ` [PATCH 2/2] xdiff: implement empty line " Stefan Beller
2016-04-15 23:05   ` Jacob Keller
2016-04-15 23:32     ` Jacob Keller
2016-04-15 23:45       ` Stefan Beller
2016-04-16  0:49   ` Junio C Hamano
2016-04-16  0:59     ` Stefan Beller
2016-04-16  1:07     ` Jacob Keller
2016-04-18 19:22       ` Junio C Hamano
2016-04-18 19:33         ` Stefan Beller

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1461079290-6523-3-git-send-email-sbeller@google.com \
    --to=sbeller@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jacob.e.keller@intel.com \
    --cc=jacob.keller@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.