git.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: "Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: Derrick Stolee <dstolee@microsoft.com>,
	Jonathan Tan <jonathantanmy@google.com>,
	Taylor Blau <me@ttaylorr.com>, Junio C Hamano <gitster@pobox.com>,
	Jeff King <peff@peff.net>, Elijah Newren <newren@gmail.com>,
	Derrick Stolee <stolee@gmail.com>,
	Elijah Newren <newren@gmail.com>,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH v4 6/6] merge-ort: call diffcore_rename() directly
Date: Thu, 11 Feb 2021 08:15:49 +0000	[thread overview]
Message-ID: <fedb3d323d948dfdc6e40e036563fd56983f0ad6.1613031350.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.843.v4.git.1613031350.gitgitgadget@gmail.com>

From: Elijah Newren <newren@gmail.com>

We want to pass additional information to diffcore_rename() (or some
variant thereof) without plumbing that extra information through
diff_tree_oid() and diffcore_std().  Further, since we will need to
gather additional special information related to diffs and are walking
the trees anyway in collect_merge_info(), it seems odd to have
diff_tree_oid()/diffcore_std() repeat those tree walks.  And there may
be times where we can avoid traversing into a subtree in
collect_merge_info() (based on additional information at our disposal),
that the basic diff logic would be unable to take advantage of.  For all
these reasons, just create the add and delete pairs ourself and then
call diffcore_rename() directly.

This change is primarily about enabling future optimizations; the
advantage of avoiding extra tree traversals is small compared to the
cost of rename detection, and the advantage of avoiding the extra tree
traversals is somewhat offset by the extra time spent in
collect_merge_info() collecting the additional data anyway.  However...

For the testcases mentioned in commit 557ac0350d ("merge-ort: begin
performance work; instrument with trace2_region_* calls", 2020-10-28),
this change improves the performance as follows:

                            Before                  After
    no-renames:       13.294 s ±  0.103 s    12.775 s ±  0.062 s
    mega-renames:    187.248 s ±  0.882 s   188.754 s ±  0.284 s
    just-one-mega:     5.557 s ±  0.017 s     5.599 s ±  0.019 s

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 merge-ort.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 59 insertions(+), 7 deletions(-)

diff --git a/merge-ort.c b/merge-ort.c
index 931b91438cf1..603d30c52170 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -535,6 +535,23 @@ static void setup_path_info(struct merge_options *opt,
 	result->util = mi;
 }
 
+static void add_pair(struct merge_options *opt,
+		     struct name_entry *names,
+		     const char *pathname,
+		     unsigned side,
+		     unsigned is_add /* if false, is_delete */)
+{
+	struct diff_filespec *one, *two;
+	struct rename_info *renames = &opt->priv->renames;
+	int names_idx = is_add ? side : 0;
+
+	one = alloc_filespec(pathname);
+	two = alloc_filespec(pathname);
+	fill_filespec(is_add ? two : one,
+		      &names[names_idx].oid, 1, names[names_idx].mode);
+	diff_queue(&renames->pairs[side], one, two);
+}
+
 static void collect_rename_info(struct merge_options *opt,
 				struct name_entry *names,
 				const char *dirname,
@@ -544,6 +561,7 @@ static void collect_rename_info(struct merge_options *opt,
 				unsigned match_mask)
 {
 	struct rename_info *renames = &opt->priv->renames;
+	unsigned side;
 
 	/* Update dirs_removed, as needed */
 	if (dirmask == 1 || dirmask == 3 || dirmask == 5) {
@@ -554,6 +572,21 @@ static void collect_rename_info(struct merge_options *opt,
 		if (sides & 2)
 			strset_add(&renames->dirs_removed[2], fullname);
 	}
+
+	if (filemask == 0 || filemask == 7)
+		return;
+
+	for (side = MERGE_SIDE1; side <= MERGE_SIDE2; ++side) {
+		unsigned side_mask = (1 << side);
+
+		/* Check for deletion on side */
+		if ((filemask & 1) && !(filemask & side_mask))
+			add_pair(opt, names, fullname, side, 0 /* delete */);
+
+		/* Check for addition on side */
+		if (!(filemask & 1) && (filemask & side_mask))
+			add_pair(opt, names, fullname, side, 1 /* add */);
+	}
 }
 
 static int collect_merge_info_callback(int n,
@@ -2079,6 +2112,27 @@ static int process_renames(struct merge_options *opt,
 	return clean_merge;
 }
 
+static void resolve_diffpair_statuses(struct diff_queue_struct *q)
+{
+	/*
+	 * A simplified version of diff_resolve_rename_copy(); would probably
+	 * just use that function but it's static...
+	 */
+	int i;
+	struct diff_filepair *p;
+
+	for (i = 0; i < q->nr; ++i) {
+		p = q->queue[i];
+		p->status = 0; /* undecided */
+		if (!DIFF_FILE_VALID(p->one))
+			p->status = DIFF_STATUS_ADDED;
+		else if (!DIFF_FILE_VALID(p->two))
+			p->status = DIFF_STATUS_DELETED;
+		else if (DIFF_PAIR_RENAME(p))
+			p->status = DIFF_STATUS_RENAMED;
+	}
+}
+
 static int compare_pairs(const void *a_, const void *b_)
 {
 	const struct diff_filepair *a = *((const struct diff_filepair **)a_);
@@ -2089,8 +2143,6 @@ static int compare_pairs(const void *a_, const void *b_)
 
 /* Call diffcore_rename() to compute which files have changed on given side */
 static void detect_regular_renames(struct merge_options *opt,
-				   struct tree *merge_base,
-				   struct tree *side,
 				   unsigned side_index)
 {
 	struct diff_options diff_opts;
@@ -2108,11 +2160,11 @@ static void detect_regular_renames(struct merge_options *opt,
 	diff_opts.output_format = DIFF_FORMAT_NO_OUTPUT;
 	diff_setup_done(&diff_opts);
 
+	diff_queued_diff = renames->pairs[side_index];
 	trace2_region_enter("diff", "diffcore_rename", opt->repo);
-	diff_tree_oid(&merge_base->object.oid, &side->object.oid, "",
-		      &diff_opts);
-	diffcore_std(&diff_opts);
+	diffcore_rename(&diff_opts);
 	trace2_region_leave("diff", "diffcore_rename", opt->repo);
+	resolve_diffpair_statuses(&diff_queued_diff);
 
 	if (diff_opts.needed_rename_limit > renames->needed_limit)
 		renames->needed_limit = diff_opts.needed_rename_limit;
@@ -2212,8 +2264,8 @@ static int detect_and_process_renames(struct merge_options *opt,
 	memset(&combined, 0, sizeof(combined));
 
 	trace2_region_enter("merge", "regular renames", opt->repo);
-	detect_regular_renames(opt, merge_base, side1, MERGE_SIDE1);
-	detect_regular_renames(opt, merge_base, side2, MERGE_SIDE2);
+	detect_regular_renames(opt, MERGE_SIDE1);
+	detect_regular_renames(opt, MERGE_SIDE2);
 	trace2_region_leave("merge", "regular renames", opt->repo);
 
 	trace2_region_enter("merge", "directory renames", opt->repo);
-- 
gitgitgadget

  parent reply	other threads:[~2021-02-11  8:17 UTC|newest]

Thread overview: 71+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-06 22:52 [PATCH 0/3] Optimization batch 7: use file basenames to guide rename detection Elijah Newren via GitGitGadget
2021-02-06 22:52 ` [PATCH 1/3] diffcore-rename: compute basenames of all source and dest candidates Elijah Newren via GitGitGadget
2021-02-06 22:52 ` [PATCH 2/3] diffcore-rename: complete find_basename_matches() Elijah Newren via GitGitGadget
2021-02-06 22:52 ` [PATCH 3/3] diffcore-rename: guide inexact rename detection based on basenames Elijah Newren via GitGitGadget
2021-02-07 14:38   ` Derrick Stolee
2021-02-07 19:51     ` Junio C Hamano
2021-02-08  8:38       ` Elijah Newren
2021-02-08 11:43         ` Derrick Stolee
2021-02-08 16:25           ` Elijah Newren
2021-02-08 17:37         ` Junio C Hamano
2021-02-08 22:00           ` Elijah Newren
2021-02-08 23:43             ` Junio C Hamano
2021-02-08 23:52               ` Elijah Newren
2021-02-08  8:27     ` Elijah Newren
2021-02-08 11:31       ` Derrick Stolee
2021-02-08 16:09         ` Elijah Newren
2021-02-07  5:19 ` [PATCH 0/3] Optimization batch 7: use file basenames to guide rename detection Junio C Hamano
2021-02-07  6:05   ` Elijah Newren
2021-02-09 11:32 ` [PATCH v2 0/4] " Elijah Newren via GitGitGadget
2021-02-09 11:32   ` [PATCH v2 1/4] diffcore-rename: compute basenames of all source and dest candidates Elijah Newren via GitGitGadget
2021-02-09 13:17     ` Derrick Stolee
2021-02-09 16:56       ` Elijah Newren
2021-02-09 17:02         ` Derrick Stolee
2021-02-09 17:42           ` Elijah Newren
2021-02-09 11:32   ` [PATCH v2 2/4] diffcore-rename: complete find_basename_matches() Elijah Newren via GitGitGadget
2021-02-09 13:25     ` Derrick Stolee
2021-02-09 17:17       ` Elijah Newren
2021-02-09 17:34         ` Derrick Stolee
2021-02-09 11:32   ` [PATCH v2 3/4] diffcore-rename: guide inexact rename detection based on basenames Elijah Newren via GitGitGadget
2021-02-09 13:33     ` Derrick Stolee
2021-02-09 17:41       ` Elijah Newren
2021-02-09 18:59         ` Junio C Hamano
2021-02-09 11:32   ` [PATCH v2 4/4] gitdiffcore doc: mention new preliminary step for rename detection Elijah Newren via GitGitGadget
2021-02-09 12:59     ` Derrick Stolee
2021-02-09 17:03       ` Junio C Hamano
2021-02-09 17:44         ` Elijah Newren
2021-02-10 15:15   ` [PATCH v3 0/5] Optimization batch 7: use file basenames to guide " Elijah Newren via GitGitGadget
2021-02-10 15:15     ` [PATCH v3 1/5] t4001: add a test comparing basename similarity and content similarity Elijah Newren via GitGitGadget
2021-02-13  1:15       ` Junio C Hamano
2021-02-13  4:50         ` Elijah Newren
2021-02-13 23:56           ` Junio C Hamano
2021-02-14  1:24             ` Elijah Newren
2021-02-14  1:32               ` Junio C Hamano
2021-02-14  3:14                 ` Elijah Newren
2021-02-10 15:15     ` [PATCH v3 2/5] diffcore-rename: compute basenames of all source and dest candidates Elijah Newren via GitGitGadget
2021-02-13  1:32       ` Junio C Hamano
2021-02-10 15:15     ` [PATCH v3 3/5] diffcore-rename: complete find_basename_matches() Elijah Newren via GitGitGadget
2021-02-13  1:48       ` Junio C Hamano
2021-02-13 18:34         ` Elijah Newren
2021-02-13 23:55           ` Junio C Hamano
2021-02-14  3:08             ` Elijah Newren
2021-02-10 15:15     ` [PATCH v3 4/5] diffcore-rename: guide inexact rename detection based on basenames Elijah Newren via GitGitGadget
2021-02-13  1:49       ` Junio C Hamano
2021-02-10 15:15     ` [PATCH v3 5/5] gitdiffcore doc: mention new preliminary step for rename detection Elijah Newren via GitGitGadget
2021-02-10 16:41       ` Junio C Hamano
2021-02-10 17:20         ` Elijah Newren
2021-02-11  8:15     ` [PATCH v4 0/6] Optimization batch 7: use file basenames to guide " Elijah Newren via GitGitGadget
2021-02-11  8:15       ` [PATCH v4 1/6] t4001: add a test comparing basename similarity and content similarity Elijah Newren via GitGitGadget
2021-02-11  8:15       ` [PATCH v4 2/6] diffcore-rename: compute basenames of all source and dest candidates Elijah Newren via GitGitGadget
2021-02-11  8:15       ` [PATCH v4 3/6] diffcore-rename: complete find_basename_matches() Elijah Newren via GitGitGadget
2021-02-11  8:15       ` [PATCH v4 4/6] diffcore-rename: guide inexact rename detection based on basenames Elijah Newren via GitGitGadget
2021-02-11  8:15       ` [PATCH v4 5/6] gitdiffcore doc: mention new preliminary step for rename detection Elijah Newren via GitGitGadget
2021-02-11  8:15       ` Elijah Newren via GitGitGadget [this message]
2021-02-13  1:53       ` [PATCH v4 0/6] Optimization batch 7: use file basenames to guide " Junio C Hamano
2021-02-14  7:51       ` [PATCH v5 " Elijah Newren via GitGitGadget
2021-02-14  7:51         ` [PATCH v5 1/6] t4001: add a test comparing basename similarity and content similarity Elijah Newren via GitGitGadget
2021-02-14  7:51         ` [PATCH v5 2/6] diffcore-rename: compute basenames of source and dest candidates Elijah Newren via GitGitGadget
2021-02-14  7:51         ` [PATCH v5 3/6] diffcore-rename: complete find_basename_matches() Elijah Newren via GitGitGadget
2021-02-14  7:51         ` [PATCH v5 4/6] diffcore-rename: guide inexact rename detection based on basenames Elijah Newren via GitGitGadget
2021-02-14  7:51         ` [PATCH v5 5/6] gitdiffcore doc: mention new preliminary step for rename detection Elijah Newren via GitGitGadget
2021-02-14  7:51         ` [PATCH v5 6/6] merge-ort: call diffcore_rename() directly Elijah Newren via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=fedb3d323d948dfdc6e40e036563fd56983f0ad6.1613031350.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jonathantanmy@google.com \
    --cc=me@ttaylorr.com \
    --cc=newren@gmail.com \
    --cc=peff@peff.net \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).