All of lore.kernel.org
 help / color / mirror / Atom feed
From: "Elijah Newren via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: "Derrick Stolee" <dstolee@microsoft.com>,
	"Jonathan Tan" <jonathantanmy@google.com>,
	"Taylor Blau" <me@ttaylorr.com>, "René Scharfe" <l.s.r@web.de>,
	"Elijah Newren" <newren@gmail.com>,
	"Elijah Newren" <newren@gmail.com>,
	"Elijah Newren" <newren@gmail.com>
Subject: [PATCH v2 3/5] diffcore-rename: enable limiting rename detection to relevant destinations
Date: Tue, 01 Jun 2021 14:58:33 +0000	[thread overview]
Message-ID: <45e1de5fe7808f5297d5e33d14c239d74de33bdc.1622559516.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.962.v2.git.1622559516.gitgitgadget@gmail.com>

From: Elijah Newren <newren@gmail.com>

Our former optimizations focused on limiting rename detection to a
pre-specified set of relevant sources.  This was because the merge logic
only had a way of knowing which sources were relevant.  However, other
callers of rename detection might benefit from being able to limit
rename detection to a known set of relevant destinations.  In
particular, a properly implemented `git log --follow` might benefit from
such an ability.

Since the code to implement such limiting is very similar to what we've
already done, just implement it now even though we do not yet have any
callers making use of this ability.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 diffcore-rename.c | 48 +++++++++++++++++++++++++++++++++++++++++------
 diffcore.h        |  2 ++
 merge-ort.c       |  1 +
 3 files changed, 45 insertions(+), 6 deletions(-)

diff --git a/diffcore-rename.c b/diffcore-rename.c
index e333a6d64791..8ff83a9f3b99 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -372,6 +372,7 @@ struct dir_rename_info {
 	struct strmap dir_rename_guess;
 	struct strmap *dir_rename_count;
 	struct strintmap *relevant_source_dirs;
+	struct strset *relevant_destination_dirs;
 	unsigned setup;
 };
 
@@ -491,8 +492,11 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
 		    !strintmap_contains(info->relevant_source_dirs, old_dir))
 			break;
 
-		/* Get new_dir */
+		/* Get new_dir, skip if its directory isn't relevant. */
 		dirname_munge(new_dir);
+		if (info->relevant_destination_dirs &&
+		    !strset_contains(info->relevant_destination_dirs, new_dir))
+			break;
 
 		/*
 		 * When renaming
@@ -567,6 +571,7 @@ static void update_dir_rename_counts(struct dir_rename_info *info,
 
 static void initialize_dir_rename_info(struct dir_rename_info *info,
 				       struct strintmap *relevant_sources,
+				       struct strset *relevant_destinations,
 				       struct strintmap *dirs_removed,
 				       struct strmap *dir_rename_count,
 				       struct strmap *cached_pairs)
@@ -575,7 +580,7 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
 	struct strmap_entry *entry;
 	int i;
 
-	if (!dirs_removed && !relevant_sources) {
+	if (!dirs_removed && !relevant_sources && !relevant_destinations) {
 		info->setup = 0;
 		return;
 	}
@@ -589,6 +594,18 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
 	strintmap_init_with_options(&info->idx_map, -1, NULL, 0);
 	strmap_init_with_options(&info->dir_rename_guess, NULL, 0);
 
+	/* Setup info->relevant_destination_dirs */
+	info->relevant_destination_dirs = NULL;
+	if (relevant_destinations) {
+		info->relevant_destination_dirs = xmalloc(sizeof(struct strset));
+		strset_init(info->relevant_destination_dirs);
+		strset_for_each_entry(relevant_destinations, &iter, entry) {
+			char *dirname = get_dirname(entry->key);
+			strset_add(info->relevant_destination_dirs, dirname);
+			free(dirname);
+		}
+	}
+
 	/* Setup info->relevant_source_dirs */
 	info->relevant_source_dirs = NULL;
 	if (dirs_removed || !relevant_sources) {
@@ -700,6 +717,12 @@ static void cleanup_dir_rename_info(struct dir_rename_info *info,
 		FREE_AND_NULL(info->relevant_source_dirs);
 	}
 
+	/* relevant_destination_dirs */
+	if (info->relevant_destination_dirs) {
+		strset_clear(info->relevant_destination_dirs);
+		FREE_AND_NULL(info->relevant_destination_dirs);
+	}
+
 	/* dir_rename_count */
 	if (!keep_dir_rename_count) {
 		partial_clear_dir_rename_count(info->dir_rename_count);
@@ -827,6 +850,7 @@ static int find_basename_matches(struct diff_options *options,
 				 int minimum_score,
 				 struct dir_rename_info *info,
 				 struct strintmap *relevant_sources,
+				 struct strset *relevant_destinations,
 				 struct strintmap *dirs_removed)
 {
 	/*
@@ -949,9 +973,15 @@ static int find_basename_matches(struct diff_options *options,
 			if (rename_dst[dst_index].is_rename)
 				continue; /* already used previously */
 
-			/* Estimate the similarity */
 			one = rename_src[src_index].p->one;
 			two = rename_dst[dst_index].p->two;
+
+			/* Skip irrelevant destinations */
+			if (relevant_destinations &&
+			    !strset_contains(relevant_destinations, two->path))
+				continue;
+
+			/* Estimate the similarity */
 			score = estimate_similarity(options->repo, one, two,
 						    minimum_score, skip_unmodified);
 
@@ -1258,6 +1288,7 @@ static void handle_early_known_dir_renames(struct dir_rename_info *info,
 
 void diffcore_rename_extended(struct diff_options *options,
 			      struct strintmap *relevant_sources,
+			      struct strset *relevant_destinations,
 			      struct strintmap *dirs_removed,
 			      struct strmap *dir_rename_count,
 			      struct strmap *cached_pairs)
@@ -1376,8 +1407,8 @@ void diffcore_rename_extended(struct diff_options *options,
 		/* Preparation for basename-driven matching. */
 		trace2_region_enter("diff", "dir rename setup", options->repo);
 		initialize_dir_rename_info(&info, relevant_sources,
-					   dirs_removed, dir_rename_count,
-					   cached_pairs);
+					   relevant_destinations, dirs_removed,
+					   dir_rename_count, cached_pairs);
 		trace2_region_leave("diff", "dir rename setup", options->repo);
 
 		/* Utilize file basenames to quickly find renames. */
@@ -1386,6 +1417,7 @@ void diffcore_rename_extended(struct diff_options *options,
 						      min_basename_score,
 						      &info,
 						      relevant_sources,
+						      relevant_destinations,
 						      dirs_removed);
 		trace2_region_leave("diff", "basename matches", options->repo);
 
@@ -1441,6 +1473,10 @@ void diffcore_rename_extended(struct diff_options *options,
 		if (rename_dst[i].is_rename)
 			continue; /* exact or basename match already handled */
 
+		if (relevant_destinations &&
+		    !strset_contains(relevant_destinations, two->path))
+			continue;
+
 		m = &mx[dst_cnt * NUM_CANDIDATE_PER_DST];
 		for (j = 0; j < NUM_CANDIDATE_PER_DST; j++)
 			m[j].dst = -1;
@@ -1574,5 +1610,5 @@ void diffcore_rename_extended(struct diff_options *options,
 
 void diffcore_rename(struct diff_options *options)
 {
-	diffcore_rename_extended(options, NULL, NULL, NULL, NULL);
+	diffcore_rename_extended(options, NULL, NULL, NULL, NULL, NULL);
 }
diff --git a/diffcore.h b/diffcore.h
index 533b30e21e7f..435c7094f403 100644
--- a/diffcore.h
+++ b/diffcore.h
@@ -10,6 +10,7 @@ struct diff_options;
 struct repository;
 struct strintmap;
 struct strmap;
+struct strset;
 struct userdiff_driver;
 
 /* This header file is internal between diff.c and its diff transformers
@@ -180,6 +181,7 @@ void diffcore_break(struct repository *, int);
 void diffcore_rename(struct diff_options *);
 void diffcore_rename_extended(struct diff_options *options,
 			      struct strintmap *relevant_sources,
+			      struct strset *relevant_destinations,
 			      struct strintmap *dirs_removed,
 			      struct strmap *dir_rename_count,
 			      struct strmap *cached_pairs);
diff --git a/merge-ort.c b/merge-ort.c
index 0fe2eaf02eb2..28951c35ddc2 100644
--- a/merge-ort.c
+++ b/merge-ort.c
@@ -2568,6 +2568,7 @@ static void detect_regular_renames(struct merge_options *opt,
 	trace2_region_enter("diff", "diffcore_rename", opt->repo);
 	diffcore_rename_extended(&diff_opts,
 				 &renames->relevant_sources[side_index],
+				 NULL,
 				 &renames->dirs_removed[side_index],
 				 &renames->dir_rename_count[side_index],
 				 &renames->cached_pairs[side_index]);
-- 
gitgitgadget


  parent reply	other threads:[~2021-06-01 14:58 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-05-27  8:37 [PATCH 0/5] Optimization batch 12: miscellaneous unthemed stuff Elijah Newren via GitGitGadget
2021-05-27  8:37 ` [PATCH 1/5] merge-ort: replace string_list_df_name_compare with faster alternative Elijah Newren via GitGitGadget
2021-05-27 21:00   ` René Scharfe
2021-05-27 22:47     ` Elijah Newren
2021-05-28 16:12       ` René Scharfe
2021-05-28 18:09         ` Elijah Newren
2021-05-28  1:32   ` Taylor Blau
2021-05-28  4:10     ` Elijah Newren
2021-05-27  8:37 ` [PATCH 2/5] diffcore-rename: avoid unnecessary strdup'ing in break_idx Elijah Newren via GitGitGadget
2021-05-27  8:37 ` [PATCH 3/5] diffcore-rename: enable limiting rename detection to relevant destinations Elijah Newren via GitGitGadget
2021-05-27  8:37 ` [PATCH 4/5] Fix various issues found in comments Elijah Newren via GitGitGadget
2021-05-27  8:37 ` [PATCH 5/5] merge-ort: miscellaneous touch-ups Elijah Newren via GitGitGadget
2021-06-01 14:58 ` [PATCH v2 0/5] Optimization batch 12: miscellaneous unthemed stuff Elijah Newren via GitGitGadget
2021-06-01 14:58   ` [PATCH v2 1/5] merge-ort: replace string_list_df_name_compare with faster alternative Elijah Newren via GitGitGadget
2021-06-02 11:29     ` Derrick Stolee
2021-06-01 14:58   ` [PATCH v2 2/5] diffcore-rename: avoid unnecessary strdup'ing in break_idx Elijah Newren via GitGitGadget
2021-06-01 14:58   ` Elijah Newren via GitGitGadget [this message]
2021-06-03 12:54     ` [PATCH v2 3/5] diffcore-rename: enable limiting rename detection to relevant destinations Derrick Stolee
2021-06-03 14:13       ` Elijah Newren
2021-06-01 14:58   ` [PATCH v2 4/5] Fix various issues found in comments Elijah Newren via GitGitGadget
2021-06-01 14:58   ` [PATCH v2 5/5] merge-ort: miscellaneous touch-ups Elijah Newren via GitGitGadget
2021-06-03 12:55   ` [PATCH v2 0/5] Optimization batch 12: miscellaneous unthemed stuff Derrick Stolee
2021-06-04  4:39   ` [PATCH v3 0/4] " Elijah Newren via GitGitGadget
2021-06-04  4:39     ` [PATCH v3 1/4] merge-ort: replace string_list_df_name_compare with faster alternative Elijah Newren via GitGitGadget
2021-06-04  4:39     ` [PATCH v3 2/4] diffcore-rename: avoid unnecessary strdup'ing in break_idx Elijah Newren via GitGitGadget
2021-06-04  4:39     ` [PATCH v3 3/4] Fix various issues found in comments Elijah Newren via GitGitGadget
2021-06-04  4:39     ` [PATCH v3 4/4] merge-ort: miscellaneous touch-ups Elijah Newren via GitGitGadget
2021-06-04 13:11     ` [PATCH v3 0/4] Optimization batch 12: miscellaneous unthemed stuff Derrick Stolee
2021-06-04 15:48       ` Elijah Newren
2021-06-04 16:30         ` Elijah Newren
2021-06-04 16:35         ` Jeff King
2021-06-04 18:42           ` Derrick Stolee
2021-06-04 19:43             ` Elijah Newren
2021-06-04 19:53             ` Jeff King
2021-06-08 16:11     ` [PATCH v4 " Elijah Newren via GitGitGadget
2021-06-08 16:11       ` [PATCH v4 1/4] merge-ort: replace string_list_df_name_compare with faster alternative Elijah Newren via GitGitGadget
2021-06-08 16:11       ` [PATCH v4 2/4] diffcore-rename: avoid unnecessary strdup'ing in break_idx Elijah Newren via GitGitGadget
2021-06-08 16:11       ` [PATCH v4 3/4] Fix various issues found in comments Elijah Newren via GitGitGadget
2021-06-08 16:11       ` [PATCH v4 4/4] merge-ort: miscellaneous touch-ups Elijah Newren via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=45e1de5fe7808f5297d5e33d14c239d74de33bdc.1622559516.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=dstolee@microsoft.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    --cc=l.s.r@web.de \
    --cc=me@ttaylorr.com \
    --cc=newren@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.