All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v1] add status config and command line options for rename detection
@ 2018-05-09 14:42 Ben Peart
  2018-05-09 15:59 ` Duy Nguyen
                   ` (4 more replies)
  0 siblings, 5 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-09 14:42 UTC (permalink / raw)
  To: git
  Cc: Ben Peart, newren, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass, Ben Peart

Add a new config status.renames setting to enable turning off rename detection
during status.  This setting will default to the value of diff.renames.

Add a new config status.renamelimit setting to to enable bounding the time spent
finding out inexact renames during status.  This setting will default to the
value of diff.renamelimit.

Add status --no-renames command line option that enables overriding the config
setting from the command line. Add --find-renames[=<n>] to enable detecting
renames and optionaly setting the similarity index from the command line.

Origional-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
---

Notes:
    Base Ref:
    Web-Diff: https://github.com/benpeart/git/commit/aa977d2964
    Checkout: git fetch https://github.com/benpeart/git status-renames-v1 && git checkout aa977d2964

 Documentation/config.txt |  9 ++++
 builtin/commit.c         | 57 +++++++++++++++++++++++++
 diff.c                   |  2 +-
 diff.h                   |  1 +
 t/t7525-status-rename.sh | 90 ++++++++++++++++++++++++++++++++++++++++
 wt-status.c              | 12 ++++++
 wt-status.h              |  4 +-
 7 files changed, 173 insertions(+), 2 deletions(-)
 create mode 100644 t/t7525-status-rename.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb3..b79b83c587 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3119,6 +3119,15 @@ status.displayCommentPrefix::
 	behavior of linkgit:git-status[1] in Git 1.8.4 and previous.
 	Defaults to false.
 
+status.renameLimit::
+	The number of files to consider when performing rename detection;
+	if not specified, defaults to the value of diff.renameLimit.
+
+status.renames::
+	Whether and how Git detects renames.  If set to "false",
+	rename detection is disabled. If set to "true", basic rename
+	detection is enabled.  Defaults to the value of diff.renames.
+
 status.showStash::
 	If set to true, linkgit:git-status[1] will display the number of
 	entries currently stashed away.
diff --git a/builtin/commit.c b/builtin/commit.c
index 5240f11225..a545096ddd 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -109,6 +109,10 @@ static int have_option_m;
 static struct strbuf message = STRBUF_INIT;
 
 static enum wt_status_format status_format = STATUS_FORMAT_UNSPECIFIED;
+static int diff_detect_rename = -1;
+static int status_detect_rename = -1;
+static int diff_rename_limit = -1;
+static int status_rename_limit = -1;
 
 static int opt_parse_porcelain(const struct option *opt, const char *arg, int unset)
 {
@@ -143,6 +147,16 @@ static int opt_parse_m(const struct option *opt, const char *arg, int unset)
 	return 0;
 }
 
+static int opt_parse_rename_score(const struct option *opt, const char *arg, int unset)
+{
+	const char **value = opt->value;
+	if (arg != NULL && *arg == '=')
+		arg = arg + 1;
+
+	*value = arg;
+	return 0;
+}
+
 static void determine_whence(struct wt_status *s)
 {
 	if (file_exists(git_path_merge_head()))
@@ -1259,11 +1273,29 @@ static int git_status_config(const char *k, const char *v, void *cb)
 			return error(_("Invalid untracked files mode '%s'"), v);
 		return 0;
 	}
+	if (!strcmp(k, "diff.renamelimit")) {
+		diff_rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renamelimit")) {
+		status_rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "diff.renames")) {
+		diff_detect_rename = git_config_rename(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renames")) {
+		status_detect_rename = git_config_rename(k, v);
+		return 0;
+	}
 	return git_diff_ui_config(k, v, NULL);
 }
 
 int cmd_status(int argc, const char **argv, const char *prefix)
 {
+	static int no_renames = -1;
+	static const char *rename_score_arg = (const char *)-1;
 	static struct wt_status s;
 	int fd;
 	struct object_id oid;
@@ -1297,6 +1329,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 		  N_("ignore changes to submodules, optional when: all, dirty, untracked. (Default: all)"),
 		  PARSE_OPT_OPTARG, NULL, (intptr_t)"all" },
 		OPT_COLUMN(0, "column", &s.colopts, N_("list untracked files in columns")),
+		OPT_BOOL(0, "no-renames", &no_renames, N_("do not detect renames")),
+		{ OPTION_CALLBACK, 'M', "find-renames", &rename_score_arg,
+		  N_("n"), N_("detect renames, optionally set similarity index"),
+		  PARSE_OPT_OPTARG, opt_parse_rename_score },
 		OPT_END(),
 	};
 
@@ -1336,6 +1372,27 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	s.status_format = status_format;
 	s.verbose = verbose;
+	s.detect_rename = no_renames >= 0 ? !no_renames :
+					  status_detect_rename >= 0 ? status_detect_rename :
+					  diff_detect_rename >= 0 ? diff_detect_rename :
+					  s.detect_rename;
+	if ((intptr_t)rename_score_arg != -1) {
+		s.detect_rename = DIFF_DETECT_RENAME;
+		if (rename_score_arg)
+			s.rename_score = parse_rename_score(&rename_score_arg);
+	}
+	s.rename_limit = status_rename_limit >= 0 ? status_rename_limit :
+					 diff_rename_limit >= 0 ? diff_rename_limit :
+					 s.rename_limit;
+
+	/*
+	 * We do not have logic to handle the detection of copies.  In
+	 * fact, it may not even make sense to add such logic: would we
+	 * really want a change to a base file to be propagated through
+	 * multiple other files by a merge?
+	 */
+	if (s.detect_rename > DIFF_DETECT_RENAME)
+		s.detect_rename = DIFF_DETECT_RENAME;
 
 	wt_status_collect(&s);
 
diff --git a/diff.c b/diff.c
index 1289df4b1f..5dfc24aa6d 100644
--- a/diff.c
+++ b/diff.c
@@ -177,7 +177,7 @@ static int parse_submodule_params(struct diff_options *options, const char *valu
 	return 0;
 }
 
-static int git_config_rename(const char *var, const char *value)
+int git_config_rename(const char *var, const char *value)
 {
 	if (!value)
 		return DIFF_DETECT_RENAME;
diff --git a/diff.h b/diff.h
index d29560f822..dedac472ca 100644
--- a/diff.h
+++ b/diff.h
@@ -324,6 +324,7 @@ extern int git_diff_ui_config(const char *var, const char *value, void *cb);
 extern void diff_setup(struct diff_options *);
 extern int diff_opt_parse(struct diff_options *, const char **, int, const char *);
 extern void diff_setup_done(struct diff_options *);
+extern int git_config_rename(const char *var, const char *value);
 
 #define DIFF_DETECT_RENAME	1
 #define DIFF_DETECT_COPY	2
diff --git a/t/t7525-status-rename.sh b/t/t7525-status-rename.sh
new file mode 100644
index 0000000000..311df8038a
--- /dev/null
+++ b/t/t7525-status-rename.sh
@@ -0,0 +1,90 @@
+#!/bin/sh
+
+test_description='git status rename detection options'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	echo 1 >original &&
+	git add . &&
+	git commit -m"Adding original file." &&
+	mv original renamed &&
+	echo 2 >> renamed &&
+	git add .
+'
+
+cat >.gitignore <<\EOF
+.gitignore
+expect*
+actual*
+EOF
+
+test_expect_success 'status no-options' '
+	git status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status --no-renames' '
+	git status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames false' '
+	git -c diff.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames true' '
+	git -c diff.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status.renames overrides diff.renames false' '
+	git -c diff.renames=true -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames overrides from diff.renames true' '
+	git -c diff.renames=false -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status status.renames=false' '
+	git -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status status.renames=true' '
+	git -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status config overriden' '
+	git -c status.renames=true status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=100%' '
+	git status -M=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual &&
+
+	git status --find-rename=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=01%' '
+	git status -M=01% >actual &&
+	test_i18ngrep "renamed:" actual &&
+
+	git status --find-rename=01% >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index 32f3bcaebd..172f07cbb0 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -138,6 +138,9 @@ void wt_status_prepare(struct wt_status *s)
 	s->show_stash = 0;
 	s->ahead_behind_flags = AHEAD_BEHIND_UNSPECIFIED;
 	s->display_comment_prefix = 0;
+	s->detect_rename = -1;
+	s->rename_score = -1;
+	s->rename_limit = -1;
 }
 
 static void wt_longstatus_print_unmerged_header(struct wt_status *s)
@@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
 	}
 	rev.diffopt.format_callback = wt_status_collect_changed_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_files(&rev, 0);
 }
@@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
 	rev.diffopt.format_callback = wt_status_collect_updated_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_index(&rev, 1);
 }
@@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
 	setup_revisions(0, NULL, &rev, &opt);
 
 	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	rev.diffopt.file = s->fp;
 	rev.diffopt.close_file = 0;
 	/*
diff --git a/wt-status.h b/wt-status.h
index 430770b854..1673d146fa 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -89,7 +89,9 @@ struct wt_status {
 	int show_stash;
 	int hints;
 	enum ahead_behind_flags ahead_behind_flags;
-
+	int detect_rename;
+	int rename_score;
+	int rename_limit;
 	enum wt_status_format status_format;
 	unsigned char sha1_commit[GIT_MAX_RAWSZ]; /* when not Initial */
 

base-commit: a92ae92585d8db14b7871f760f157256cd96742c
-- 
2.17.0.windows.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v1] add status config and command line options for rename detection
  2018-05-09 14:42 [PATCH v1] add status config and command line options for rename detection Ben Peart
@ 2018-05-09 15:59 ` Duy Nguyen
  2018-05-09 17:04   ` Ben Peart
  2018-05-09 16:56 ` Elijah Newren
                   ` (3 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Duy Nguyen @ 2018-05-09 15:59 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, newren, gitster, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

On Wed, May 9, 2018 at 4:42 PM, Ben Peart <Ben.Peart@microsoft.com> wrote:
> Add a new config status.renames setting to enable turning off rename detection
> during status.  This setting will default to the value of diff.renames.

Please add the reason you need this config key in the commit message.
My guess (probably correct) is on super large repo (how large?),
rename detection is just too slow (how long?) that it practically
makes git-status unusable.

This information could be helpful when we optimize rename detection to
be more efficient.

>
> Add a new config status.renamelimit setting to to enable bounding the time spent
> finding out inexact renames during status.  This setting will default to the
> value of diff.renamelimit.
>
> Add status --no-renames command line option that enables overriding the config
> setting from the command line. Add --find-renames[=<n>] to enable detecting
> renames and optionaly setting the similarity index from the command line.
>
> Origional-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
> Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
-- 
Duy

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v1] add status config and command line options for rename detection
  2018-05-09 14:42 [PATCH v1] add status config and command line options for rename detection Ben Peart
  2018-05-09 15:59 ` Duy Nguyen
@ 2018-05-09 16:56 ` Elijah Newren
  2018-05-09 19:54   ` Ben Peart
  2018-05-10 14:16 ` [PATCH v2] " Ben Peart
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 17+ messages in thread
From: Elijah Newren @ 2018-05-09 16:56 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

Hi Ben,

Overall I think this is good, but I have lots of nit-picky things to
bring up.  :-)


On Wed, May 9, 2018 at 7:42 AM, Ben Peart <Ben.Peart@microsoft.com> wrote:
> Add status --no-renames command line option that enables overriding the config
> setting from the command line. Add --find-renames[=<n>] to enable detecting
> renames and optionaly setting the similarity index from the command line.

s/optionaly/optionally/

> Notes:
>     Base Ref:

No base ref?  ;-)

> +status.renameLimit::
> +       The number of files to consider when performing rename detection;
> +       if not specified, defaults to the value of diff.renameLimit.
> +
> +status.renames::
> +       Whether and how Git detects renames.  If set to "false",
> +       rename detection is disabled. If set to "true", basic rename
> +       detection is enabled.  Defaults to the value of diff.renames.

I suspect that status.renames should mention "copy", just as
diff.renames does.  (We didn't mention it in merge.renames, because
merge isn't an operation for which copy detection can be useful -- at
least not until the diffstat at the end of the merge is controlled by
merge.renames as well...)

Also, do these two config settings only affect 'git status', or does
it also affect the status shown when composing a commit message
(assuming the user hasn't turned commit.status off)?  Does it affect
`git commit --dry-run` too?

> --- a/builtin/commit.c
> +++ b/builtin/commit.c
> @@ -109,6 +109,10 @@ static int have_option_m;
>  static struct strbuf message = STRBUF_INIT;
>
>  static enum wt_status_format status_format = STATUS_FORMAT_UNSPECIFIED;
> +static int diff_detect_rename = -1;
> +static int status_detect_rename = -1;
> +static int diff_rename_limit = -1;
> +static int status_rename_limit = -1;

Could we replace these four variables with just two: detect_rename and
rename_limit?  Keeping these separate invites people to write code
using only one of the settings rather than the appropriate inherited
mixture of them, resulting in a weird bug.  More on this below...

> @@ -1259,11 +1273,29 @@ static int git_status_config(const char *k, const char *v, void *cb)
>                         return error(_("Invalid untracked files mode '%s'"), v);
>                 return 0;
>         }
> +       if (!strcmp(k, "diff.renamelimit")) {
> +               diff_rename_limit = git_config_int(k, v);
> +               return 0;
> +       }
> +       if (!strcmp(k, "status.renamelimit")) {
> +               status_rename_limit = git_config_int(k, v);
> +               return 0;
> +       }

Here, since you're already checking diff.renamelimit first, you can
just set rename_limit in both blocks and not need both
diff_rename_limit and status_rename_limit.  (Similar can be said for
diff.renames/status.renames.)

<snip>

> @@ -1297,6 +1329,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
>                   N_("ignore changes to submodules, optional when: all, dirty, untracked. (Default: all)"),
>                   PARSE_OPT_OPTARG, NULL, (intptr_t)"all" },
>                 OPT_COLUMN(0, "column", &s.colopts, N_("list untracked files in columns")),
> +               OPT_BOOL(0, "no-renames", &no_renames, N_("do not detect renames")),
> +               { OPTION_CALLBACK, 'M', "find-renames", &rename_score_arg,
> +                 N_("n"), N_("detect renames, optionally set similarity index"),
> +                 PARSE_OPT_OPTARG, opt_parse_rename_score },

Should probably also document these options in
Documentation/git-status.txt (and maybe Documentation/git-commit.txt
as well).

Not sure if we want to add a flag for copy detection (similar to
git-diff's -C/--find-copies and --find-copies-harder), or just leave
that for when someone finds a need.  If left out, might want to just
mention that it was considered and intentionally omitted for now in
the commit message.

> @@ -1336,6 +1372,27 @@ int cmd_status(int argc, const char **argv, const char *prefix)
>         s.ignore_submodule_arg = ignore_submodule_arg;
>         s.status_format = status_format;
>         s.verbose = verbose;
> +       s.detect_rename = no_renames >= 0 ? !no_renames :
> +                                         status_detect_rename >= 0 ? status_detect_rename :
> +                                         diff_detect_rename >= 0 ? diff_detect_rename :

Combining the four vars into two as mentioned above would allow
combining the last two lines above into one.

> +       if ((intptr_t)rename_score_arg != -1) {

I don't understand why rename_score_arg is a (char*) and then you need
to cast to intptr_t, but that may just be because I haven't done much
of anything with option parsing.  A quick look at the docs isn't
making it clear to me, though; could you enlighten me?

> +               s.detect_rename = DIFF_DETECT_RENAME;

What if status.renames is 'copy' but someone wants to override the
score for detecting renames and pass --find-renames=40?  Does the
--find-renames override and degrade the 'copy'?  I think it'd make
more sense to increase s.detect_rename to at least DIFF_DETECT_RENAME,
rather than just outright setting it here.

> +               if (rename_score_arg)
> +                       s.rename_score = parse_rename_score(&rename_score_arg);
> +       }
> +       s.rename_limit = status_rename_limit >= 0 ? status_rename_limit :
> +                                        diff_rename_limit >= 0 ? diff_rename_limit :

Again, combination of variables could allow these last two lines to be combined.

> +                                        s.rename_limit;
> +
> +       /*
> +        * We do not have logic to handle the detection of copies.  In
> +        * fact, it may not even make sense to add such logic: would we
> +        * really want a change to a base file to be propagated through
> +        * multiple other files by a merge?
> +        */
> +       if (s.detect_rename > DIFF_DETECT_RENAME)
> +               s.detect_rename = DIFF_DETECT_RENAME;

This comment and code made sense in merge-recursive.c (which doesn't
show detected diffs/renames/copies but just uses them for internal
processing logic).  It does not make sense here; git status could show
detected copies much like `git diff -C --name-status` shows it.  In
fact, a quick grep for DIFF_STATUS_COPIED shows multiple hits in
wt-status.c, so I suspect it already has the necessary logic for
displaying copies.


I looked over the rest of the patch.  Nice testcases.  Adding a couple
more testcases around copy detection could be useful.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v1] add status config and command line options for rename detection
  2018-05-09 15:59 ` Duy Nguyen
@ 2018-05-09 17:04   ` Ben Peart
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-09 17:04 UTC (permalink / raw)
  To: Duy Nguyen, Ben Peart
  Cc: git, newren, gitster, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass



On 5/9/2018 11:59 AM, Duy Nguyen wrote:
> On Wed, May 9, 2018 at 4:42 PM, Ben Peart <Ben.Peart@microsoft.com> wrote:
>> Add a new config status.renames setting to enable turning off rename detection
>> during status.  This setting will default to the value of diff.renames.
> 
> Please add the reason you need this config key in the commit message.
> My guess (probably correct) is on super large repo (how large?),
> rename detection is just too slow (how long?) that it practically
> makes git-status unusable.
> 

Yes, the reasons for this change are the same as for the patch that 
added these same flags for merge and have to do with the poor 
performance of rename detection with large repos.  I'll update the 
commit message to be more descriptive (see below) and correct some 
spelling errors.


add status config and command line options for rename detection

After performing a merge that has conflicts, git status will by default 
attempt to detect renames which causes many objects to be examined.  In 
a virtualized repo, those objects do not exist locally so the rename 
logic triggers them to be fetched from the server. This results in the 
status call taking hours to complete on very large repos.  Even in a 
small repo (the GVFS repo) turning off break and rename detection has a 
significant impact:

git status --no-renames:
31 secs., 105 loose object downloads

git status --no-breaks
7 secs., 17 loose object downloads

git status --no-breaks --no-renames
1 sec., 1 loose object download

Add a new config status.renames setting to enable turning off rename 
detection during status.  This setting will default to the value of 
diff.renames.

Add a new config status.renamelimit setting to to enable bounding the 
time spent finding out inexact renames during status.  This setting will 
default to the value of diff.renamelimit.

Add status --no-renames command line option that enables overriding the 
config setting from the command line. Add --find-renames[=<n>] to enable 
detecting renames and optionally setting the similarity index from the 
command line.

Note: I removed the --no-breaks command line option from the original 
patch as it will no longer be needed once the default has been changed 
[1] to turn it off.

[1] 
https://public-inbox.org/git/20180430093421.27551-2-eckhard.s.maass@gmail.com/

Original-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>


> This information could be helpful when we optimize rename detection to
> be more efficient.
> 
>>
>> Add a new config status.renamelimit setting to to enable bounding the time spent
>> finding out inexact renames during status.  This setting will default to the
>> value of diff.renamelimit.
>>
>> Add status --no-renames command line option that enables overriding the config
>> setting from the command line. Add --find-renames[=<n>] to enable detecting
>> renames and optionaly setting the similarity index from the command line.
>>
>> Origional-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
>> Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v1] add status config and command line options for rename detection
  2018-05-09 16:56 ` Elijah Newren
@ 2018-05-09 19:54   ` Ben Peart
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-09 19:54 UTC (permalink / raw)
  To: Elijah Newren, Ben Peart
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass



On 5/9/2018 12:56 PM, Elijah Newren wrote:
> Hi Ben,
> 
> Overall I think this is good, but I have lots of nit-picky things to
> bring up.  :-)
> 
> 

Thank you for the review.  I appreciate the extra set of eyes on these 
changes.  Especially when dealing with the merge logic and settings 
which I am unfamiliar with.

> 
> I suspect that status.renames should mention "copy", just as
> diff.renames does.  (We didn't mention it in merge.renames, because
> merge isn't an operation for which copy detection can be useful -- at
> least not until the diffstat at the end of the merge is controlled by
> merge.renames as well...)
> 

I wasn't supporting copy (as you noticed later in the patch) but will 
update the patch to do so and update the documentation appropriately.

> Also, do these two config settings only affect 'git status', or does
> it also affect the status shown when composing a commit message
> (assuming the user hasn't turned commit.status off)?  Does it affect
> `git commit --dry-run` too?
> 

The config settings only affect 'git status'

>> --- a/builtin/commit.c
>> +++ b/builtin/commit.c
>> @@ -109,6 +109,10 @@ static int have_option_m;
>>   static struct strbuf message = STRBUF_INIT;
>>
>>   static enum wt_status_format status_format = STATUS_FORMAT_UNSPECIFIED;
>> +static int diff_detect_rename = -1;
>> +static int status_detect_rename = -1;
>> +static int diff_rename_limit = -1;
>> +static int status_rename_limit = -1;
> 
> Could we replace these four variables with just two: detect_rename and
> rename_limit?  Keeping these separate invites people to write code
> using only one of the settings rather than the appropriate inherited
> mixture of them, resulting in a weird bug.  More on this below...
> 

This model was inherited from the diff code and replicated to the merge 
code.  However, it would be nice to get rid of these 4 static variables. 
  See below for a proposal on how to do that...

>> @@ -1259,11 +1273,29 @@ static int git_status_config(const char *k, const char *v, void *cb)
>>                          return error(_("Invalid untracked files mode '%s'"), v);
>>                  return 0;
>>          }
>> +       if (!strcmp(k, "diff.renamelimit")) {
>> +               diff_rename_limit = git_config_int(k, v);
>> +               return 0;
>> +       }
>> +       if (!strcmp(k, "status.renamelimit")) {
>> +               status_rename_limit = git_config_int(k, v);
>> +               return 0;
>> +       }
> 
> Here, since you're already checking diff.renamelimit first, you can
> just set rename_limit in both blocks and not need both
> diff_rename_limit and status_rename_limit.  (Similar can be said for
> diff.renames/status.renames.)

It really doesn't work that way - the call back is called for every 
config setting and there is no specific order they are called with. 
Typically, you just test for and save off any that you care about like 
I"m doing here.

I can update the logic here so that as I save off the settings that it 
will also enforce the priority model (ie the diff setting can't override 
the status setting) and then compute the final value once I have the 
command line arguments as they override either config setting (if present).

On the plus side, this change passes the red/green test but it now 
splits the priority logic into two places and doesn't match with how 
diff and merge handle this same issue.

> 
> <snip>
> 
>> @@ -1297,6 +1329,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
>>                    N_("ignore changes to submodules, optional when: all, dirty, untracked. (Default: all)"),
>>                    PARSE_OPT_OPTARG, NULL, (intptr_t)"all" },
>>                  OPT_COLUMN(0, "column", &s.colopts, N_("list untracked files in columns")),
>> +               OPT_BOOL(0, "no-renames", &no_renames, N_("do not detect renames")),
>> +               { OPTION_CALLBACK, 'M', "find-renames", &rename_score_arg,
>> +                 N_("n"), N_("detect renames, optionally set similarity index"),
>> +                 PARSE_OPT_OPTARG, opt_parse_rename_score },
> 
> Should probably also document these options in
> Documentation/git-status.txt (and maybe Documentation/git-commit.txt
> as well).

Good point, will do.

> 
> Not sure if we want to add a flag for copy detection (similar to
> git-diff's -C/--find-copies and --find-copies-harder), or just leave
> that for when someone finds a need.  If left out, might want to just
> mention that it was considered and intentionally omitted for now in
> the commit message.
> 

I tend to only implement the features I know are actually needed so 
intentionally omitted this (along with many other potential diff options).

>> +       if ((intptr_t)rename_score_arg != -1) {
> 
> I don't understand why rename_score_arg is a (char*) and then you need
> to cast to intptr_t, but that may just be because I haven't done much
> of anything with option parsing.  A quick look at the docs isn't
> making it clear to me, though; could you enlighten me?
> 

Yes, it is related to making parse_options() do what we need.  -1 means 
the command line option wasn't passed so use the default.  NULL means 
the command line argument was passed but without the optional score.  A 
non NULL, non -1 value means the optional score was passed and needs to 
be parsed.  The (intptr_t_) cast is to enable comparing a pointer to an 
integer (-1) without generating a compiler warning.

>> +               s.detect_rename = DIFF_DETECT_RENAME;
> 
> What if status.renames is 'copy' but someone wants to override the
> score for detecting renames and pass --find-renames=40?  Does the
> --find-renames override and degrade the 'copy'?  I think it'd make
> more sense to increase s.detect_rename to at least DIFF_DETECT_RENAME,
> rather than just outright setting it here.
> 

I understand your argument and agree that it makes some sense.  I am 
matching the same logic in merge-recursive.c which just sets 
detect_rename to 1 in this case.  I believe more strongly that they 
should be consistent than in one option over the other.

If I'm reading the merge logic for this case incorrectly or if you're 
willing to patch the merge logic to match :), I'm happy to change this to:

		if (s.detect_rename < DIFF_DETECT_RENAME)
			s.detect_rename = DIFF_DETECT_RENAME;

>> +                                        s.rename_limit;
>> +
>> +       /*
>> +        * We do not have logic to handle the detection of copies.  In
>> +        * fact, it may not even make sense to add such logic: would we
>> +        * really want a change to a base file to be propagated through
>> +        * multiple other files by a merge?
>> +        */
>> +       if (s.detect_rename > DIFF_DETECT_RENAME)
>> +               s.detect_rename = DIFF_DETECT_RENAME;
> 
> This comment and code made sense in merge-recursive.c (which doesn't
> show detected diffs/renames/copies but just uses them for internal
> processing logic).  It does not make sense here; git status could show
> detected copies much like `git diff -C --name-status` shows it.  In
> fact, a quick grep for DIFF_STATUS_COPIED shows multiple hits in
> wt-status.c, so I suspect it already has the necessary logic for
> displaying copies.
> 

Clearly I was following my similar patch series for merge too closely 
without thinking through how they should be different.  Thanks for 
catching this.  Gone. :)


^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v2] add status config and command line options for rename detection
  2018-05-09 14:42 [PATCH v1] add status config and command line options for rename detection Ben Peart
  2018-05-09 15:59 ` Duy Nguyen
  2018-05-09 16:56 ` Elijah Newren
@ 2018-05-10 14:16 ` Ben Peart
  2018-05-10 16:19   ` Elijah Newren
  2018-05-11  6:39   ` Junio C Hamano
  2018-05-11 12:56 ` [PATCH v3] " Ben Peart
  2018-05-11 15:38 ` [PATCH v4] " Ben Peart
  4 siblings, 2 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-10 14:16 UTC (permalink / raw)
  To: git
  Cc: Ben Peart, newren, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass, Ben Peart

After performing a merge that has conflicts, git status will by default attempt
to detect renames which causes many objects to be examined.  In a virtualized
repo, those objects do not exist locally so the rename logic triggers them to be
fetched from the server. This results in the status call taking hours to
complete on very large repos.  Even in a small repo (the GVFS repo) turning off
break and rename detection has a significant impact:

git status --no-renames:
31 secs., 105 loose object downloads

git status --no-breaks
7 secs., 17 loose object downloads

git status --no-breaks --no-renames
1 sec., 1 loose object download

Add a new config status.renames setting to enable turning off rename detection
during status.  This setting will default to the value of diff.renames.

Add a new config status.renamelimit setting to to enable bounding the time spent
finding out inexact renames during status.  This setting will default to the
value of diff.renamelimit.

Add status --no-renames command line option that enables overriding the config
setting from the command line. Add --find-renames[=<n>] to enable detecting
renames and optionally setting the similarity index from the command line.

Note: I removed the --no-breaks command line option from the original patch as
it will no longer be needed once the default has been changed [1] to turn it off.

[1] https://public-inbox.org/git/20180430093421.27551-2-eckhard.s.maass@gmail.com/

Original-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
---

Notes:
    Base Ref: master
    Web-Diff: https://github.com/benpeart/git/commit/823212725b
    Checkout: git fetch https://github.com/benpeart/git status-renames-v2 && git checkout 823212725b
    
    ### Interdiff (v1..v2):
    
    diff --git a/Documentation/config.txt b/Documentation/config.txt
    index b79b83c587..9c8eca05b1 100644
    --- a/Documentation/config.txt
    +++ b/Documentation/config.txt
    @@ -3126,7 +3126,8 @@ status.renameLimit::
     status.renames::
     	Whether and how Git detects renames.  If set to "false",
     	rename detection is disabled. If set to "true", basic rename
    -	detection is enabled.  Defaults to the value of diff.renames.
    +	detection is enabled.  If set to "copies" or "copy", Git will
    +	detect copies, as well.  Defaults to the value of diff.renames.
    
     status.showStash::
     	If set to true, linkgit:git-status[1] will display the number of
    diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
    index c16e27e63d..c4467ffb98 100644
    --- a/Documentation/git-status.txt
    +++ b/Documentation/git-status.txt
    @@ -135,6 +135,16 @@ ignored, then the directory is not shown, but all contents are shown.
     	Display or do not display detailed ahead/behind counts for the
     	branch relative to its upstream branch.  Defaults to true.
    
    +--renames::
    +--no-renames::
    +	Turn on/off rename detection regardless of user configuration.
    +	See also linkgit:git-diff[1] `--no-renames`.
    +
    +--find-renames[=<n>]::
    +	Turn on rename detection, optionally setting the similarity
    +	threshold.
    +	See also linkgit:git-diff[1] `--find-renames`.
    +
     <pathspec>...::
     	See the 'pathspec' entry in linkgit:gitglossary[7].
    
    diff --git a/builtin/commit.c b/builtin/commit.c
    index a545096ddd..db886277f4 100644
    --- a/builtin/commit.c
    +++ b/builtin/commit.c
    @@ -109,10 +109,6 @@ static int have_option_m;
     static struct strbuf message = STRBUF_INIT;
    
     static enum wt_status_format status_format = STATUS_FORMAT_UNSPECIFIED;
    -static int diff_detect_rename = -1;
    -static int status_detect_rename = -1;
    -static int diff_rename_limit = -1;
    -static int status_rename_limit = -1;
    
     static int opt_parse_porcelain(const struct option *opt, const char *arg, int unset)
     {
    @@ -1274,19 +1270,21 @@ static int git_status_config(const char *k, const char *v, void *cb)
     		return 0;
     	}
     	if (!strcmp(k, "diff.renamelimit")) {
    -		diff_rename_limit = git_config_int(k, v);
    +		if (s->rename_limit == -1)
    +			s->rename_limit = git_config_int(k, v);
     		return 0;
     	}
     	if (!strcmp(k, "status.renamelimit")) {
    -		status_rename_limit = git_config_int(k, v);
    +		s->rename_limit = git_config_int(k, v);
     		return 0;
     	}
     	if (!strcmp(k, "diff.renames")) {
    -		diff_detect_rename = git_config_rename(k, v);
    +		if (s->detect_rename == -1)
    +			s->detect_rename = git_config_rename(k, v);
     		return 0;
     	}
     	if (!strcmp(k, "status.renames")) {
    -		status_detect_rename = git_config_rename(k, v);
    +		s->detect_rename = git_config_rename(k, v);
     		return 0;
     	}
     	return git_diff_ui_config(k, v, NULL);
    @@ -1372,27 +1370,13 @@ int cmd_status(int argc, const char **argv, const char *prefix)
     	s.ignore_submodule_arg = ignore_submodule_arg;
     	s.status_format = status_format;
     	s.verbose = verbose;
    -	s.detect_rename = no_renames >= 0 ? !no_renames :
    -					  status_detect_rename >= 0 ? status_detect_rename :
    -					  diff_detect_rename >= 0 ? diff_detect_rename :
    -					  s.detect_rename;
    +	if (no_renames != -1)
    +		s.detect_rename = !no_renames;
     	if ((intptr_t)rename_score_arg != -1) {
     		s.detect_rename = DIFF_DETECT_RENAME;
     		if (rename_score_arg)
     			s.rename_score = parse_rename_score(&rename_score_arg);
     	}
    -	s.rename_limit = status_rename_limit >= 0 ? status_rename_limit :
    -					 diff_rename_limit >= 0 ? diff_rename_limit :
    -					 s.rename_limit;
    -
    -	/*
    -	 * We do not have logic to handle the detection of copies.  In
    -	 * fact, it may not even make sense to add such logic: would we
    -	 * really want a change to a base file to be propagated through
    -	 * multiple other files by a merge?
    -	 */
    -	if (s.detect_rename > DIFF_DETECT_RENAME)
    -		s.detect_rename = DIFF_DETECT_RENAME;
    
     	wt_status_collect(&s);
    
    ### Patches

 Documentation/config.txt     | 10 ++++
 Documentation/git-status.txt | 10 ++++
 builtin/commit.c             | 41 ++++++++++++++++
 diff.c                       |  2 +-
 diff.h                       |  1 +
 t/t7525-status-rename.sh     | 90 ++++++++++++++++++++++++++++++++++++
 wt-status.c                  | 12 +++++
 wt-status.h                  |  4 +-
 8 files changed, 168 insertions(+), 2 deletions(-)
 create mode 100644 t/t7525-status-rename.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb3..9c8eca05b1 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3119,6 +3119,16 @@ status.displayCommentPrefix::
 	behavior of linkgit:git-status[1] in Git 1.8.4 and previous.
 	Defaults to false.
 
+status.renameLimit::
+	The number of files to consider when performing rename detection;
+	if not specified, defaults to the value of diff.renameLimit.
+
+status.renames::
+	Whether and how Git detects renames.  If set to "false",
+	rename detection is disabled. If set to "true", basic rename
+	detection is enabled.  If set to "copies" or "copy", Git will
+	detect copies, as well.  Defaults to the value of diff.renames.
+
 status.showStash::
 	If set to true, linkgit:git-status[1] will display the number of
 	entries currently stashed away.
diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index c16e27e63d..c4467ffb98 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -135,6 +135,16 @@ ignored, then the directory is not shown, but all contents are shown.
 	Display or do not display detailed ahead/behind counts for the
 	branch relative to its upstream branch.  Defaults to true.
 
+--renames::
+--no-renames::
+	Turn on/off rename detection regardless of user configuration.
+	See also linkgit:git-diff[1] `--no-renames`.
+
+--find-renames[=<n>]::
+	Turn on rename detection, optionally setting the similarity
+	threshold.
+	See also linkgit:git-diff[1] `--find-renames`.
+
 <pathspec>...::
 	See the 'pathspec' entry in linkgit:gitglossary[7].
 
diff --git a/builtin/commit.c b/builtin/commit.c
index 5240f11225..db886277f4 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -143,6 +143,16 @@ static int opt_parse_m(const struct option *opt, const char *arg, int unset)
 	return 0;
 }
 
+static int opt_parse_rename_score(const struct option *opt, const char *arg, int unset)
+{
+	const char **value = opt->value;
+	if (arg != NULL && *arg == '=')
+		arg = arg + 1;
+
+	*value = arg;
+	return 0;
+}
+
 static void determine_whence(struct wt_status *s)
 {
 	if (file_exists(git_path_merge_head()))
@@ -1259,11 +1269,31 @@ static int git_status_config(const char *k, const char *v, void *cb)
 			return error(_("Invalid untracked files mode '%s'"), v);
 		return 0;
 	}
+	if (!strcmp(k, "diff.renamelimit")) {
+		if (s->rename_limit == -1)
+			s->rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renamelimit")) {
+		s->rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "diff.renames")) {
+		if (s->detect_rename == -1)
+			s->detect_rename = git_config_rename(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renames")) {
+		s->detect_rename = git_config_rename(k, v);
+		return 0;
+	}
 	return git_diff_ui_config(k, v, NULL);
 }
 
 int cmd_status(int argc, const char **argv, const char *prefix)
 {
+	static int no_renames = -1;
+	static const char *rename_score_arg = (const char *)-1;
 	static struct wt_status s;
 	int fd;
 	struct object_id oid;
@@ -1297,6 +1327,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 		  N_("ignore changes to submodules, optional when: all, dirty, untracked. (Default: all)"),
 		  PARSE_OPT_OPTARG, NULL, (intptr_t)"all" },
 		OPT_COLUMN(0, "column", &s.colopts, N_("list untracked files in columns")),
+		OPT_BOOL(0, "no-renames", &no_renames, N_("do not detect renames")),
+		{ OPTION_CALLBACK, 'M', "find-renames", &rename_score_arg,
+		  N_("n"), N_("detect renames, optionally set similarity index"),
+		  PARSE_OPT_OPTARG, opt_parse_rename_score },
 		OPT_END(),
 	};
 
@@ -1336,6 +1370,13 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	s.status_format = status_format;
 	s.verbose = verbose;
+	if (no_renames != -1)
+		s.detect_rename = !no_renames;
+	if ((intptr_t)rename_score_arg != -1) {
+		s.detect_rename = DIFF_DETECT_RENAME;
+		if (rename_score_arg)
+			s.rename_score = parse_rename_score(&rename_score_arg);
+	}
 
 	wt_status_collect(&s);
 
diff --git a/diff.c b/diff.c
index 1289df4b1f..5dfc24aa6d 100644
--- a/diff.c
+++ b/diff.c
@@ -177,7 +177,7 @@ static int parse_submodule_params(struct diff_options *options, const char *valu
 	return 0;
 }
 
-static int git_config_rename(const char *var, const char *value)
+int git_config_rename(const char *var, const char *value)
 {
 	if (!value)
 		return DIFF_DETECT_RENAME;
diff --git a/diff.h b/diff.h
index d29560f822..dedac472ca 100644
--- a/diff.h
+++ b/diff.h
@@ -324,6 +324,7 @@ extern int git_diff_ui_config(const char *var, const char *value, void *cb);
 extern void diff_setup(struct diff_options *);
 extern int diff_opt_parse(struct diff_options *, const char **, int, const char *);
 extern void diff_setup_done(struct diff_options *);
+extern int git_config_rename(const char *var, const char *value);
 
 #define DIFF_DETECT_RENAME	1
 #define DIFF_DETECT_COPY	2
diff --git a/t/t7525-status-rename.sh b/t/t7525-status-rename.sh
new file mode 100644
index 0000000000..311df8038a
--- /dev/null
+++ b/t/t7525-status-rename.sh
@@ -0,0 +1,90 @@
+#!/bin/sh
+
+test_description='git status rename detection options'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	echo 1 >original &&
+	git add . &&
+	git commit -m"Adding original file." &&
+	mv original renamed &&
+	echo 2 >> renamed &&
+	git add .
+'
+
+cat >.gitignore <<\EOF
+.gitignore
+expect*
+actual*
+EOF
+
+test_expect_success 'status no-options' '
+	git status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status --no-renames' '
+	git status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames false' '
+	git -c diff.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames true' '
+	git -c diff.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status.renames overrides diff.renames false' '
+	git -c diff.renames=true -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames overrides from diff.renames true' '
+	git -c diff.renames=false -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status status.renames=false' '
+	git -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status status.renames=true' '
+	git -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status config overriden' '
+	git -c status.renames=true status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=100%' '
+	git status -M=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual &&
+
+	git status --find-rename=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=01%' '
+	git status -M=01% >actual &&
+	test_i18ngrep "renamed:" actual &&
+
+	git status --find-rename=01% >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index 32f3bcaebd..172f07cbb0 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -138,6 +138,9 @@ void wt_status_prepare(struct wt_status *s)
 	s->show_stash = 0;
 	s->ahead_behind_flags = AHEAD_BEHIND_UNSPECIFIED;
 	s->display_comment_prefix = 0;
+	s->detect_rename = -1;
+	s->rename_score = -1;
+	s->rename_limit = -1;
 }
 
 static void wt_longstatus_print_unmerged_header(struct wt_status *s)
@@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
 	}
 	rev.diffopt.format_callback = wt_status_collect_changed_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_files(&rev, 0);
 }
@@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
 	rev.diffopt.format_callback = wt_status_collect_updated_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_index(&rev, 1);
 }
@@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
 	setup_revisions(0, NULL, &rev, &opt);
 
 	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	rev.diffopt.file = s->fp;
 	rev.diffopt.close_file = 0;
 	/*
diff --git a/wt-status.h b/wt-status.h
index 430770b854..1673d146fa 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -89,7 +89,9 @@ struct wt_status {
 	int show_stash;
 	int hints;
 	enum ahead_behind_flags ahead_behind_flags;
-
+	int detect_rename;
+	int rename_score;
+	int rename_limit;
 	enum wt_status_format status_format;
 	unsigned char sha1_commit[GIT_MAX_RAWSZ]; /* when not Initial */
 

base-commit: a92ae92585d8db14b7871f760f157256cd96742c
-- 
2.17.0.windows.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] add status config and command line options for rename detection
  2018-05-10 14:16 ` [PATCH v2] " Ben Peart
@ 2018-05-10 16:19   ` Elijah Newren
  2018-05-10 19:09     ` Ben Peart
  2018-05-11  1:57     ` Junio C Hamano
  2018-05-11  6:39   ` Junio C Hamano
  1 sibling, 2 replies; 17+ messages in thread
From: Elijah Newren @ 2018-05-10 16:19 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

Hi Ben,

On Thu, May 10, 2018 at 7:16 AM, Ben Peart <Ben.Peart@microsoft.com> wrote:
> After performing a merge that has conflicts, git status will by default attempt
> to detect renames which causes many objects to be examined.  In a virtualized
> repo, those objects do not exist locally so the rename logic triggers them to be
> fetched from the server. This results in the status call taking hours to
> complete on very large repos.  Even in a small repo (the GVFS repo) turning off
> break and rename detection has a significant impact:

It'd be nice if you could show that impact by comparing 'git status'
to 'git status --no-renames', for some repo.  Showing only the latter
gives us no way to assess the impact.

> git status --no-renames:
> 31 secs., 105 loose object downloads
>
> git status --no-breaks
> 7 secs., 17 loose object downloads
>
> git status --no-breaks --no-renames
> 1 sec., 1 loose object download

This patch doesn't add a --no-breaks option and it doesn't exist
previously, so adding it to the commit message serves to confuse
rather than help.  I'd just drop the last two of these (and redo the
timing for --no-renames assuming you are built on
em/status-rename-config).

> Add a new config status.renames setting to enable turning off rename detection
> during status.  This setting will default to the value of diff.renames.
>
> Add a new config status.renamelimit setting to to enable bounding the time spent
> finding out inexact renames during status.  This setting will default to the
> value of diff.renamelimit.

It may be worth mentioning that these config settings also affect 'git
commit' (and it does, in my testing, which I think is a good thing).

> Add status --no-renames command line option that enables overriding the config
> setting from the command line. Add --find-renames[=<n>] to enable detecting
> renames and optionally setting the similarity index from the command line.

The command line options are specific to 'git status'.  I don't really
have a strong opinion on whether they should also be added to
git-commit; I suspect users would be more likely to use the config
options in order to set it once and forget about it and that users
would be more likely to want to override their config setting for
status than for commit.

> Note: I removed the --no-breaks command line option from the original patch as
> it will no longer be needed once the default has been changed [1] to turn it off.
>
> [1] https://public-inbox.org/git/20180430093421.27551-2-eckhard.s.maass@gmail.com/

I'd just drop these lines from the commit message, and instead mention
that your patch depends on em/status-rename-config.

> Original-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
> Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
> ---
>
> Notes:
>     Base Ref: master

This patch does not apply to master; it has conflicts.

>     Web-Diff: https://github.com/benpeart/git/commit/823212725b

This web diff shows em/status-rename-config as the parent commit, not
master.  Since your commit message mentions you want the change to
break detection provided by that series, just listing it as the
explicit base seems like the right way to go.

>     ### Interdiff (v1..v2):

Thanks.

> +       if ((intptr_t)rename_score_arg != -1) {
> +               s.detect_rename = DIFF_DETECT_RENAME;

I'd still prefer this was a
        if (s.detect_rename < DIFF_DETECT_RENAME)
                s.detect_rename = DIFF_DETECT_RENAME;

If a user specifies they are willing to pay for copy detection, but
then just passes --find-renames=40% because they want to find more
renames, it seems odd to disable copy detection to me.

> +++ b/t/t7525-status-rename.sh

Testcases look good.  It'd be nice to also add a few testcases where
copy detection is turned on -- in particular, I'd like to see one with
--find-renames=$DIFFERENT_THAN_DEFAULT being passed when
merge.renames=copies.


> +test_expect_success 'setup' '
> +       echo 1 >original &&
> +       git add . &&
> +       git commit -m"Adding original file." &&
> +       mv original renamed &&
> +       echo 2 >> renamed &&
> +       git add .
> +'


> +cat >.gitignore <<\EOF
> +.gitignore
> +expect*
> +actual*
> +EOF

Can this just be included in the setup?


Everything else in the patch looked good to me.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] add status config and command line options for rename detection
  2018-05-10 16:19   ` Elijah Newren
@ 2018-05-10 19:09     ` Ben Peart
  2018-05-10 22:31       ` Elijah Newren
  2018-05-11  1:57     ` Junio C Hamano
  1 sibling, 1 reply; 17+ messages in thread
From: Ben Peart @ 2018-05-10 19:09 UTC (permalink / raw)
  To: Elijah Newren, Ben Peart
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass



On 5/10/2018 12:19 PM, Elijah Newren wrote:
> Hi Ben,
> 
> On Thu, May 10, 2018 at 7:16 AM, Ben Peart <Ben.Peart@microsoft.com> wrote:
>> After performing a merge that has conflicts, git status will by default attempt
>> to detect renames which causes many objects to be examined.  In a virtualized
>> repo, those objects do not exist locally so the rename logic triggers them to be
>> fetched from the server. This results in the status call taking hours to
>> complete on very large repos.  Even in a small repo (the GVFS repo) turning off
>> break and rename detection has a significant impact:
> 
> It'd be nice if you could show that impact by comparing 'git status'
> to 'git status --no-renames', for some repo.  Showing only the latter
> gives us no way to assess the impact.
> 

Given the example perf impact is arbitrary (the actual example that 
triggered this patch took status from 2+ hours to seconds) and can't be 
replicated using the current performance tools in git, I'm just going 
drop the specific numbers.  I believe the patch is worth while just to 
give users the flexibility to control these behaviors.

>> git status --no-renames:
>> 31 secs., 105 loose object downloads
>>
>> git status --no-breaks
>> 7 secs., 17 loose object downloads
>>
>> git status --no-breaks --no-renames
>> 1 sec., 1 loose object download
> 
> This patch doesn't add a --no-breaks option and it doesn't exist
> previously, so adding it to the commit message serves to confuse
> rather than help.  I'd just drop the last two of these (and redo the
> timing for --no-renames assuming you are built on
> em/status-rename-config).
> 

OK

>> Add a new config status.renames setting to enable turning off rename detection
>> during status.  This setting will default to the value of diff.renames.
>>
>> Add a new config status.renamelimit setting to to enable bounding the time spent
>> finding out inexact renames during status.  This setting will default to the
>> value of diff.renamelimit.
> 
> It may be worth mentioning that these config settings also affect 'git
> commit' (and it does, in my testing, which I think is a good thing).
> 

I agree this is a good thing as the other status settings behave the 
same way.  I'll update the documentation to reflect this as well.

>> Add status --no-renames command line option that enables overriding the config
>> setting from the command line. Add --find-renames[=<n>] to enable detecting
>> renames and optionally setting the similarity index from the command line.
> 
> The command line options are specific to 'git status'.  I don't really
> have a strong opinion on whether they should also be added to
> git-commit; I suspect users would be more likely to use the config
> options in order to set it once and forget about it and that users
> would be more likely to want to override their config setting for
> status than for commit.
> 
>> Note: I removed the --no-breaks command line option from the original patch as
>> it will no longer be needed once the default has been changed [1] to turn it off.
>>
>> [1] https://public-inbox.org/git/20180430093421.27551-2-eckhard.s.maass@gmail.com/
> 
> I'd just drop these lines from the commit message, and instead mention
> that your patch depends on em/status-rename-config.
> 

OK

>> +       if ((intptr_t)rename_score_arg != -1) {
>> +               s.detect_rename = DIFF_DETECT_RENAME;
> 
> I'd still prefer this was a
>          if (s.detect_rename < DIFF_DETECT_RENAME)
>                  s.detect_rename = DIFF_DETECT_RENAME;
> 
> If a user specifies they are willing to pay for copy detection, but
> then just passes --find-renames=40% because they want to find more
> renames, it seems odd to disable copy detection to me.
> 

I agree and will change it. It is unfortunate this will behave 
differently than it does with merge.  Fixing the merge behavior to match 
is outside the scope of this patch.

>> +++ b/t/t7525-status-rename.sh
> 
> Testcases look good.  It'd be nice to also add a few testcases where
> copy detection is turned on -- in particular, I'd like to see one with
> --find-renames=$DIFFERENT_THAN_DEFAULT being passed when
> merge.renames=copies.
> 

OK.  I also added tests to verify the settings correctly impact commit.

> 
>> +test_expect_success 'setup' '
>> +       echo 1 >original &&
>> +       git add . &&
>> +       git commit -m"Adding original file." &&
>> +       mv original renamed &&
>> +       echo 2 >> renamed &&
>> +       git add .
>> +'
> 
> 
>> +cat >.gitignore <<\EOF
>> +.gitignore
>> +expect*
>> +actual*
>> +EOF
> 
> Can this just be included in the setup?
> 

OK

> 
> Everything else in the patch looked good to me.
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] add status config and command line options for rename detection
  2018-05-10 19:09     ` Ben Peart
@ 2018-05-10 22:31       ` Elijah Newren
  2018-05-11 12:50         ` Ben Peart
  0 siblings, 1 reply; 17+ messages in thread
From: Elijah Newren @ 2018-05-10 22:31 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

Hi Ben,

On Thu, May 10, 2018 at 12:09 PM, Ben Peart <peartben@gmail.com> wrote:
> On 5/10/2018 12:19 PM, Elijah Newren wrote:
>> On Thu, May 10, 2018 at 7:16 AM, Ben Peart <Ben.Peart@microsoft.com>
>> wrote:

> Given the example perf impact is arbitrary (the actual example that
> triggered this patch took status from 2+ hours to seconds) and can't be
> replicated using the current performance tools in git, I'm just going drop
> the specific numbers.  I believe the patch is worth while just to give users
> the flexibility to control these behaviors.

Your parenthetical statement of timing going from hours to seconds I
think would be great; I don't think we need precise numbers.

>>> +       if ((intptr_t)rename_score_arg != -1) {
>>> +               s.detect_rename = DIFF_DETECT_RENAME;
>>
>>
>> I'd still prefer this was a
>>          if (s.detect_rename < DIFF_DETECT_RENAME)
>>                  s.detect_rename = DIFF_DETECT_RENAME;
>>
>> If a user specifies they are willing to pay for copy detection, but
>> then just passes --find-renames=40% because they want to find more
>> renames, it seems odd to disable copy detection to me.
>>
>
> I agree and will change it. It is unfortunate this will behave differently
> than it does with merge.  Fixing the merge behavior to match is outside the
> scope of this patch.

I agree that changing merge is outside the scope of this patch, but
I'm curious what change you have in mind for it to "make it match".
In particular, merge-recursive.c already has (or will shortly have)
+       if (opts.detect_rename > DIFF_DETECT_RENAME)
+               opts.detect_rename = DIFF_DETECT_RENAME;
from your commit 85b460305ce7 ("merge: add merge.renames config
setting", 2018-05-02), so I'm not sure why we'd want to carefully
propagate a larger value for o->{diff,merge}_detect_rename prior to
this point.  If it's just "future proofing" because you suspect that
copy information could be useful to the merging algorithm and we'll
eventually get rid of these two lines of code, then I could get behind
such a change, though color me skeptical that copy information would
ever turn out to be useful in that context.

The one place copy detection does make sense inside a merge is for the
diffstat shown at the end (from builtin/merge.c), but it currently
isn't controlled by any configuration setting at all.  When it is
hooked up, it'd probably store the value separately from
merge-recursive's internal o->{diff,merge}_detect_rename anyway,
because builtin/merge.c's diffstat should be controlled by the
relevant confiig settings and flags (merge.renames, diff.renames,
-Xfind-renames, etc.) regardless of which merge strategy (recursive,
resolve, octopus, ours, ort) is employed.  And when that is hooked up,
I agree with you that it should look like what you've done with
status.renames here.  In fact, if you'd like to take a crack at it, I
think you'd do a great job.  :-)  If not, it's on my list of things to
do.

>> Testcases look good.  It'd be nice to also add a few testcases where
>> copy detection is turned on -- in particular, I'd like to see one with
>> --find-renames=$DIFFERENT_THAN_DEFAULT being passed when
>> merge.renames=copies.
>>
>
> OK.  I also added tests to verify the settings correctly impact commit.

Nice!

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] add status config and command line options for rename detection
  2018-05-10 16:19   ` Elijah Newren
  2018-05-10 19:09     ` Ben Peart
@ 2018-05-11  1:57     ` Junio C Hamano
  1 sibling, 0 replies; 17+ messages in thread
From: Junio C Hamano @ 2018-05-11  1:57 UTC (permalink / raw)
  To: Elijah Newren
  Cc: Ben Peart, git, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

Elijah Newren <newren@gmail.com> writes:

>> Note: I removed the --no-breaks command line option from the original patch as
>> it will no longer be needed once the default has been changed [1] to turn it off.
>>
>> [1] https://public-inbox.org/git/20180430093421.27551-2-eckhard.s.maass@gmail.com/
>
> I'd just drop these lines from the commit message, and instead mention
> that your patch depends on em/status-rename-config.
>
>> Original-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
>> Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
>> ---

Other things seem to have been resolved between you two already, so
I'll only comment on a minor tangent here.

>> Notes:
>>     Base Ref: master
>
> This patch does not apply to master; it has conflicts.
>
>>     Web-Diff: https://github.com/benpeart/git/commit/823212725b

As Git is distributed, unlike tags that are meant to be global among
project participants by convention, a branch name can never be used
as a trustable base among developers.  Your 'master' branch may
point at a different commit from mine, and my 'master' branch today
may point at a different commit from mine yesterday.

I've seen patches that used a similar note below the three-dash line
that named an exact commit object name.  That is a lot more reliable
way to convey the information necessary to consturct the exact state
the contributor worked on.

> This web diff shows em/status-rename-config as the parent commit, not
> master.  Since your commit message mentions you want the change to
> break detection provided by that series, just listing it as the
> explicit base seems like the right way to go.

Thanks for digging.  That would work well, too.

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] add status config and command line options for rename detection
  2018-05-10 14:16 ` [PATCH v2] " Ben Peart
  2018-05-10 16:19   ` Elijah Newren
@ 2018-05-11  6:39   ` Junio C Hamano
  1 sibling, 0 replies; 17+ messages in thread
From: Junio C Hamano @ 2018-05-11  6:39 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, newren, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

Ben Peart <Ben.Peart@microsoft.com> writes:

>  Documentation/config.txt     | 10 ++++
>  Documentation/git-status.txt | 10 ++++
>  builtin/commit.c             | 41 ++++++++++++++++
>  diff.c                       |  2 +-
>  diff.h                       |  1 +
>  t/t7525-status-rename.sh     | 90 ++++++++++++++++++++++++++++++++++++
>  wt-status.c                  | 12 +++++
>  wt-status.h                  |  4 +-
>  8 files changed, 168 insertions(+), 2 deletions(-)
>  create mode 100644 t/t7525-status-rename.sh

I'll mark the new script as executable (otherwise the test will not
even start).


^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v2] add status config and command line options for rename detection
  2018-05-10 22:31       ` Elijah Newren
@ 2018-05-11 12:50         ` Ben Peart
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-11 12:50 UTC (permalink / raw)
  To: Elijah Newren
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass



On 5/10/2018 6:31 PM, Elijah Newren wrote:
> Hi Ben,
> 
> On Thu, May 10, 2018 at 12:09 PM, Ben Peart <peartben@gmail.com> wrote:
>> On 5/10/2018 12:19 PM, Elijah Newren wrote:
>>> On Thu, May 10, 2018 at 7:16 AM, Ben Peart <Ben.Peart@microsoft.com>
>>> wrote:
> 
>> Given the example perf impact is arbitrary (the actual example that
>> triggered this patch took status from 2+ hours to seconds) and can't be
>> replicated using the current performance tools in git, I'm just going drop
>> the specific numbers.  I believe the patch is worth while just to give users
>> the flexibility to control these behaviors.
> 
> Your parenthetical statement of timing going from hours to seconds I
> think would be great; I don't think we need precise numbers.
> 
>>>> +       if ((intptr_t)rename_score_arg != -1) {
>>>> +               s.detect_rename = DIFF_DETECT_RENAME;
>>>
>>>
>>> I'd still prefer this was a
>>>           if (s.detect_rename < DIFF_DETECT_RENAME)
>>>                   s.detect_rename = DIFF_DETECT_RENAME;
>>>
>>> If a user specifies they are willing to pay for copy detection, but
>>> then just passes --find-renames=40% because they want to find more
>>> renames, it seems odd to disable copy detection to me.
>>>
>>
>> I agree and will change it. It is unfortunate this will behave differently
>> than it does with merge.  Fixing the merge behavior to match is outside the
>> scope of this patch.
> 
> I agree that changing merge is outside the scope of this patch, but
> I'm curious what change you have in mind for it to "make it match".
> In particular, merge-recursive.c already has (or will shortly have)
> +       if (opts.detect_rename > DIFF_DETECT_RENAME)
> +               opts.detect_rename = DIFF_DETECT_RENAME;
> from your commit 85b460305ce7 ("merge: add merge.renames config
> setting", 2018-05-02), 

This is a good point that I missed.  With that recent change to merge, 
it no longer matters that the settings parsing code caps detect_rename 
at DIFF_DETECT_RENAME because it will cap it later anyway so there is no 
need to change the merge option behavior.

> The one place copy detection does make sense inside a merge is for the
> diffstat shown at the end (from builtin/merge.c), but it currently
> isn't controlled by any configuration setting at all.  When it is
> hooked up, it'd probably store the value separately from
> merge-recursive's internal o->{diff,merge}_detect_rename anyway,
> because builtin/merge.c's diffstat should be controlled by the
> relevant confiig settings and flags (merge.renames, diff.renames,
> -Xfind-renames, etc.) regardless of which merge strategy (recursive,
> resolve, octopus, ours, ort) is employed.  And when that is hooked up,
> I agree with you that it should look like what you've done with
> status.renames here.  In fact, if you'd like to take a crack at it, I
> think you'd do a great job.  :-)  If not, it's on my list of things to
> do.
> 

Thanks but I'll leave that to you. :)  I have a large backlog of patches 
I would like to see pushed through the mailing list into master.  We've 
been sitting on this one for over a year.  If the current rate is any 
indication, it will take man years to get caught up.

>>> Testcases look good.  It'd be nice to also add a few testcases where
>>> copy detection is turned on -- in particular, I'd like to see one with
>>> --find-renames=$DIFFERENT_THAN_DEFAULT being passed when
>>> merge.renames=copies.
>>>
>>
>> OK.  I also added tests to verify the settings correctly impact commit.
> 
> Nice!
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v3] add status config and command line options for rename detection
  2018-05-09 14:42 [PATCH v1] add status config and command line options for rename detection Ben Peart
                   ` (2 preceding siblings ...)
  2018-05-10 14:16 ` [PATCH v2] " Ben Peart
@ 2018-05-11 12:56 ` Ben Peart
  2018-05-11 14:33   ` Elijah Newren
  2018-05-12  8:04   ` Eckhard Maaß
  2018-05-11 15:38 ` [PATCH v4] " Ben Peart
  4 siblings, 2 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-11 12:56 UTC (permalink / raw)
  To: git
  Cc: Ben Peart, newren, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass, Ben Peart

After performing a merge that has conflicts git status will, by default,
attempt to detect renames which causes many objects to be examined.  In a
virtualized repo, those objects do not exist locally so the rename logic
triggers them to be fetched from the server. This results in the status call
taking hours to complete on very large repos vs seconds with this patch.

Add a new config status.renames setting to enable turning off rename detection
during status and commit.  This setting will default to the value of
diff.renames.

Add a new config status.renamelimit setting to to enable bounding the time
spent finding out inexact renames during status and commit.  This setting will
default to the value of diff.renamelimit.

Add --no-renames command line option to status that enables overriding the
config setting from the command line. Add --find-renames[=<n>] command line
option to status that enables detecting renames and optionally setting the
similarity index.

This patch depends on em/status-rename-config

Original-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
---

Notes:
    Base Ref: commit dc6b1d92ca9c0c538daa244e3910bb8b2a50d959 (em/status-rename-config)
    Web-Diff: https://github.com/benpeart/git/commit/5bac43610b
    Checkout: git fetch https://github.com/benpeart/git status-renames-v3 && git checkout 5bac43610b
    
    ### Interdiff (v2..v3):
    
    diff --git a/Documentation/config.txt b/Documentation/config.txt
    index 9c8eca05b1..88884f1ead 100644
    --- a/Documentation/config.txt
    +++ b/Documentation/config.txt
    @@ -3120,14 +3120,16 @@ status.displayCommentPrefix::
     	Defaults to false.
    
     status.renameLimit::
    -	The number of files to consider when performing rename detection;
    -	if not specified, defaults to the value of diff.renameLimit.
    +	The number of files to consider when performing rename detection
    +	in linkgit:git-status[1] and linkgit:git-commit[1]. Defaults to
    +	the value of diff.renameLimit.
    
     status.renames::
    -	Whether and how Git detects renames.  If set to "false",
    -	rename detection is disabled. If set to "true", basic rename
    -	detection is enabled.  If set to "copies" or "copy", Git will
    -	detect copies, as well.  Defaults to the value of diff.renames.
    +	Whether and how Git detects renames in linkgit:git-status[1] and
    +	linkgit:git-commit[1] .  If set to "false", rename detection is
    +	disabled. If set to "true", basic rename detection is enabled.
    +	If set to "copies" or "copy", Git will detect copies, as well.
    +	Defaults to the value of diff.renames.
    
     status.showStash::
     	If set to true, linkgit:git-status[1] will display the number of
    diff --git a/builtin/commit.c b/builtin/commit.c
    index db886277f4..b50e33ef48 100644
    --- a/builtin/commit.c
    +++ b/builtin/commit.c
    @@ -1373,6 +1373,7 @@ int cmd_status(int argc, const char **argv, const char *prefix)
     	if (no_renames != -1)
     		s.detect_rename = !no_renames;
     	if ((intptr_t)rename_score_arg != -1) {
    +		if (s.detect_rename < DIFF_DETECT_RENAME)
     			s.detect_rename = DIFF_DETECT_RENAME;
     		if (rename_score_arg)
     			s.rename_score = parse_rename_score(&rename_score_arg);
    diff --git a/t/t7525-status-rename.sh b/t/t7525-status-rename.sh
    old mode 100644
    new mode 100755
    index 311df8038a..ef8b1b3078
    --- a/t/t7525-status-rename.sh
    +++ b/t/t7525-status-rename.sh
    @@ -10,14 +10,13 @@ test_expect_success 'setup' '
     	git commit -m"Adding original file." &&
     	mv original renamed &&
     	echo 2 >> renamed &&
    -	git add .
    -'
    -
    -cat >.gitignore <<\EOF
    +	git add . &&
    +	cat >.gitignore <<-\EOF
     	.gitignore
     	expect*
     	actual*
     	EOF
    +'
    
     test_expect_success 'status no-options' '
     	git status >actual &&
    @@ -63,7 +62,18 @@ test_expect_success 'status status.renames=true' '
     	test_i18ngrep "renamed:" actual
     '
    
    -test_expect_success 'status config overriden' '
    +test_expect_success 'commit honors status.renames=false' '
    +	git -c status.renames=false commit --dry-run >actual &&
    +	test_i18ngrep "deleted:" actual &&
    +	test_i18ngrep "new file:" actual
    +'
    +
    +test_expect_success 'commit honors status.renames=true' '
    +	git -c status.renames=true commit --dry-run >actual &&
    +	test_i18ngrep "renamed:" actual
    +'
    +
    +test_expect_success 'status config overridden' '
     	git -c status.renames=true status --no-renames >actual &&
     	test_i18ngrep "deleted:" actual &&
     	test_i18ngrep "new file:" actual
    @@ -87,4 +97,17 @@ test_expect_success 'status score=01%' '
     	test_i18ngrep "renamed:" actual
     '
    
    +test_expect_success 'copies not overridden by find-rename' '
    +	cp renamed copy &&
    +	git add copy &&
    +
    +	git -c status.renames=copies status -M=01% >actual &&
    +	test_i18ngrep "copied:" actual &&
    +	test_i18ngrep "renamed:" actual &&
    +
    +	git -c status.renames=copies status --find-rename=01% >actual &&
    +	test_i18ngrep "copied:" actual &&
    +	test_i18ngrep "renamed:" actual
    +'
    +
     test_done
    
    ### Patches

 Documentation/config.txt     |  12 ++++
 Documentation/git-status.txt |  10 ++++
 builtin/commit.c             |  42 +++++++++++++
 diff.c                       |   2 +-
 diff.h                       |   1 +
 t/t7525-status-rename.sh     | 113 +++++++++++++++++++++++++++++++++++
 wt-status.c                  |  12 ++++
 wt-status.h                  |   4 +-
 8 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100755 t/t7525-status-rename.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb3..88884f1ead 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3119,6 +3119,18 @@ status.displayCommentPrefix::
 	behavior of linkgit:git-status[1] in Git 1.8.4 and previous.
 	Defaults to false.
 
+status.renameLimit::
+	The number of files to consider when performing rename detection
+	in linkgit:git-status[1] and linkgit:git-commit[1]. Defaults to
+	the value of diff.renameLimit.
+
+status.renames::
+	Whether and how Git detects renames in linkgit:git-status[1] and
+	linkgit:git-commit[1] .  If set to "false", rename detection is
+	disabled. If set to "true", basic rename detection is enabled.
+	If set to "copies" or "copy", Git will detect copies, as well.
+	Defaults to the value of diff.renames.
+
 status.showStash::
 	If set to true, linkgit:git-status[1] will display the number of
 	entries currently stashed away.
diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index c16e27e63d..c4467ffb98 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -135,6 +135,16 @@ ignored, then the directory is not shown, but all contents are shown.
 	Display or do not display detailed ahead/behind counts for the
 	branch relative to its upstream branch.  Defaults to true.
 
+--renames::
+--no-renames::
+	Turn on/off rename detection regardless of user configuration.
+	See also linkgit:git-diff[1] `--no-renames`.
+
+--find-renames[=<n>]::
+	Turn on rename detection, optionally setting the similarity
+	threshold.
+	See also linkgit:git-diff[1] `--find-renames`.
+
 <pathspec>...::
 	See the 'pathspec' entry in linkgit:gitglossary[7].
 
diff --git a/builtin/commit.c b/builtin/commit.c
index 5240f11225..b50e33ef48 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -143,6 +143,16 @@ static int opt_parse_m(const struct option *opt, const char *arg, int unset)
 	return 0;
 }
 
+static int opt_parse_rename_score(const struct option *opt, const char *arg, int unset)
+{
+	const char **value = opt->value;
+	if (arg != NULL && *arg == '=')
+		arg = arg + 1;
+
+	*value = arg;
+	return 0;
+}
+
 static void determine_whence(struct wt_status *s)
 {
 	if (file_exists(git_path_merge_head()))
@@ -1259,11 +1269,31 @@ static int git_status_config(const char *k, const char *v, void *cb)
 			return error(_("Invalid untracked files mode '%s'"), v);
 		return 0;
 	}
+	if (!strcmp(k, "diff.renamelimit")) {
+		if (s->rename_limit == -1)
+			s->rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renamelimit")) {
+		s->rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "diff.renames")) {
+		if (s->detect_rename == -1)
+			s->detect_rename = git_config_rename(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renames")) {
+		s->detect_rename = git_config_rename(k, v);
+		return 0;
+	}
 	return git_diff_ui_config(k, v, NULL);
 }
 
 int cmd_status(int argc, const char **argv, const char *prefix)
 {
+	static int no_renames = -1;
+	static const char *rename_score_arg = (const char *)-1;
 	static struct wt_status s;
 	int fd;
 	struct object_id oid;
@@ -1297,6 +1327,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 		  N_("ignore changes to submodules, optional when: all, dirty, untracked. (Default: all)"),
 		  PARSE_OPT_OPTARG, NULL, (intptr_t)"all" },
 		OPT_COLUMN(0, "column", &s.colopts, N_("list untracked files in columns")),
+		OPT_BOOL(0, "no-renames", &no_renames, N_("do not detect renames")),
+		{ OPTION_CALLBACK, 'M', "find-renames", &rename_score_arg,
+		  N_("n"), N_("detect renames, optionally set similarity index"),
+		  PARSE_OPT_OPTARG, opt_parse_rename_score },
 		OPT_END(),
 	};
 
@@ -1336,6 +1370,14 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	s.status_format = status_format;
 	s.verbose = verbose;
+	if (no_renames != -1)
+		s.detect_rename = !no_renames;
+	if ((intptr_t)rename_score_arg != -1) {
+		if (s.detect_rename < DIFF_DETECT_RENAME)
+			s.detect_rename = DIFF_DETECT_RENAME;
+		if (rename_score_arg)
+			s.rename_score = parse_rename_score(&rename_score_arg);
+	}
 
 	wt_status_collect(&s);
 
diff --git a/diff.c b/diff.c
index 1289df4b1f..5dfc24aa6d 100644
--- a/diff.c
+++ b/diff.c
@@ -177,7 +177,7 @@ static int parse_submodule_params(struct diff_options *options, const char *valu
 	return 0;
 }
 
-static int git_config_rename(const char *var, const char *value)
+int git_config_rename(const char *var, const char *value)
 {
 	if (!value)
 		return DIFF_DETECT_RENAME;
diff --git a/diff.h b/diff.h
index d29560f822..dedac472ca 100644
--- a/diff.h
+++ b/diff.h
@@ -324,6 +324,7 @@ extern int git_diff_ui_config(const char *var, const char *value, void *cb);
 extern void diff_setup(struct diff_options *);
 extern int diff_opt_parse(struct diff_options *, const char **, int, const char *);
 extern void diff_setup_done(struct diff_options *);
+extern int git_config_rename(const char *var, const char *value);
 
 #define DIFF_DETECT_RENAME	1
 #define DIFF_DETECT_COPY	2
diff --git a/t/t7525-status-rename.sh b/t/t7525-status-rename.sh
new file mode 100755
index 0000000000..ef8b1b3078
--- /dev/null
+++ b/t/t7525-status-rename.sh
@@ -0,0 +1,113 @@
+#!/bin/sh
+
+test_description='git status rename detection options'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	echo 1 >original &&
+	git add . &&
+	git commit -m"Adding original file." &&
+	mv original renamed &&
+	echo 2 >> renamed &&
+	git add . &&
+	cat >.gitignore <<-\EOF
+	.gitignore
+	expect*
+	actual*
+	EOF
+'
+
+test_expect_success 'status no-options' '
+	git status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status --no-renames' '
+	git status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames false' '
+	git -c diff.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames true' '
+	git -c diff.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status.renames overrides diff.renames false' '
+	git -c diff.renames=true -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames overrides from diff.renames true' '
+	git -c diff.renames=false -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status status.renames=false' '
+	git -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status status.renames=true' '
+	git -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'commit honors status.renames=false' '
+	git -c status.renames=false commit --dry-run >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'commit honors status.renames=true' '
+	git -c status.renames=true commit --dry-run >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status config overridden' '
+	git -c status.renames=true status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=100%' '
+	git status -M=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual &&
+
+	git status --find-rename=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=01%' '
+	git status -M=01% >actual &&
+	test_i18ngrep "renamed:" actual &&
+
+	git status --find-rename=01% >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'copies not overridden by find-rename' '
+	cp renamed copy &&
+	git add copy &&
+
+	git -c status.renames=copies status -M=01% >actual &&
+	test_i18ngrep "copied:" actual &&
+	test_i18ngrep "renamed:" actual &&
+
+	git -c status.renames=copies status --find-rename=01% >actual &&
+	test_i18ngrep "copied:" actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index 32f3bcaebd..172f07cbb0 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -138,6 +138,9 @@ void wt_status_prepare(struct wt_status *s)
 	s->show_stash = 0;
 	s->ahead_behind_flags = AHEAD_BEHIND_UNSPECIFIED;
 	s->display_comment_prefix = 0;
+	s->detect_rename = -1;
+	s->rename_score = -1;
+	s->rename_limit = -1;
 }
 
 static void wt_longstatus_print_unmerged_header(struct wt_status *s)
@@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
 	}
 	rev.diffopt.format_callback = wt_status_collect_changed_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_files(&rev, 0);
 }
@@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
 	rev.diffopt.format_callback = wt_status_collect_updated_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_index(&rev, 1);
 }
@@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
 	setup_revisions(0, NULL, &rev, &opt);
 
 	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	rev.diffopt.file = s->fp;
 	rev.diffopt.close_file = 0;
 	/*
diff --git a/wt-status.h b/wt-status.h
index 430770b854..1673d146fa 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -89,7 +89,9 @@ struct wt_status {
 	int show_stash;
 	int hints;
 	enum ahead_behind_flags ahead_behind_flags;
-
+	int detect_rename;
+	int rename_score;
+	int rename_limit;
 	enum wt_status_format status_format;
 	unsigned char sha1_commit[GIT_MAX_RAWSZ]; /* when not Initial */
 

base-commit: dc6b1d92ca9c0c538daa244e3910bb8b2a50d959
-- 
2.17.0.windows.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] add status config and command line options for rename detection
  2018-05-11 12:56 ` [PATCH v3] " Ben Peart
@ 2018-05-11 14:33   ` Elijah Newren
  2018-05-12  8:04   ` Eckhard Maaß
  1 sibling, 0 replies; 17+ messages in thread
From: Elijah Newren @ 2018-05-11 14:33 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

Hi Ben,

On Fri, May 11, 2018 at 5:56 AM, Ben Peart <Ben.Peart@microsoft.com> wrote:
> After performing a merge that has conflicts git status will, by default,
> attempt to detect renames which causes many objects to be examined.  In a
> virtualized repo, those objects do not exist locally so the rename logic
> triggers them to be fetched from the server. This results in the status call
> taking hours to complete on very large repos vs seconds with this patch.
>
> Add a new config status.renames setting to enable turning off rename detection
> during status and commit.  This setting will default to the value of
> diff.renames.
>
> Add a new config status.renamelimit setting to to enable bounding the time
> spent finding out inexact renames during status and commit.  This setting will
> default to the value of diff.renamelimit.
>
> Add --no-renames command line option to status that enables overriding the
> config setting from the command line. Add --find-renames[=<n>] command line
> option to status that enables detecting renames and optionally setting the
> similarity index.

Any chance I could get you to re-wrap this at a smaller column width?
Doesn't fit in my (80-char) terminal when I run `git log`; a couple
lines run over by a couple characters.  (Sorry for not noticing this
earlier)

> This patch depends on em/status-rename-config

I'd leave this line for the notes.  It's useful information now, but
won't be to someone looking at the commit a year from now, so it
probably doesn't belong in the commit message.


With those two changes:
  Reviewed-by: Elijah Newren <newren@gmail.com>

^ permalink raw reply	[flat|nested] 17+ messages in thread

* [PATCH v4] add status config and command line options for rename detection
  2018-05-09 14:42 [PATCH v1] add status config and command line options for rename detection Ben Peart
                   ` (3 preceding siblings ...)
  2018-05-11 12:56 ` [PATCH v3] " Ben Peart
@ 2018-05-11 15:38 ` Ben Peart
  4 siblings, 0 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-11 15:38 UTC (permalink / raw)
  To: git
  Cc: Ben Peart, newren, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass, Ben Peart

After performing a merge that has conflicts git status will, by default,
attempt to detect renames which causes many objects to be examined.  In a
virtualized repo, those objects do not exist locally so the rename logic
triggers them to be fetched from the server. This results in the status call
taking hours to complete on very large repos vs seconds with this patch.

Add a new config status.renames setting to enable turning off rename
detection during status and commit.  This setting will default to the value
of diff.renames.

Add a new config status.renamelimit setting to to enable bounding the time
spent finding out inexact renames during status and commit.  This setting
will default to the value of diff.renamelimit.

Add --no-renames command line option to status that enables overriding the
config setting from the command line. Add --find-renames[=<n>] command line
option to status that enables detecting renames and optionally setting the
similarity index.

Reviewed-by: Elijah Newren <newren@gmail.com>
Original-Patch-by: Alejandro Pauly <alpauly@microsoft.com>
Signed-off-by: Ben Peart <Ben.Peart@microsoft.com>
---

Notes:
    Base Ref: commit dc6b1d92ca9c0c538daa244e3910bb8b2a50d959 (em/status-rename-config)
    Web-Diff: https://github.com/benpeart/git/commit/95974d512b
    Checkout: git fetch https://github.com/benpeart/git status-renames-v4 && git checkout 95974d512b
    
    ### Interdiff (v3..v4):
    
    ### Patches

 Documentation/config.txt     |  12 ++++
 Documentation/git-status.txt |  10 ++++
 builtin/commit.c             |  42 +++++++++++++
 diff.c                       |   2 +-
 diff.h                       |   1 +
 t/t7525-status-rename.sh     | 113 +++++++++++++++++++++++++++++++++++
 wt-status.c                  |  12 ++++
 wt-status.h                  |   4 +-
 8 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100755 t/t7525-status-rename.sh

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 2659153cb3..88884f1ead 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -3119,6 +3119,18 @@ status.displayCommentPrefix::
 	behavior of linkgit:git-status[1] in Git 1.8.4 and previous.
 	Defaults to false.
 
+status.renameLimit::
+	The number of files to consider when performing rename detection
+	in linkgit:git-status[1] and linkgit:git-commit[1]. Defaults to
+	the value of diff.renameLimit.
+
+status.renames::
+	Whether and how Git detects renames in linkgit:git-status[1] and
+	linkgit:git-commit[1] .  If set to "false", rename detection is
+	disabled. If set to "true", basic rename detection is enabled.
+	If set to "copies" or "copy", Git will detect copies, as well.
+	Defaults to the value of diff.renames.
+
 status.showStash::
 	If set to true, linkgit:git-status[1] will display the number of
 	entries currently stashed away.
diff --git a/Documentation/git-status.txt b/Documentation/git-status.txt
index c16e27e63d..c4467ffb98 100644
--- a/Documentation/git-status.txt
+++ b/Documentation/git-status.txt
@@ -135,6 +135,16 @@ ignored, then the directory is not shown, but all contents are shown.
 	Display or do not display detailed ahead/behind counts for the
 	branch relative to its upstream branch.  Defaults to true.
 
+--renames::
+--no-renames::
+	Turn on/off rename detection regardless of user configuration.
+	See also linkgit:git-diff[1] `--no-renames`.
+
+--find-renames[=<n>]::
+	Turn on rename detection, optionally setting the similarity
+	threshold.
+	See also linkgit:git-diff[1] `--find-renames`.
+
 <pathspec>...::
 	See the 'pathspec' entry in linkgit:gitglossary[7].
 
diff --git a/builtin/commit.c b/builtin/commit.c
index 5240f11225..b50e33ef48 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -143,6 +143,16 @@ static int opt_parse_m(const struct option *opt, const char *arg, int unset)
 	return 0;
 }
 
+static int opt_parse_rename_score(const struct option *opt, const char *arg, int unset)
+{
+	const char **value = opt->value;
+	if (arg != NULL && *arg == '=')
+		arg = arg + 1;
+
+	*value = arg;
+	return 0;
+}
+
 static void determine_whence(struct wt_status *s)
 {
 	if (file_exists(git_path_merge_head()))
@@ -1259,11 +1269,31 @@ static int git_status_config(const char *k, const char *v, void *cb)
 			return error(_("Invalid untracked files mode '%s'"), v);
 		return 0;
 	}
+	if (!strcmp(k, "diff.renamelimit")) {
+		if (s->rename_limit == -1)
+			s->rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renamelimit")) {
+		s->rename_limit = git_config_int(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "diff.renames")) {
+		if (s->detect_rename == -1)
+			s->detect_rename = git_config_rename(k, v);
+		return 0;
+	}
+	if (!strcmp(k, "status.renames")) {
+		s->detect_rename = git_config_rename(k, v);
+		return 0;
+	}
 	return git_diff_ui_config(k, v, NULL);
 }
 
 int cmd_status(int argc, const char **argv, const char *prefix)
 {
+	static int no_renames = -1;
+	static const char *rename_score_arg = (const char *)-1;
 	static struct wt_status s;
 	int fd;
 	struct object_id oid;
@@ -1297,6 +1327,10 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 		  N_("ignore changes to submodules, optional when: all, dirty, untracked. (Default: all)"),
 		  PARSE_OPT_OPTARG, NULL, (intptr_t)"all" },
 		OPT_COLUMN(0, "column", &s.colopts, N_("list untracked files in columns")),
+		OPT_BOOL(0, "no-renames", &no_renames, N_("do not detect renames")),
+		{ OPTION_CALLBACK, 'M', "find-renames", &rename_score_arg,
+		  N_("n"), N_("detect renames, optionally set similarity index"),
+		  PARSE_OPT_OPTARG, opt_parse_rename_score },
 		OPT_END(),
 	};
 
@@ -1336,6 +1370,14 @@ int cmd_status(int argc, const char **argv, const char *prefix)
 	s.ignore_submodule_arg = ignore_submodule_arg;
 	s.status_format = status_format;
 	s.verbose = verbose;
+	if (no_renames != -1)
+		s.detect_rename = !no_renames;
+	if ((intptr_t)rename_score_arg != -1) {
+		if (s.detect_rename < DIFF_DETECT_RENAME)
+			s.detect_rename = DIFF_DETECT_RENAME;
+		if (rename_score_arg)
+			s.rename_score = parse_rename_score(&rename_score_arg);
+	}
 
 	wt_status_collect(&s);
 
diff --git a/diff.c b/diff.c
index 1289df4b1f..5dfc24aa6d 100644
--- a/diff.c
+++ b/diff.c
@@ -177,7 +177,7 @@ static int parse_submodule_params(struct diff_options *options, const char *valu
 	return 0;
 }
 
-static int git_config_rename(const char *var, const char *value)
+int git_config_rename(const char *var, const char *value)
 {
 	if (!value)
 		return DIFF_DETECT_RENAME;
diff --git a/diff.h b/diff.h
index d29560f822..dedac472ca 100644
--- a/diff.h
+++ b/diff.h
@@ -324,6 +324,7 @@ extern int git_diff_ui_config(const char *var, const char *value, void *cb);
 extern void diff_setup(struct diff_options *);
 extern int diff_opt_parse(struct diff_options *, const char **, int, const char *);
 extern void diff_setup_done(struct diff_options *);
+extern int git_config_rename(const char *var, const char *value);
 
 #define DIFF_DETECT_RENAME	1
 #define DIFF_DETECT_COPY	2
diff --git a/t/t7525-status-rename.sh b/t/t7525-status-rename.sh
new file mode 100755
index 0000000000..ef8b1b3078
--- /dev/null
+++ b/t/t7525-status-rename.sh
@@ -0,0 +1,113 @@
+#!/bin/sh
+
+test_description='git status rename detection options'
+
+. ./test-lib.sh
+
+test_expect_success 'setup' '
+	echo 1 >original &&
+	git add . &&
+	git commit -m"Adding original file." &&
+	mv original renamed &&
+	echo 2 >> renamed &&
+	git add . &&
+	cat >.gitignore <<-\EOF
+	.gitignore
+	expect*
+	actual*
+	EOF
+'
+
+test_expect_success 'status no-options' '
+	git status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status --no-renames' '
+	git status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames false' '
+	git -c diff.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames inherits from diff.renames true' '
+	git -c diff.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status.renames overrides diff.renames false' '
+	git -c diff.renames=true -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status.renames overrides from diff.renames true' '
+	git -c diff.renames=false -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status status.renames=false' '
+	git -c status.renames=false status >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status status.renames=true' '
+	git -c status.renames=true status >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'commit honors status.renames=false' '
+	git -c status.renames=false commit --dry-run >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'commit honors status.renames=true' '
+	git -c status.renames=true commit --dry-run >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'status config overridden' '
+	git -c status.renames=true status --no-renames >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=100%' '
+	git status -M=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual &&
+
+	git status --find-rename=100% >actual &&
+	test_i18ngrep "deleted:" actual &&
+	test_i18ngrep "new file:" actual
+'
+
+test_expect_success 'status score=01%' '
+	git status -M=01% >actual &&
+	test_i18ngrep "renamed:" actual &&
+
+	git status --find-rename=01% >actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_expect_success 'copies not overridden by find-rename' '
+	cp renamed copy &&
+	git add copy &&
+
+	git -c status.renames=copies status -M=01% >actual &&
+	test_i18ngrep "copied:" actual &&
+	test_i18ngrep "renamed:" actual &&
+
+	git -c status.renames=copies status --find-rename=01% >actual &&
+	test_i18ngrep "copied:" actual &&
+	test_i18ngrep "renamed:" actual
+'
+
+test_done
diff --git a/wt-status.c b/wt-status.c
index 32f3bcaebd..172f07cbb0 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -138,6 +138,9 @@ void wt_status_prepare(struct wt_status *s)
 	s->show_stash = 0;
 	s->ahead_behind_flags = AHEAD_BEHIND_UNSPECIFIED;
 	s->display_comment_prefix = 0;
+	s->detect_rename = -1;
+	s->rename_score = -1;
+	s->rename_limit = -1;
 }
 
 static void wt_longstatus_print_unmerged_header(struct wt_status *s)
@@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
 	}
 	rev.diffopt.format_callback = wt_status_collect_changed_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_files(&rev, 0);
 }
@@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
 	rev.diffopt.format_callback = wt_status_collect_updated_cb;
 	rev.diffopt.format_callback_data = s;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	copy_pathspec(&rev.prune_data, &s->pathspec);
 	run_diff_index(&rev, 1);
 }
@@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
 	setup_revisions(0, NULL, &rev, &opt);
 
 	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
+	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
+	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
+	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
 	rev.diffopt.file = s->fp;
 	rev.diffopt.close_file = 0;
 	/*
diff --git a/wt-status.h b/wt-status.h
index 430770b854..1673d146fa 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -89,7 +89,9 @@ struct wt_status {
 	int show_stash;
 	int hints;
 	enum ahead_behind_flags ahead_behind_flags;
-
+	int detect_rename;
+	int rename_score;
+	int rename_limit;
 	enum wt_status_format status_format;
 	unsigned char sha1_commit[GIT_MAX_RAWSZ]; /* when not Initial */
 

base-commit: dc6b1d92ca9c0c538daa244e3910bb8b2a50d959
-- 
2.17.0.windows.1


^ permalink raw reply related	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] add status config and command line options for rename detection
  2018-05-11 12:56 ` [PATCH v3] " Ben Peart
  2018-05-11 14:33   ` Elijah Newren
@ 2018-05-12  8:04   ` Eckhard Maaß
  2018-05-14 12:57     ` Ben Peart
  1 sibling, 1 reply; 17+ messages in thread
From: Eckhard Maaß @ 2018-05-12  8:04 UTC (permalink / raw)
  To: Ben Peart
  Cc: git, newren, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin, eckhard.s.maass

On Fri, May 11, 2018 at 12:56:39PM +0000, Ben Peart wrote:
> After performing a merge that has conflicts git status will, by default,
> attempt to detect renames which causes many objects to be examined.  In a
> virtualized repo, those objects do not exist locally so the rename logic
> triggers them to be fetched from the server. This results in the status call
> taking hours to complete on very large repos vs seconds with this patch.

I see where your need comes from, but as you based this on my little
patch one can achieve this already with tweaking diff.renames itself. I
do wonder why there is a special need for the status command here. And
if there is, I personally would like it more in a style that you could
take all the options provided by diff.*-configuration and prefix that
with status, eg status.diff.renames = true. What do you think? If you
really only need this for merges, maybe a more specialised option is
called for that only kicks in when there is a merge going on?

I would like that status behaves as similar as possible to
diff/show/log. Special options will pull away from that again - passing
-m to show or log will lead to the same performance issues, correct?
Could it be feasible to impose an overall time limit on the detection?

And after writing this I wonder what were your experience with just
tweaking renameLimit - setting it very low should have helped the
fetching from server part already, shouldn't it?

> Add --no-renames command line option to status that enables overriding the
> config setting from the command line. Add --find-renames[=<n>] command line
> option to status that enables detecting renames and optionally setting the
> similarity index.

Would it be reasonable to extend this so that we just use the same
machinery for parsing command line options for the diffcore options and
pass this along? It seems to me that git status wants the same init as
diff/show/log has anyway. But I like the direction towards passing more
command line options to the git status command. 

>  static void wt_longstatus_print_unmerged_header(struct wt_status *s)
> @@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
>  	}
>  	rev.diffopt.format_callback = wt_status_collect_changed_cb;
>  	rev.diffopt.format_callback_data = s;
> +	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
> +	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
> +	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
>  	copy_pathspec(&rev.prune_data, &s->pathspec);
>  	run_diff_files(&rev, 0);
>  }
> @@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
>  	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
>  	rev.diffopt.format_callback = wt_status_collect_updated_cb;
>  	rev.diffopt.format_callback_data = s;
> +	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
> +	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
> +	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
>  	copy_pathspec(&rev.prune_data, &s->pathspec);
>  	run_diff_index(&rev, 1);
>  }
> @@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
>  	setup_revisions(0, NULL, &rev, &opt);
>  
>  	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
> +	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
> +	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
> +	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
>  	rev.diffopt.file = s->fp;
>  	rev.diffopt.close_file = 0;
>  	/*

Somehow I am inclined that those should be factored out to a common
method if the rest of the patch stays as it is.

Greetings,
Eckhard

^ permalink raw reply	[flat|nested] 17+ messages in thread

* Re: [PATCH v3] add status config and command line options for rename detection
  2018-05-12  8:04   ` Eckhard Maaß
@ 2018-05-14 12:57     ` Ben Peart
  0 siblings, 0 replies; 17+ messages in thread
From: Ben Peart @ 2018-05-14 12:57 UTC (permalink / raw)
  To: Eckhard Maaß, Ben Peart
  Cc: git, newren, gitster, pclouds, vmiklos, Alejandro Pauly,
	Johannes.Schindelin



On 5/12/2018 4:04 AM, Eckhard Maaß wrote:
> On Fri, May 11, 2018 at 12:56:39PM +0000, Ben Peart wrote:
>> After performing a merge that has conflicts git status will, by default,
>> attempt to detect renames which causes many objects to be examined.  In a
>> virtualized repo, those objects do not exist locally so the rename logic
>> triggers them to be fetched from the server. This results in the status call
>> taking hours to complete on very large repos vs seconds with this patch.
> 
> I see where your need comes from, but as you based this on my little
> patch one can achieve this already with tweaking diff.renames itself. I
> do wonder why there is a special need for the status command here

The rename detection feature is nice and we'd like to leave it on 
whenever possible.  The performance issues only occur when in the middle 
of a merge - normal status commands behave reasonably.  As a result, we 
don't want to just turn it off completely by setting diff.renames.

Until we come up with a more elegant solution, we currently turn it off 
completely for merge via the new merge settings and then intercept calls 
to status and if there is a MERGE_HEAD we turn it off for status just 
for that specific call.  I view this as a temporary solution so would 
not want to put that logic into git proper as it is quite specific to 
when running git on a virtualized repo.

> if there is, I personally would like it more in a style that you could
> take all the options provided by diff.*-configuration and prefix that
> with status, eg status.diff.renames = true. What do you think? If you
> really only need this for merges, maybe a more specialised option is
> called for that only kicks in when there is a merge going on?
> 
> I would like that status behaves as similar as possible to
> diff/show/log. Special options will pull away from that again - passing
> -m to show or log will lead to the same performance issues, correct?
> Could it be feasible to impose an overall time limit on the detection?
> 

I agree that they should behave as similar as possible which is why all 
the new settings default to the diff setting when not explicitly set.  I 
believe this is a good model - if you don't do anything special you get 
the default/same behavior but if you know and need special behavior, you 
now have that option.

> And after writing this I wonder what were your experience with just
> tweaking renameLimit - setting it very low should have helped the
> fetching from server part already, shouldn't it?
> 
>> Add --no-renames command line option to status that enables overriding the
>> config setting from the command line. Add --find-renames[=<n>] command line
>> option to status that enables detecting renames and optionally setting the
>> similarity index.
> 
> Would it be reasonable to extend this so that we just use the same
> machinery for parsing command line options for the diffcore options and
> pass this along? It seems to me that git status wants the same init as
> diff/show/log has anyway. But I like the direction towards passing more
> command line options to the git status command.
> 

I agree that it is unfortunate that diff/merge/status all parse and deal 
with config settings differently.  I'd be happy to see someone tackle 
that and move the code to a single, coherent model but that is beyond 
the scope of this patch.

>>   static void wt_longstatus_print_unmerged_header(struct wt_status *s)
>> @@ -592,6 +595,9 @@ static void wt_status_collect_changes_worktree(struct wt_status *s)
>>   	}
>>   	rev.diffopt.format_callback = wt_status_collect_changed_cb;
>>   	rev.diffopt.format_callback_data = s;
>> +	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
>> +	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
>> +	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
>>   	copy_pathspec(&rev.prune_data, &s->pathspec);
>>   	run_diff_files(&rev, 0);
>>   }
>> @@ -625,6 +631,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
>>   	rev.diffopt.output_format |= DIFF_FORMAT_CALLBACK;
>>   	rev.diffopt.format_callback = wt_status_collect_updated_cb;
>>   	rev.diffopt.format_callback_data = s;
>> +	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
>> +	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
>> +	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
>>   	copy_pathspec(&rev.prune_data, &s->pathspec);
>>   	run_diff_index(&rev, 1);
>>   }
>> @@ -982,6 +991,9 @@ static void wt_longstatus_print_verbose(struct wt_status *s)
>>   	setup_revisions(0, NULL, &rev, &opt);
>>   
>>   	rev.diffopt.output_format |= DIFF_FORMAT_PATCH;
>> +	rev.diffopt.detect_rename = s->detect_rename >= 0 ? s->detect_rename : rev.diffopt.detect_rename;
>> +	rev.diffopt.rename_limit = s->rename_limit >= 0 ? s->rename_limit : rev.diffopt.rename_limit;
>> +	rev.diffopt.rename_score = s->rename_score >= 0 ? s->rename_score : rev.diffopt.rename_score;
>>   	rev.diffopt.file = s->fp;
>>   	rev.diffopt.close_file = 0;
>>   	/*
> 
> Somehow I am inclined that those should be factored out to a common
> method if the rest of the patch stays as it is.
> 

I debated that as well but given the logic is so simple, opted to stick 
with this.  I also debated whether it would be clearer in the form:

if (s->detect_rename >= 0)
	rev.diffopt.detect_rename = s->detect_rename;

But decided git contributes are used to seeing dense code :) and this 
style better matched what I saw in the merge settings.

> Greetings,
> Eckhard
> 

^ permalink raw reply	[flat|nested] 17+ messages in thread

end of thread, other threads:[~2018-05-14 12:57 UTC | newest]

Thread overview: 17+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-05-09 14:42 [PATCH v1] add status config and command line options for rename detection Ben Peart
2018-05-09 15:59 ` Duy Nguyen
2018-05-09 17:04   ` Ben Peart
2018-05-09 16:56 ` Elijah Newren
2018-05-09 19:54   ` Ben Peart
2018-05-10 14:16 ` [PATCH v2] " Ben Peart
2018-05-10 16:19   ` Elijah Newren
2018-05-10 19:09     ` Ben Peart
2018-05-10 22:31       ` Elijah Newren
2018-05-11 12:50         ` Ben Peart
2018-05-11  1:57     ` Junio C Hamano
2018-05-11  6:39   ` Junio C Hamano
2018-05-11 12:56 ` [PATCH v3] " Ben Peart
2018-05-11 14:33   ` Elijah Newren
2018-05-12  8:04   ` Eckhard Maaß
2018-05-14 12:57     ` Ben Peart
2018-05-11 15:38 ` [PATCH v4] " Ben Peart

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.