All of lore.kernel.org
 help / color / mirror / Atom feed
From: Taylor Blau <me@ttaylorr.com>
To: Elijah Newren via GitGitGadget <gitgitgadget@gmail.com>
Cc: git@vger.kernel.org, Elijah Newren <newren@gmail.com>
Subject: Re: [PATCH 7/7] Accelerate rename_dst setup
Date: Wed, 9 Dec 2020 18:01:16 -0500	[thread overview]
Message-ID: <X9FXPHUZJAZKSset@nand.local> (raw)
In-Reply-To: <3e83b51628b6f3aeb71c5e637eca03dd63f9e95f.1607223276.git.gitgitgadget@gmail.com>

On Sun, Dec 06, 2020 at 02:54:36AM +0000, Elijah Newren via GitGitGadget wrote:
> diff --git a/diffcore-rename.c b/diffcore-rename.c
> index 816d2fbac44..fb3c2817c6b 100644
> --- a/diffcore-rename.c
> +++ b/diffcore-rename.c
> @@ -5,67 +5,64 @@
>  #include "cache.h"
>  #include "diff.h"
>  #include "diffcore.h"
> -#include "object-store.h"
>  #include "hashmap.h"
> +#include "object-store.h"

Shuffling around this object-store.h include looks like noise to me, but
maybe there's a good reason for doing it.

>  #include "progress.h"
>  #include "promisor-remote.h"
> +#include "strmap.h"
>
>  /* Table of rename/copy destinations */
>
>  static struct diff_rename_dst {
> -	struct diff_filespec *two;
> -	struct diff_filepair *pair;
> +	struct diff_filepair *p;
> +	struct diff_filespec *filespec_to_free;
> +	int is_rename; /* false -> just a create; true -> rename or copy */
>  } *rename_dst;
>  static int rename_dst_nr, rename_dst_alloc;
> +/* Mapping from break source pathname to break destination index */
> +static struct strintmap *break_idx = NULL;
>
> -static int find_rename_dst(struct diff_filespec *two)
> -{
> -	int first, last;
> -
> -	first = 0;
> -	last = rename_dst_nr;
> -	while (last > first) {
> -		int next = first + ((last - first) >> 1);
> -		struct diff_rename_dst *dst = &(rename_dst[next]);
> -		int cmp = strcmp(two->path, dst->two->path);
> -		if (!cmp)
> -			return next;
> -		if (cmp < 0) {
> -			last = next;
> -			continue;
> -		}
> -		first = next+1;
> -	}
> -	return -first - 1;
> -}
> -
> -static struct diff_rename_dst *locate_rename_dst(struct diff_filespec *two)
> +static struct diff_rename_dst *locate_rename_dst(struct diff_filepair *p)
>  {
> -	int ofs = find_rename_dst(two);
> -	return ofs < 0 ? NULL : &rename_dst[ofs];
> +	/* Lookup by p->ONE->path */
> +	int idx = break_idx ? strintmap_get(break_idx, p->one->path) : -1;

I had to lookup the behavior of strintmap_get() to realize that it
returns the map's "default value" to figure out why this double
ternary was necessary, but I think that it is.

Ideally, if break_idx is non-NULL, then we ought to be able to immediately
return NULL, but it's possible that break_idx is non-NULL and simply
doesn't contain p->one->path, in which case we also want to return NULL.

So, I think this is as clear as it can be.

> +	return (idx == -1) ? NULL : &rename_dst[idx];
>  }
>
>  /*
>   * Returns 0 on success, -1 if we found a duplicate.
>   */
> -static int add_rename_dst(struct diff_filespec *two)
> +static int add_rename_dst(struct diff_filepair *p)
>  {
> -	int first = find_rename_dst(two);
> -
> -	if (first >= 0)
> +	/*
> +	 * See t4058; trees might have duplicate entries.  I think
> +	 * trees with duplicate entries should be ignored and we
> +	 * should just leave rename detection on; while the results
> +	 * may be slightly harder to understand, that's merely a
> +	 * result of the underlying tree making no sense.  But I
> +	 * believe everything still works fine, the results do still
> +	 * make sense, and the extra overhead of doing this checking
> +	 * for a few historical weird trees from long ago seems like
> +	 * the dog wagging the tail to me.
> +	 *
> +	 * However: I don't feel like fighting that battle right now.
> +	 * For now, to keep the regression test passing, we have to
> +	 * detect it.  Since the diff machinery passes these to us in
> +	 * adjacent pairs, we just need to check to see if our name
> +	 * matches the previous one.
> +	 *
> +	 * TODO: Dispense with this test, rip out the test in t4058, and
> +	 * lobby folks for the change.
> +	 */
> +	if (rename_dst_nr > 0 &&
> +	    !strcmp(rename_dst[rename_dst_nr-1].p->two->path, p->two->path))
>  		return -1;
> -	first = -first - 1;
>
> -	/* insert to make it at "first" */
>  	ALLOC_GROW(rename_dst, rename_dst_nr + 1, rename_dst_alloc);
> +	rename_dst[rename_dst_nr].p = p;
> +	rename_dst[rename_dst_nr].filespec_to_free = NULL;
> +	rename_dst[rename_dst_nr].is_rename = 0;
>  	rename_dst_nr++;
> -	if (first < rename_dst_nr)
> -		MOVE_ARRAY(rename_dst + first + 1, rename_dst + first,
> -			   rename_dst_nr - first - 1);
> -	rename_dst[first].two = alloc_filespec(two->path);
> -	fill_filespec(rename_dst[first].two, &two->oid, two->oid_valid,
> -		      two->mode);
> -	rename_dst[first].pair = NULL;
>  	return 0;

Very nice, this is much simpler than what was here before.

> @@ -78,6 +75,14 @@ static int rename_src_nr, rename_src_alloc;
>
>  static void register_rename_src(struct diff_filepair *p)
>  {
> +	if (p->broken_pair) {
> +		if (!break_idx) {
> +			break_idx = xmalloc(sizeof(*break_idx));
> +			strintmap_init(break_idx, -1);
> +		}
> +		strintmap_set(break_idx, p->one->path, rename_dst_nr);
> +	}
> +

Makes sense.

> @@ -664,8 +653,9 @@ void diffcore_rename(struct diff_options *options)
>  			 */
>  			if (DIFF_PAIR_BROKEN(p)) {
>  				/* broken delete */
> -				struct diff_rename_dst *dst = locate_rename_dst(p->one);
> -				if (dst && dst->pair)
> +				struct diff_rename_dst *dst = locate_rename_dst(p);
> +				assert(dst);

You're not hurting anything, but I'm not convinced that this assert() is
buying you anything that the line immediately below it isn't already
doing.

The rest of the patch looks good to me.

Thanks,
Taylor

  reply	other threads:[~2020-12-09 23:02 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-06  2:54 [PATCH 0/7] diffcore-rename improvements Elijah Newren via GitGitGadget
2020-12-06  2:54 ` [PATCH 1/7] diffcore-rename: avoid usage of global in too_many_rename_candidates() Elijah Newren via GitGitGadget
2020-12-09 22:06   ` Taylor Blau
2020-12-06  2:54 ` [PATCH 2/7] diffcore-rename: remove unnecessary if-clause Elijah Newren via GitGitGadget
2020-12-09 22:10   ` Taylor Blau
2020-12-10  0:32     ` Elijah Newren
2020-12-10  2:03     ` Junio C Hamano
2020-12-10  2:17       ` Elijah Newren
2020-12-10  6:56         ` Junio C Hamano
2020-12-06  2:54 ` [PATCH 3/7] diffcore-rename: rename num_create to num_targets Elijah Newren via GitGitGadget
2020-12-10  2:20   ` Junio C Hamano
2020-12-10  2:25     ` Elijah Newren
2020-12-06  2:54 ` [PATCH 4/7] diffcore-rename: change a few comments to use 'add' instead of 'create' Elijah Newren via GitGitGadget
2020-12-10  2:29   ` Junio C Hamano
2020-12-06  2:54 ` [PATCH 5/7] diffcore-rename: reduce jumpiness in progress counters Elijah Newren via GitGitGadget
2020-12-09 22:24   ` Taylor Blau
2020-12-10  2:36   ` Junio C Hamano
2020-12-06  2:54 ` [PATCH 6/7] diffcore-rename: simplify and accelerate register_rename_src() Elijah Newren via GitGitGadget
2020-12-09 22:40   ` Taylor Blau
2020-12-10  0:25     ` Elijah Newren
2020-12-10  0:41       ` Taylor Blau
2020-12-10  2:51   ` Junio C Hamano
2020-12-06  2:54 ` [PATCH 7/7] Accelerate rename_dst setup Elijah Newren via GitGitGadget
2020-12-09 23:01   ` Taylor Blau [this message]
2020-12-10  0:57     ` Elijah Newren
2020-12-10  1:43       ` Junio C Hamano
2020-12-06  3:01 ` [PATCH 0/7] diffcore-rename improvements Elijah Newren
2020-12-11  9:08 ` [PATCH v2 0/9] " Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 1/9] diffcore-rename: rename num_create to num_destinations Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 2/9] diffcore-rename: avoid usage of global in too_many_rename_candidates() Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 3/9] diffcore-rename: simplify limit check Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 4/9] diffcore-rename: reduce jumpiness in progress counters Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 5/9] t4058: add more tests and documentation for duplicate tree entry handling Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 6/9] t4058: explore duplicate tree entry handling in a bit more detail Elijah Newren via GitGitGadget
2021-04-21 12:29     ` Ævar Arnfjörð Bjarmason
2021-04-21 17:38       ` Elijah Newren
2020-12-11  9:08   ` [PATCH v2 7/9] diffcore-rename: simplify and accelerate register_rename_src() Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 8/9] diffcore-rename: accelerate rename_dst setup Elijah Newren via GitGitGadget
2020-12-11  9:08   ` [PATCH v2 9/9] diffcore-rename: remove unneccessary duplicate entry checks Elijah Newren via GitGitGadget
2020-12-29  8:31     ` Christian Couder
2020-12-29 18:09       ` Elijah Newren
2020-12-29 20:05   ` [PATCH v3 0/9] diffcore-rename improvements Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 1/9] diffcore-rename: rename num_create to num_destinations Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 2/9] diffcore-rename: avoid usage of global in too_many_rename_candidates() Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 3/9] diffcore-rename: simplify limit check Elijah Newren via GitGitGadget
2021-11-09 21:14       ` Başar Uğur
2021-11-10 20:06         ` Elijah Newren
2021-11-11  9:02           ` Başar Uğur
2021-11-11 16:19             ` Elijah Newren
2020-12-29 20:05     ` [PATCH v3 4/9] diffcore-rename: reduce jumpiness in progress counters Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 5/9] t4058: add more tests and documentation for duplicate tree entry handling Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 6/9] t4058: explore duplicate tree entry handling in a bit more detail Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 7/9] diffcore-rename: simplify and accelerate register_rename_src() Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 8/9] diffcore-rename: accelerate rename_dst setup Elijah Newren via GitGitGadget
2020-12-29 20:05     ` [PATCH v3 9/9] diffcore-rename: remove unnecessary duplicate entry checks Elijah Newren via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=X9FXPHUZJAZKSset@nand.local \
    --to=me@ttaylorr.com \
    --cc=git@vger.kernel.org \
    --cc=gitgitgadget@gmail.com \
    --cc=newren@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.