All of lore.kernel.org
 help / color / mirror / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Heiko Voigt <hvoigt@hvoigt.net>
Cc: Jeff King <peff@peff.net>, Stefan Beller <sbeller@google.com>,
	"git\@vger.kernel.org" <git@vger.kernel.org>,
	Jens Lehmann <Jens.Lehmann@web.de>,
	Fredrik Gustafsson <iveqy@iveqy.com>,
	Leandro Lucarella <leandro.lucarella@sociomantic.com>
Subject: Re: [PATCH 1/2] serialize collection of changed submodules
Date: Fri, 16 Sep 2016 10:27:04 -0700	[thread overview]
Message-ID: <xmqqintvlpqv.fsf@gitster.mtv.corp.google.com> (raw)
In-Reply-To: <20160914173124.GA7613@sandbox> (Heiko Voigt's message of "Wed, 14 Sep 2016 19:31:24 +0200")

Heiko Voigt <hvoigt@hvoigt.net> writes:

> +static struct sha1_array *get_sha1s_from_list(struct string_list *submodules,
> +		const char *path)
> +{
> +	struct string_list_item *item;
> +	struct sha1_array *hashes;
> +
> +	item = string_list_insert(submodules, path);
> +	if (item->util)
> +		return (struct sha1_array *) item->util;
> +
> +	hashes = (struct sha1_array *) xmalloc(sizeof(struct sha1_array));
> +	/* NEEDSWORK: should we add an initializer function for
> +	 * sha1_array ? */
> +	memset(hashes, 0, sizeof(struct sha1_array));
> +	item->util = hashes;


	/* NEEDSWORK: should we have SHA1_ARRAY_INIT etc.? */
	item->util = xcalloc(1, sizeof(struct sha1_array));

>  static void collect_submodules_from_diff(struct diff_queue_struct *q,
>  					 struct diff_options *options,
>  					 void *data)
>  {
>  	int i;
> -	struct string_list *needs_pushing = data;
> +	struct string_list *submodules = data;
>  
>  	for (i = 0; i < q->nr; i++) {
>  		struct diff_filepair *p = q->queue[i];
> +		struct sha1_array *hashes;
>  		if (!S_ISGITLINK(p->two->mode))
>  			continue;
> -		if (submodule_needs_pushing(p->two->path, p->two->oid.hash))
> -			string_list_insert(needs_pushing, p->two->path);
> +		hashes = get_sha1s_from_list(submodules, p->two->path);
> +		sha1_array_append(hashes, p->two->oid.hash);
>  	}
>  }

So the idea at this step is still to have each commit in the top-level
history inspected for any submodule change, but the result is now
collected in a mapping (submodule -> [ list of submodule commits ]).
As we do not expect too many "oops, the old commit was better, so
let's revert and rebind the old one from the submodule" in the
history of the top-level, appending and then running for-each-unique
is an efficient way to do this, compared to first checking if we
already have it and then inserting new ones to maintain the uniqueness.

Makes sense.

> @@ -582,14 +601,41 @@ static void find_unpushed_submodule_commits(struct commit *commit,
>  	diff_tree_combined_merge(commit, 1, &rev);
>  }
>  
> +struct collect_submodule_from_sha1s_data {
> +	char *submodule_path;
> +	struct string_list *needs_pushing;
> +};
> +
> +static void collect_submodules_from_sha1s(const unsigned char sha1[20],
> +		void *data)
> +{
> +	struct collect_submodule_from_sha1s_data *me =
> +		(struct collect_submodule_from_sha1s_data *) data;
> +
> +	if (submodule_needs_pushing(me->submodule_path, sha1))
> +		string_list_insert(me->needs_pushing, me->submodule_path);
> +}

This is called from sha1_array_for_each_unique() that iterates over
the submodule commit object names for one submodule and then ends up
calling submodule_needs_pushing() a number of times, which smells less
efficient than it could be.  You can ask

    rev-list <all the submodule commits to be pushed> --not --remotes

just once in the submodule repository.  I imagine that is what you'll
do in the next patch.

An obvious but much less efficient way to optimize this part would
be to see if me->needs_pushing already has me->submodule_path and
skip the call to submodule_needs_pushing(), but if you drop the
call made by find_unpushed_submodules() to sha1_array_for_each_unique()
to walk new submodule commits one by one, that would become irrelevant.

> +static void free_submodules_sha1s(struct string_list *submodules)
> +{
> +	int i;
> +	for (i = 0; i < submodules->nr; i++) {
> +		struct string_list_item *item = &submodules->items[i];
> +		struct sha1_array *hashes = (struct sha1_array *) item->util;
> +		sha1_array_clear(hashes);
> +	}
> +	string_list_clear(submodules, 1);
> +}
> +
>  int find_unpushed_submodules(unsigned char new_sha1[20],
>  		const char *remotes_name, struct string_list *needs_pushing)
>  {
>  	struct rev_info rev;
>  	struct commit *commit;
>  	const char *argv[] = {NULL, NULL, "--not", "NULL", NULL};
> -	int argc = ARRAY_SIZE(argv) - 1;
> +	int argc = ARRAY_SIZE(argv) - 1, i;
>  	char *sha1_copy;
> +	struct string_list submodules = STRING_LIST_INIT_DUP;
>  
>  	struct strbuf remotes_arg = STRBUF_INIT;
>  
> @@ -603,12 +649,23 @@ int find_unpushed_submodules(unsigned char new_sha1[20],
>  		die("revision walk setup failed");
>  
>  	while ((commit = get_revision(&rev)) != NULL)
> -		find_unpushed_submodule_commits(commit, needs_pushing);
> +		find_unpushed_submodule_commits(commit, &submodules);
>  
>  	reset_revision_walk();
>  	free(sha1_copy);
>  	strbuf_release(&remotes_arg);
>  
> +	for (i = 0; i < submodules.nr; i++) {
> +		struct string_list_item *item = &submodules.items[i];
> +		struct collect_submodule_from_sha1s_data data;
> +		data.submodule_path = item->string;
> +		data.needs_pushing = needs_pushing;
> +		sha1_array_for_each_unique((struct sha1_array *) item->util,
> +				collect_submodules_from_sha1s,
> +				&data);
> +	}
> +	free_submodules_sha1s(&submodules);
> +
>  	return needs_pushing->nr;
>  }

  parent reply	other threads:[~2016-09-16 17:27 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-24 17:30 [PATCHv2] push: change submodule default to check Stefan Beller
2016-08-24 18:38 ` Junio C Hamano
     [not found] ` <20160824183112.ceekegpzavnbybxp@sigill.intra.peff.net>
2016-08-24 19:37   ` Junio C Hamano
2016-08-24 21:26     ` Junio C Hamano
2016-08-24 22:37     ` Stefan Beller
2016-08-24 23:01       ` Jeff King
2016-09-14 17:31         ` [PATCH 1/2] serialize collection of changed submodules Heiko Voigt
2016-09-14 22:30           ` Junio C Hamano
2016-09-15 12:10             ` [PATCH 3/2] batch check whether submodule needs pushing into one call Heiko Voigt
2016-09-15 21:08               ` Junio C Hamano
2016-09-16  9:40                 ` Heiko Voigt
2016-09-16 12:31                   ` Heiko Voigt
2016-09-16 18:13                     ` Junio C Hamano
2016-09-19 20:08                       ` Heiko Voigt
2016-09-16 17:59               ` Junio C Hamano
2016-09-19 19:58                 ` Heiko Voigt
2016-09-15 12:18             ` [PATCH 4/2] use actual start hashes for submodule push check instead of local refs Heiko Voigt
2016-09-16 17:27           ` Junio C Hamano [this message]
2016-09-19 19:44             ` [PATCH 1/2] serialize collection of changed submodules Heiko Voigt
2016-09-14 17:51         ` [PATCH 2/2] serialize collection of refs that contain submodule changes Heiko Voigt
2016-09-14 19:46           ` Heiko Voigt
2016-09-14 20:04             ` Stefan Beller
2016-09-16 17:47           ` Junio C Hamano
2016-09-19 19:51             ` Heiko Voigt
2016-09-19 20:09               ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqintvlpqv.fsf@gitster.mtv.corp.google.com \
    --to=gitster@pobox.com \
    --cc=Jens.Lehmann@web.de \
    --cc=git@vger.kernel.org \
    --cc=hvoigt@hvoigt.net \
    --cc=iveqy@iveqy.com \
    --cc=leandro.lucarella@sociomantic.com \
    --cc=peff@peff.net \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.