git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Heiko Voigt <hvoigt@hvoigt.net>
Cc: Jeff King <peff@peff.net>, Stefan Beller <sbeller@google.com>,
	"git\@vger.kernel.org" <git@vger.kernel.org>,
	Jens Lehmann <Jens.Lehmann@web.de>,
	Fredrik Gustafsson <iveqy@iveqy.com>,
	Leandro Lucarella <leandro.lucarella@sociomantic.com>
Subject: Re: [PATCH 1/2] serialize collection of changed submodules
Date: Fri, 16 Sep 2016 10:27:04 -0700	[thread overview]
Message-ID: <xmqqintvlpqv.fsf@gitster.mtv.corp.google.com> (raw)
In-Reply-To: <20160914173124.GA7613@sandbox> (Heiko Voigt's message of "Wed, 14 Sep 2016 19:31:24 +0200")

Heiko Voigt <hvoigt@hvoigt.net> writes:

> +static struct sha1_array *get_sha1s_from_list(struct string_list *submodules,
> +		const char *path)
> +{
> +	struct string_list_item *item;
> +	struct sha1_array *hashes;
> +
> +	item = string_list_insert(submodules, path);
> +	if (item->util)
> +		return (struct sha1_array *) item->util;
> +
> +	hashes = (struct sha1_array *) xmalloc(sizeof(struct sha1_array));
> +	/* NEEDSWORK: should we add an initializer function for
> +	 * sha1_array ? */
> +	memset(hashes, 0, sizeof(struct sha1_array));
> +	item->util = hashes;


	/* NEEDSWORK: should we have SHA1_ARRAY_INIT etc.? */
	item->util = xcalloc(1, sizeof(struct sha1_array));

>  static void collect_submodules_from_diff(struct diff_queue_struct *q,
>  					 struct diff_options *options,
>  					 void *data)
>  {
>  	int i;
> -	struct string_list *needs_pushing = data;
> +	struct string_list *submodules = data;
>  
>  	for (i = 0; i < q->nr; i++) {
>  		struct diff_filepair *p = q->queue[i];
> +		struct sha1_array *hashes;
>  		if (!S_ISGITLINK(p->two->mode))
>  			continue;
> -		if (submodule_needs_pushing(p->two->path, p->two->oid.hash))
> -			string_list_insert(needs_pushing, p->two->path);
> +		hashes = get_sha1s_from_list(submodules, p->two->path);
> +		sha1_array_append(hashes, p->two->oid.hash);
>  	}
>  }

So the idea at this step is still to have each commit in the top-level
history inspected for any submodule change, but the result is
collected in a mapping (submodule -> [ list of submodule commits ]).
As we do not expect too many "oops, the old commit was better, so
let's revert and rebind the old one from the submodule" in the
history of the top-level, appending and then running for-each-unique
is an efficient way, instead of first checking if we already have
it and then inserting new ones to maintain the uniqueness.

Makes sense.

> @@ -582,14 +601,41 @@ static void find_unpushed_submodule_commits(struct commit *commit,
>  	diff_tree_combined_merge(commit, 1, &rev);
>  }
>  
> +struct collect_submodule_from_sha1s_data {
> +	char *submodule_path;
> +	struct string_list *needs_pushing;
> +};
> +
> +static void collect_submodules_from_sha1s(const unsigned char sha1[20],
> +		void *data)
> +{
> +	struct collect_submodule_from_sha1s_data *me =
> +		(struct collect_submodule_from_sha1s_data *) data;
> +
> +	if (submodule_needs_pushing(me->submodule_path, sha1))
> +		string_list_insert(me->needs_pushing, me->submodule_path);
> +}

This is called from sha1_array_for_each_unique() that iterates over
the submodule commit object names for one submodule and then ends up
calling submodule_needs_pushing() a number of times, which smells less
efficient than it could be.  You can ask

    rev-list <all the submodule commits to be pushed> --not --remotes

just once in the submodule repository.  I imagine that is what you'll
do in the next patch.

An obvious but much less efficient way to optimize this part would
be to see if me->needs_pushing already has me->submodule_path and
skip the check for submodule_needs_pushing(), but if you drop the
call by find_unpushed_submodules() to sha1_array_for_each_unique() to
walk new submodule commits one by one, that would become irrelevant.

> +static void free_submodules_sha1s(struct string_list *submodules)
> +{
> +	int i;
> +	for (i = 0; i < submodules->nr; i++) {
> +		struct string_list_item *item = &submodules->items[i];
> +		struct sha1_array *hashes = (struct sha1_array *) item->util;
> +		sha1_array_clear(hashes);
> +	}
> +	string_list_clear(submodules, 1);
> +}
> +
>  int find_unpushed_submodules(unsigned char new_sha1[20],
>  		const char *remotes_name, struct string_list *needs_pushing)
>  {
>  	struct rev_info rev;
>  	struct commit *commit;
>  	const char *argv[] = {NULL, NULL, "--not", "NULL", NULL};
> -	int argc = ARRAY_SIZE(argv) - 1;
> +	int argc = ARRAY_SIZE(argv) - 1, i;
>  	char *sha1_copy;
> +	struct string_list submodules = STRING_LIST_INIT_DUP;
>  
>  	struct strbuf remotes_arg = STRBUF_INIT;
>  
> @@ -603,12 +649,23 @@ int find_unpushed_submodules(unsigned char new_sha1[20],
>  		die("revision walk setup failed");
>  
>  	while ((commit = get_revision(&rev)) != NULL)
> -		find_unpushed_submodule_commits(commit, needs_pushing);
> +		find_unpushed_submodule_commits(commit, &submodules);
>  
>  	reset_revision_walk();
>  	free(sha1_copy);
>  	strbuf_release(&remotes_arg);
>  
> +	for (i = 0; i < submodules.nr; i++) {
> +		struct string_list_item *item = &submodules.items[i];
> +		struct collect_submodule_from_sha1s_data data;
> +		data.submodule_path = item->string;
> +		data.needs_pushing = needs_pushing;
> +		sha1_array_for_each_unique((struct sha1_array *) item->util,
> +				collect_submodules_from_sha1s,
> +				&data);
> +	}
> +	free_submodules_sha1s(&submodules);
> +
>  	return needs_pushing->nr;
>  }

  parent reply	other threads:[~2016-09-16 17:27 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-24 17:30 [PATCHv2] push: change submodule default to check Stefan Beller
2016-08-24 18:38 ` Junio C Hamano
     [not found] ` <20160824183112.ceekegpzavnbybxp@sigill.intra.peff.net>
2016-08-24 19:37   ` Junio C Hamano
2016-08-24 21:26     ` Junio C Hamano
2016-08-24 22:37     ` Stefan Beller
2016-08-24 23:01       ` Jeff King
2016-09-14 17:31         ` [PATCH 1/2] serialize collection of changed submodules Heiko Voigt
2016-09-14 22:30           ` Junio C Hamano
2016-09-15 12:10             ` [PATCH 3/2] batch check whether submodule needs pushing into one call Heiko Voigt
2016-09-15 21:08               ` Junio C Hamano
2016-09-16  9:40                 ` Heiko Voigt
2016-09-16 12:31                   ` Heiko Voigt
2016-09-16 18:13                     ` Junio C Hamano
2016-09-19 20:08                       ` Heiko Voigt
2016-09-16 17:59               ` Junio C Hamano
2016-09-19 19:58                 ` Heiko Voigt
2016-09-15 12:18             ` [PATCH 4/2] use actual start hashes for submodule push check instead of local refs Heiko Voigt
2016-09-16 17:27           ` Junio C Hamano [this message]
2016-09-19 19:44             ` [PATCH 1/2] serialize collection of changed submodules Heiko Voigt
2016-09-14 17:51         ` [PATCH 2/2] serialize collection of refs that contain submodule changes Heiko Voigt
2016-09-14 19:46           ` Heiko Voigt
2016-09-14 20:04             ` Stefan Beller
2016-09-16 17:47           ` Junio C Hamano
2016-09-19 19:51             ` Heiko Voigt
2016-09-19 20:09               ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=xmqqintvlpqv.fsf@gitster.mtv.corp.google.com \
    --to=gitster@pobox.com \
    --cc=Jens.Lehmann@web.de \
    --cc=git@vger.kernel.org \
    --cc=hvoigt@hvoigt.net \
    --cc=iveqy@iveqy.com \
    --cc=leandro.lucarella@sociomantic.com \
    --cc=peff@peff.net \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).