git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Heiko Voigt <hvoigt@hvoigt.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: Jeff King <peff@peff.net>, Stefan Beller <sbeller@google.com>,
	"git@vger.kernel.org" <git@vger.kernel.org>,
	Jens Lehmann <Jens.Lehmann@web.de>,
	Fredrik Gustafsson <iveqy@iveqy.com>,
	Leandro Lucarella <leandro.lucarella@sociomantic.com>
Subject: [PATCH 3/2] batch check whether submodule needs pushing into one call
Date: Thu, 15 Sep 2016 14:10:44 +0200	[thread overview]
Message-ID: <20160915121044.GA96648@book.hvoigt.net> (raw)
In-Reply-To: <xmqqwpiep10i.fsf@gitster.mtv.corp.google.com>

We run a command for each sha1 change in a submodule. This is
unnecessary since we can simply batch all sha1s we want to check into
one command. Let's do it so we can speed up the check when many submodule
changes are in need of checking.

Signed-off-by: Heiko Voigt <hvoigt@hvoigt.net>
---
On Wed, Sep 14, 2016 at 03:30:53PM -0700, Junio C Hamano wrote:
> Heiko Voigt <hvoigt@hvoigt.net> writes:
> 
> > Sorry about the late reply. I was not able to process emails until now.
> > Here are two patches that should help to improve the situation and batch
> > up some processing. This one is for repositories with submodules, so
> > that they do not iterate over the same submodule twice with the same
> > hash.
> >
> > The second one will be the one people without submodules are interested
> > in.
> 
> Thanks.  Will take a look at later as I'm already deep in today's
> integration cycle.  Very much appreciated.

No problem. While I am at it: here are another two patches that should
make life easier for submodule users (push times of big pushes).

In numbers, with the qt5[1] superproject and all submodules initialized,
running the same --mirror test as before with the git repository:

# Without patch:

book:qt5 hvoigt (5.6)$
rm -rf ~/Downloads/git-test && mkdir ~/Downloads/git-test &&
   (cd ~/Downloads/git-test && git init) &&
   time git push --mirror --recurse-submodules=check ~/Downloads/git-test

real	4m0.881s
user	3m30.139s
sys	0m22.329s

Without --recurse-submodules=check

real	0m0.251s
user	0m0.218s
sys	0m0.082s


# With patch:

real	0m1.167s
user	0m0.846s
sys	0m0.262s

real	0m1.110s
user	0m0.815s
sys	0m0.247s

real	0m1.111s
user	0m0.818s
sys	0m0.251s

Without --recurse-submodules=check

real	0m0.294s
user	0m0.221s
sys	0m0.104s

real	0m0.248s
user	0m0.216s
sys	0m0.080s

real	0m0.247s
user	0m0.212s
sys	0m0.082s

[1] git://code.qt.io/qt/qt5.git

 submodule.c | 75 ++++++++++++++++++++++++++++++++-----------------------------
 1 file changed, 39 insertions(+), 36 deletions(-)

diff --git a/submodule.c b/submodule.c
index a15e346..28bb74e 100644
--- a/submodule.c
+++ b/submodule.c
@@ -522,27 +522,54 @@ static int has_remote(const char *refname, const struct object_id *oid,
 	return 1;
 }
 
-static int submodule_needs_pushing(const char *path, const unsigned char sha1[20])
+static void append_hash_to_argv(const unsigned char sha1[20], void *data)
 {
-	if (add_submodule_odb(path) || !lookup_commit_reference(sha1))
+	struct argv_array *argv = (struct argv_array *) data;
+	argv_array_push(argv, sha1_to_hex(sha1));
+}
+
+static void check_has_hash(const unsigned char sha1[20], void *data)
+{
+	int *has_hash = (int *) data;
+
+	if (!lookup_commit_reference(sha1))
+		*has_hash = 0;
+}
+
+static int submodule_has_hashes(const char *path, struct sha1_array *hashes)
+{
+	int has_hash = 1;
+
+	if (add_submodule_odb(path))
+		return 0;
+
+	sha1_array_for_each_unique(hashes, check_has_hash, &has_hash);
+	return has_hash;
+}
+
+static int submodule_needs_pushing(const char *path, struct sha1_array *hashes)
+{
+	if (!submodule_has_hashes(path, hashes))
 		return 0;
 
 	if (for_each_remote_ref_submodule(path, has_remote, NULL) > 0) {
 		struct child_process cp = CHILD_PROCESS_INIT;
-		const char *argv[] = {"rev-list", NULL, "--not", "--remotes", "-n", "1" , NULL};
+
+		argv_array_push(&cp.args, "rev-list");
+		sha1_array_for_each_unique(hashes, append_hash_to_argv, &cp.args);
+		argv_array_pushl(&cp.args, "--not", "--remotes", "-n", "1" , NULL);
+
 		struct strbuf buf = STRBUF_INIT;
 		int needs_pushing = 0;
 
-		argv[1] = sha1_to_hex(sha1);
-		cp.argv = argv;
 		prepare_submodule_repo_env(&cp.env_array);
 		cp.git_cmd = 1;
 		cp.no_stdin = 1;
 		cp.out = -1;
 		cp.dir = path;
 		if (start_command(&cp))
-			die("Could not run 'git rev-list %s --not --remotes -n 1' command in submodule %s",
-				sha1_to_hex(sha1), path);
+			die("Could not run 'git rev-list <hashes> --not --remotes -n 1' command in submodule %s",
+					path);
 		if (strbuf_read(&buf, cp.out, 41))
 			needs_pushing = 1;
 		finish_command(&cp);
@@ -601,21 +628,6 @@ static void find_unpushed_submodule_commits(struct commit *commit,
 	diff_tree_combined_merge(commit, 1, &rev);
 }
 
-struct collect_submodule_from_sha1s_data {
-	char *submodule_path;
-	struct string_list *needs_pushing;
-};
-
-static void collect_submodules_from_sha1s(const unsigned char sha1[20],
-		void *data)
-{
-	struct collect_submodule_from_sha1s_data *me =
-		(struct collect_submodule_from_sha1s_data *) data;
-
-	if (submodule_needs_pushing(me->submodule_path, sha1))
-		string_list_insert(me->needs_pushing, me->submodule_path);
-}
-
 static void free_submodules_sha1s(struct string_list *submodules)
 {
 	int i;
@@ -627,13 +639,6 @@ static void free_submodules_sha1s(struct string_list *submodules)
 	string_list_clear(submodules, 1);
 }
 
-static void append_hash_to_argv(const unsigned char sha1[20],
-		void *data)
-{
-	struct argv_array *argv = (struct argv_array *) data;
-	argv_array_push(argv, sha1_to_hex(sha1));
-}
-
 int find_unpushed_submodules(struct sha1_array *hashes,
 		const char *remotes_name, struct string_list *needs_pushing)
 {
@@ -662,13 +667,11 @@ int find_unpushed_submodules(struct sha1_array *hashes,
 	argv_array_clear(&argv);
 
 	for (i = 0; i < submodules.nr; i++) {
-		struct string_list_item *item = &submodules.items[i];
-		struct collect_submodule_from_sha1s_data data;
-		data.submodule_path = item->string;
-		data.needs_pushing = needs_pushing;
-		sha1_array_for_each_unique((struct sha1_array *) item->util,
-				collect_submodules_from_sha1s,
-				&data);
+		struct string_list_item *submodule = &submodules.items[i];
+		struct sha1_array *hashes = (struct sha1_array *) submodule->util;
+
+		if (submodule_needs_pushing(submodule->string, hashes))
+			string_list_insert(needs_pushing, submodule->string);
 	}
 	free_submodules_sha1s(&submodules);
 
-- 
2.10.0.133.g13017a3


  reply	other threads:[~2016-09-15 12:10 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-24 17:30 [PATCHv2] push: change submodule default to check Stefan Beller
2016-08-24 18:38 ` Junio C Hamano
     [not found] ` <20160824183112.ceekegpzavnbybxp@sigill.intra.peff.net>
2016-08-24 19:37   ` Junio C Hamano
2016-08-24 21:26     ` Junio C Hamano
2016-08-24 22:37     ` Stefan Beller
2016-08-24 23:01       ` Jeff King
2016-09-14 17:31         ` [PATCH 1/2] serialize collection of changed submodules Heiko Voigt
2016-09-14 22:30           ` Junio C Hamano
2016-09-15 12:10             ` Heiko Voigt [this message]
2016-09-15 21:08               ` [PATCH 3/2] batch check whether submodule needs pushing into one call Junio C Hamano
2016-09-16  9:40                 ` Heiko Voigt
2016-09-16 12:31                   ` Heiko Voigt
2016-09-16 18:13                     ` Junio C Hamano
2016-09-19 20:08                       ` Heiko Voigt
2016-09-16 17:59               ` Junio C Hamano
2016-09-19 19:58                 ` Heiko Voigt
2016-09-15 12:18             ` [PATCH 4/2] use actual start hashes for submodule push check instead of local refs Heiko Voigt
2016-09-16 17:27           ` [PATCH 1/2] serialize collection of changed submodules Junio C Hamano
2016-09-19 19:44             ` Heiko Voigt
2016-09-14 17:51         ` [PATCH 2/2] serialize collection of refs that contain submodule changes Heiko Voigt
2016-09-14 19:46           ` Heiko Voigt
2016-09-14 20:04             ` Stefan Beller
2016-09-16 17:47           ` Junio C Hamano
2016-09-19 19:51             ` Heiko Voigt
2016-09-19 20:09               ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160915121044.GA96648@book.hvoigt.net \
    --to=hvoigt@hvoigt.net \
    --cc=Jens.Lehmann@web.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=iveqy@iveqy.com \
    --cc=leandro.lucarella@sociomantic.com \
    --cc=peff@peff.net \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).