git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Patrick Steinhardt <ps@pks.im>
To: git@vger.kernel.org
Cc: "Jeff King" <peff@peff.net>,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Junio C Hamano" <gitster@pobox.com>,
	"Derrick Stolee" <stolee@gmail.com>,
	"René Scharfe" <l.s.r@web.de>
Subject: [PATCH v3 3/7] connected: refactor iterator to return next object ID directly
Date: Wed, 1 Sep 2021 15:09:50 +0200	[thread overview]
Message-ID: <ba834803ab30e3cc0be9ed71da54a6f2da2e6949.1630501732.git.ps@pks.im> (raw)
In-Reply-To: <cover.1630501732.git.ps@pks.im>

[-- Attachment #1: Type: text/plain, Size: 8508 bytes --]

The object ID iterator used by the connectivity checks returns the next
object ID via an out-parameter and then uses a return code to indicate
whether an item was found. This is a bit roundabout: instead of a
separate error code, we can just return the next object ID directly and
use `NULL` pointers as indicator that the iterator got no items left.
Furthermore, this avoids a copy of the object ID.

Refactor the iterator and all its implementations to return object IDs
directly. This brings a tiny performance improvement when doing a mirror-fetch of a repository with about 2.3M refs:

    Benchmark #1: 328dc58b49919c43897240f2eabfa30be2ce32a4~: git-fetch
      Time (mean ± σ):     30.110 s ±  0.148 s    [User: 27.161 s, System: 5.075 s]
      Range (min … max):   29.934 s … 30.406 s    10 runs

    Benchmark #2: 328dc58b49919c43897240f2eabfa30be2ce32a4: git-fetch
      Time (mean ± σ):     29.899 s ±  0.109 s    [User: 26.916 s, System: 5.104 s]
      Range (min … max):   29.696 s … 29.996 s    10 runs

    Summary
      '328dc58b49919c43897240f2eabfa30be2ce32a4: git-fetch' ran
        1.01 ± 0.01 times faster than '328dc58b49919c43897240f2eabfa30be2ce32a4~: git-fetch'

While this 1% speedup could be labelled as statistically insignificant,
the speedup is consistent on my machine. Furthermore, this is an end to
end test, so it is expected that the improvement in the connectivity
check itself is more significant.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
---
 builtin/clone.c        |  8 +++-----
 builtin/fetch.c        |  7 +++----
 builtin/receive-pack.c | 17 +++++++----------
 connected.c            | 15 ++++++++-------
 connected.h            |  2 +-
 fetch-pack.c           |  7 +++----
 6 files changed, 25 insertions(+), 31 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 66fe66679c..4a1056fcc2 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -657,7 +657,7 @@ static void write_followtags(const struct ref *refs, const char *msg)
 	}
 }
 
-static int iterate_ref_map(void *cb_data, struct object_id *oid)
+static const struct object_id *iterate_ref_map(void *cb_data)
 {
 	struct ref **rm = cb_data;
 	struct ref *ref = *rm;
@@ -668,13 +668,11 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid)
 	 */
 	while (ref && !ref->peer_ref)
 		ref = ref->next;
-	/* Returning -1 notes "end of list" to the caller. */
 	if (!ref)
-		return -1;
+		return NULL;
 
-	oidcpy(oid, &ref->old_oid);
 	*rm = ref->next;
-	return 0;
+	return &ref->old_oid;
 }
 
 static void update_remote_refs(const struct ref *refs,
diff --git a/builtin/fetch.c b/builtin/fetch.c
index 0b18c47732..f67fe543ad 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -962,7 +962,7 @@ static int update_local_ref(struct ref *ref,
 	}
 }
 
-static int iterate_ref_map(void *cb_data, struct object_id *oid)
+static const struct object_id *iterate_ref_map(void *cb_data)
 {
 	struct ref **rm = cb_data;
 	struct ref *ref = *rm;
@@ -970,10 +970,9 @@ static int iterate_ref_map(void *cb_data, struct object_id *oid)
 	while (ref && ref->status == REF_STATUS_REJECT_SHALLOW)
 		ref = ref->next;
 	if (!ref)
-		return -1; /* end of the list */
+		return NULL;
 	*rm = ref->next;
-	oidcpy(oid, &ref->old_oid);
-	return 0;
+	return &ref->old_oid;
 }
 
 struct fetch_head {
diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 2d1f97e1ca..041e915454 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1306,7 +1306,7 @@ static void refuse_unconfigured_deny_delete_current(void)
 	rp_error("%s", _(refuse_unconfigured_deny_delete_current_msg));
 }
 
-static int command_singleton_iterator(void *cb_data, struct object_id *oid);
+static const struct object_id *command_singleton_iterator(void *cb_data);
 static int update_shallow_ref(struct command *cmd, struct shallow_info *si)
 {
 	struct shallow_lock shallow_lock = SHALLOW_LOCK_INIT;
@@ -1731,16 +1731,15 @@ static void check_aliased_updates(struct command *commands)
 	string_list_clear(&ref_list, 0);
 }
 
-static int command_singleton_iterator(void *cb_data, struct object_id *oid)
+static const struct object_id *command_singleton_iterator(void *cb_data)
 {
 	struct command **cmd_list = cb_data;
 	struct command *cmd = *cmd_list;
 
 	if (!cmd || is_null_oid(&cmd->new_oid))
-		return -1; /* end of list */
+		return NULL;
 	*cmd_list = NULL; /* this returns only one */
-	oidcpy(oid, &cmd->new_oid);
-	return 0;
+	return &cmd->new_oid;
 }
 
 static void set_connectivity_errors(struct command *commands,
@@ -1770,7 +1769,7 @@ struct iterate_data {
 	struct shallow_info *si;
 };
 
-static int iterate_receive_command_list(void *cb_data, struct object_id *oid)
+static const struct object_id *iterate_receive_command_list(void *cb_data)
 {
 	struct iterate_data *data = cb_data;
 	struct command **cmd_list = &data->cmds;
@@ -1781,13 +1780,11 @@ static int iterate_receive_command_list(void *cb_data, struct object_id *oid)
 			/* to be checked in update_shallow_ref() */
 			continue;
 		if (!is_null_oid(&cmd->new_oid) && !cmd->skip_update) {
-			oidcpy(oid, &cmd->new_oid);
 			*cmd_list = cmd->next;
-			return 0;
+			return &cmd->new_oid;
 		}
 	}
-	*cmd_list = NULL;
-	return -1; /* end of list */
+	return NULL;
 }
 
 static void reject_updates_to_hidden(struct command *commands)
diff --git a/connected.c b/connected.c
index b5f9523a5f..cf68e37a97 100644
--- a/connected.c
+++ b/connected.c
@@ -24,7 +24,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 	struct child_process rev_list = CHILD_PROCESS_INIT;
 	FILE *rev_list_in;
 	struct check_connected_options defaults = CHECK_CONNECTED_INIT;
-	struct object_id oid;
+	const struct object_id *oid;
 	int err = 0;
 	struct packed_git *new_pack = NULL;
 	struct transport *transport;
@@ -34,7 +34,8 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 		opt = &defaults;
 	transport = opt->transport;
 
-	if (fn(cb_data, &oid)) {
+	oid = fn(cb_data);
+	if (!oid) {
 		if (opt->err_fd)
 			close(opt->err_fd);
 		return err;
@@ -73,7 +74,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 			for (p = get_all_packs(the_repository); p; p = p->next) {
 				if (!p->pack_promisor)
 					continue;
-				if (find_pack_entry_one(oid.hash, p))
+				if (find_pack_entry_one(oid->hash, p))
 					goto promisor_pack_found;
 			}
 			/*
@@ -83,7 +84,7 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 			goto no_promisor_pack_found;
 promisor_pack_found:
 			;
-		} while (!fn(cb_data, &oid));
+		} while ((oid = fn(cb_data)) != NULL);
 		return 0;
 	}
 
@@ -133,12 +134,12 @@ int check_connected(oid_iterate_fn fn, void *cb_data,
 		 * are sure the ref is good and not sending it to
 		 * rev-list for verification.
 		 */
-		if (new_pack && find_pack_entry_one(oid.hash, new_pack))
+		if (new_pack && find_pack_entry_one(oid->hash, new_pack))
 			continue;
 
-		if (fprintf(rev_list_in, "%s\n", oid_to_hex(&oid)) < 0)
+		if (fprintf(rev_list_in, "%s\n", oid_to_hex(oid)) < 0)
 			break;
-	} while (!fn(cb_data, &oid));
+	} while ((oid = fn(cb_data)) != NULL);
 
 	if (ferror(rev_list_in) || fflush(rev_list_in)) {
 		if (errno != EPIPE && errno != EINVAL)
diff --git a/connected.h b/connected.h
index 8d5a6b3ad6..6e59c92aa3 100644
--- a/connected.h
+++ b/connected.h
@@ -9,7 +9,7 @@ struct transport;
  * When called after returning the name for the last object, return -1
  * to signal EOF, otherwise return 0.
  */
-typedef int (*oid_iterate_fn)(void *, struct object_id *oid);
+typedef const struct object_id *(*oid_iterate_fn)(void *);
 
 /*
  * Named-arguments struct for check_connected. All arguments are
diff --git a/fetch-pack.c b/fetch-pack.c
index b0c7be717c..7b0e69884d 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -1906,16 +1906,15 @@ static void update_shallow(struct fetch_pack_args *args,
 	oid_array_clear(&ref);
 }
 
-static int iterate_ref_map(void *cb_data, struct object_id *oid)
+static const struct object_id *iterate_ref_map(void *cb_data)
 {
 	struct ref **rm = cb_data;
 	struct ref *ref = *rm;
 
 	if (!ref)
-		return -1; /* end of the list */
+		return NULL;
 	*rm = ref->next;
-	oidcpy(oid, &ref->old_oid);
-	return 0;
+	return &ref->old_oid;
 }
 
 struct ref *fetch_pack(struct fetch_pack_args *args,
-- 
2.33.0


[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  parent reply	other threads:[~2021-09-01 13:19 UTC|newest]

Thread overview: 48+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-08-20 10:08 [PATCH 0/6] Speed up mirror-fetches with many refs Patrick Steinhardt
2021-08-20 10:08 ` [PATCH 1/6] fetch: speed up lookup of want refs via commit-graph Patrick Steinhardt
2021-08-20 14:27   ` Derrick Stolee
2021-08-20 17:18     ` Junio C Hamano
2021-08-23  6:46       ` Patrick Steinhardt
2021-08-25 14:12         ` Derrick Stolee
2021-08-20 10:08 ` [PATCH 2/6] fetch: avoid unpacking headers in object existence check Patrick Steinhardt
2021-08-25 23:44   ` Ævar Arnfjörð Bjarmason
2021-08-20 10:08 ` [PATCH 3/6] connected: refactor iterator to return next object ID directly Patrick Steinhardt
2021-08-20 14:32   ` Derrick Stolee
2021-08-20 17:43     ` Junio C Hamano
2021-08-20 17:43   ` René Scharfe
2021-08-23  6:47     ` Patrick Steinhardt
2021-08-20 10:08 ` [PATCH 4/6] fetch-pack: optimize loading of refs via commit graph Patrick Steinhardt
2021-08-20 14:37   ` Derrick Stolee
2021-08-20 10:08 ` [PATCH 5/6] fetch: refactor fetch refs to be more extendable Patrick Steinhardt
2021-08-20 14:41   ` Derrick Stolee
2021-08-20 10:08 ` [PATCH 6/6] fetch: avoid second connectivity check if we already have all objects Patrick Steinhardt
2021-08-20 14:47   ` Derrick Stolee
2021-08-23  6:52     ` Patrick Steinhardt
2021-08-20 14:50 ` [PATCH 0/6] Speed up mirror-fetches with many refs Derrick Stolee
2021-08-21  0:09 ` Junio C Hamano
2021-08-24 10:36 ` [PATCH v2 0/7] " Patrick Steinhardt
2021-08-24 10:36   ` [PATCH v2 1/7] fetch: speed up lookup of want refs via commit-graph Patrick Steinhardt
2021-08-25 14:16     ` Derrick Stolee
2021-08-24 10:37   ` [PATCH v2 2/7] fetch: avoid unpacking headers in object existence check Patrick Steinhardt
2021-08-24 10:37   ` [PATCH v2 3/7] connected: refactor iterator to return next object ID directly Patrick Steinhardt
2021-08-24 10:37   ` [PATCH v2 4/7] fetch-pack: optimize loading of refs via commit graph Patrick Steinhardt
2021-08-24 10:37   ` [PATCH v2 5/7] fetch: refactor fetch refs to be more extendable Patrick Steinhardt
2021-08-25 14:19     ` Derrick Stolee
2021-09-01 12:48       ` Patrick Steinhardt
2021-08-24 10:37   ` [PATCH v2 6/7] fetch: merge fetching and consuming refs Patrick Steinhardt
2021-08-25 14:26     ` Derrick Stolee
2021-09-01 12:49       ` Patrick Steinhardt
2021-08-24 10:37   ` [PATCH v2 7/7] fetch: avoid second connectivity check if we already have all objects Patrick Steinhardt
2021-08-24 22:48   ` [PATCH v2 0/7] Speed up mirror-fetches with many refs Junio C Hamano
2021-08-25  6:04     ` Patrick Steinhardt
2021-08-25 14:27   ` Derrick Stolee
2021-09-01 13:09 ` [PATCH v3 " Patrick Steinhardt
2021-09-01 13:09   ` [PATCH v3 1/7] fetch: speed up lookup of want refs via commit-graph Patrick Steinhardt
2021-09-01 13:09   ` [PATCH v3 2/7] fetch: avoid unpacking headers in object existence check Patrick Steinhardt
2021-09-01 13:09   ` Patrick Steinhardt [this message]
2021-09-01 13:09   ` [PATCH v3 4/7] fetch-pack: optimize loading of refs via commit graph Patrick Steinhardt
2021-09-01 13:09   ` [PATCH v3 5/7] fetch: refactor fetch refs to be more extendable Patrick Steinhardt
2021-09-01 13:10   ` [PATCH v3 6/7] fetch: merge fetching and consuming refs Patrick Steinhardt
2021-09-01 13:10   ` [PATCH v3 7/7] fetch: avoid second connectivity check if we already have all objects Patrick Steinhardt
2021-09-01 19:58   ` [PATCH v3 0/7] Speed up mirror-fetches with many refs Junio C Hamano
2021-09-08  0:08     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ba834803ab30e3cc0be9ed71da54a6f2da2e6949.1630501732.git.ps@pks.im \
    --to=ps@pks.im \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=l.s.r@web.de \
    --cc=peff@peff.net \
    --cc=stolee@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).