git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Julian Phillips <julian@quantumfyre.co.uk>
To: Johan Herland <johan@herland.net>
Cc: git@vger.kernel.org, Junio C Hamano <gitster@pobox.com>
Subject: [RFC/PATCH v3] fetch: Speed up fetch by rewriting find_non_local_tags
Date: Thu, 17 Sep 2009 08:33:19 +0100	[thread overview]
Message-ID: <20090917073320.58452.41718.julian@quantumfyre.co.uk> (raw)
In-Reply-To: <200909170913.03639.johan@herland.net>

When trying to get a list of remote tags to see if we need to fetch
any we were doing a linear search for the matching tag ref for the
tag^{} commit entries.  This proves to be incredibly slow for large
numbers of tags.  Rewrite the function so that we build up a
string_list of refs to fetch and then process that instead.

As an extreme example, for a repository with 50000 tags (and just a
single commit on a single branch), a fetch that does nothing goes from
~1m50s to ~4.1s.

Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
---

Ok, so here it is ...

Sometimes I forget just much we git users value our time and resources.
;)

 builtin-fetch.c |   98 ++++++++++++++++++++++++++++++++++++------------------
 1 files changed, 65 insertions(+), 33 deletions(-)

diff --git a/builtin-fetch.c b/builtin-fetch.c
index cb48c57..acb08e4 100644
--- a/builtin-fetch.c
+++ b/builtin-fetch.c
@@ -504,57 +504,89 @@ static int will_fetch(struct ref **head, const unsigned char *sha1)
 	return 0;
 }
 
+struct tag_data {
+	struct ref **head;
+	struct ref ***tail;
+};
+
+static int add_to_tail(struct string_list_item *item, void *cb_data)
+{
+	struct tag_data *data = (struct tag_data *)cb_data;
+	struct ref *rm = NULL;
+
+	/* We have already decided to ignore this item */
+	if (!item->util)
+		return 0;
+
+	rm = alloc_ref(item->string);
+	rm->peer_ref = alloc_ref(item->string);
+	hashcpy(rm->old_sha1, item->util);
+
+	**data->tail = rm;
+	*data->tail = &rm->next;
+
+	return 0;
+}
+
 static void find_non_local_tags(struct transport *transport,
 			struct ref **head,
 			struct ref ***tail)
 {
 	struct string_list existing_refs = { NULL, 0, 0, 0 };
-	struct string_list new_refs = { NULL, 0, 0, 1 };
-	char *ref_name;
-	int ref_name_len;
-	const unsigned char *ref_sha1;
-	const struct ref *tag_ref;
-	struct ref *rm = NULL;
+	struct string_list remote_refs = { NULL, 0, 0, 0 };
+	struct tag_data data = {head, tail};
 	const struct ref *ref;
+	struct string_list_item *item = NULL;
 
 	for_each_ref(add_existing, &existing_refs);
 	for (ref = transport_get_remote_refs(transport); ref; ref = ref->next) {
 		if (prefixcmp(ref->name, "refs/tags"))
 			continue;
 
-		ref_name = xstrdup(ref->name);
-		ref_name_len = strlen(ref_name);
-		ref_sha1 = ref->old_sha1;
-
-		if (!strcmp(ref_name + ref_name_len - 3, "^{}")) {
-			ref_name[ref_name_len - 3] = 0;
-			tag_ref = transport_get_remote_refs(transport);
-			while (tag_ref) {
-				if (!strcmp(tag_ref->name, ref_name)) {
-					ref_sha1 = tag_ref->old_sha1;
-					break;
-				}
-				tag_ref = tag_ref->next;
-			}
+		/* the peeled ref always follows the matching base ref, so if we
+		 * see a peeled ref that we don't want to fetch then we can mark
+		 * the ref entry in the list as one to ignore by setting util to
+		 * NULL. */
+		if (!strcmp(ref->name + strlen(ref->name) - 3, "^{}")) {
+			if (item && !has_sha1_file(ref->old_sha1) &&
+			    !will_fetch(head, ref->old_sha1) &&
+			    !has_sha1_file(item->util) &&
+			    !will_fetch(head, item->util) )
+				item->util = NULL;
+			item = NULL;
+			continue;
 		}
 
-		if (!string_list_has_string(&existing_refs, ref_name) &&
-		    !string_list_has_string(&new_refs, ref_name) &&
-		    (has_sha1_file(ref->old_sha1) ||
-		     will_fetch(head, ref->old_sha1))) {
-			string_list_insert(ref_name, &new_refs);
+		/* If item is non-NULL here, then we previously saw a ref not
+		 * followed by a peeled reference, so we need to check if it is
+		 * a lightweight tag that we want to fetch */
+		if (item && !has_sha1_file(item->util) &&
+		    !will_fetch(head, item->util) )
+			item->util = NULL;
 
-			rm = alloc_ref(ref_name);
-			rm->peer_ref = alloc_ref(ref_name);
-			hashcpy(rm->old_sha1, ref_sha1);
+		item = NULL;
 
-			**tail = rm;
-			*tail = &rm->next;
-		}
-		free(ref_name);
+		/* skip duplicates and refs that we already have */
+		if (string_list_has_string(&remote_refs, ref->name) ||
+		    string_list_has_string(&existing_refs, ref->name))
+			continue;
+
+		item = string_list_insert(ref->name, &remote_refs);
+		item->util = (void *)ref->old_sha1;
 	}
 	string_list_clear(&existing_refs, 0);
-	string_list_clear(&new_refs, 0);
+
+	/* We may have a final lightweight tag that needs to be checked to see
+	 * if it needs fetching. */
+	if (item && !has_sha1_file(item->util) &&
+	    !will_fetch(head, item->util) )
+		item->util = NULL;
+
+	/* For all the tags in the remote_refs string list, call add_to_tail to
+	 * add them to the list of refs to be fetched */
+	for_each_string_list(add_to_tail, &remote_refs, &data);
+
+	string_list_clear(&remote_refs, 0);
 }
 
 static void check_not_current_branch(struct ref *ref_map)
-- 
1.6.4.2

  reply	other threads:[~2009-09-17  7:40 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-16  7:53 [RFC/PATCH 0/2] Speed up fetch with large number of tags Julian Phillips
2009-09-16  7:53 ` [RFC/PATCH 1/2] ref-dict: Add a set of functions for working with a ref dictionary Julian Phillips
2009-09-16  7:53 ` [RFC/PATCH 2/2] fetch: Speed up fetch by using " Julian Phillips
2009-09-16  9:44 ` [RFC/PATCH 0/2] Speed up fetch with large number of tags Junio C Hamano
2009-09-16 22:32   ` Julian Phillips
2009-09-16 22:42     ` Shawn O. Pearce
2009-09-16 22:52       ` Junio C Hamano
2009-09-16 23:03         ` Shawn O. Pearce
2009-09-16 23:19           ` Junio C Hamano
2009-09-16 22:53     ` [RFC/PATCH v2] fetch: Speed up fetch by rewriting find_non_local_tags Julian Phillips
2009-09-16 23:15       ` Junio C Hamano
2009-09-16 23:46         ` Julian Phillips
2009-09-17  1:30           ` Julian Phillips
2009-09-17  7:13             ` Johan Herland
2009-09-17  7:33               ` Julian Phillips [this message]
2009-09-16 22:46   ` [RFC/PATCH 0/2] Speed up fetch with large number of tags Shawn O. Pearce
2009-09-22 20:36     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090917073320.58452.41718.julian@quantumfyre.co.uk \
    --to=julian@quantumfyre.co.uk \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=johan@herland.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).