git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Julian Phillips <julian@quantumfyre.co.uk>
To: git@vger.kernel.org
Subject: [PATCH] refs.c: add a function to sort a ref list, rather then sorting on add
Date: Tue, 17 Apr 2007 02:42:50 +0100	[thread overview]
Message-ID: <20070417014307.12486.28930.julian@quantumfyre.co.uk> (raw)

Rather than sorting the refs list while building it, sort in one go
after it is built using a merge sort.  This has a large performance
boost with large numbers of refs.

Signed-off-by: Julian Phillips <julian@quantumfyre.co.uk>
---

Having got builtin fetch to the point of generating a correct FETCH_HEAD (for a certain path through the code at least), I revisted the speed issue I brought up a while back with the sorting of refs.

Running fetch (builtin version) on a repo with >9000 refs which is up-to-date, using the old sort-on-add I get (best of 5, warm cache):

real    0m4.351s
user    0m4.068s
sys     0m0.219s

With this patch the same fetch gives (worst of 5, warm cache):

real    0m2.196s
user    0m1.870s
sys     0m0.212s

Since this is orthogonal to making fetch a builtin, I don't see that it needs to wait ...

 refs.c |   95 +++++++++++++++++++++++++++++++++++++++++++++++++--------------
 1 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/refs.c b/refs.c
index d2b7b7f..23982fc 100644
--- a/refs.c
+++ b/refs.c
@@ -47,22 +47,7 @@ static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
 				struct ref_list **new_entry)
 {
 	int len;
-	struct ref_list **p = &list, *entry;
-
-	/* Find the place to insert the ref into.. */
-	while ((entry = *p) != NULL) {
-		int cmp = strcmp(entry->name, name);
-		if (cmp > 0)
-			break;
-
-		/* Same as existing entry? */
-		if (!cmp) {
-			if (new_entry)
-				*new_entry = entry;
-			return list;
-		}
-		p = &entry->next;
-	}
+	struct ref_list *entry;
 
 	/* Allocate it and add it in.. */
 	len = strlen(name) + 1;
@@ -71,11 +56,79 @@ static struct ref_list *add_ref(const char *name, const unsigned char *sha1,
 	hashclr(entry->peeled);
 	memcpy(entry->name, name, len);
 	entry->flag = flag;
-	entry->next = *p;
-	*p = entry;
+	entry->next = list;
 	if (new_entry)
 		*new_entry = entry;
-	return list;
+	return entry;
+}
+
+/* merge sort the ref list */
+static struct ref_list *sort_ref_list(struct ref_list *list)
+{
+	int psize, qsize, last_merge_count;
+	struct ref_list *p, *q, *l, *e;
+	struct ref_list *new_list = list;
+	int k = 1;
+	int merge_count = 0;
+
+	if (!list)
+		return list;
+
+	do {
+		last_merge_count = merge_count;
+		merge_count = 0;
+
+		psize = 0;
+
+		p = new_list;
+		q = new_list;
+		new_list = NULL;
+		l = NULL;
+
+		while (p) {
+			merge_count++;
+
+			while (psize < k && q->next) {
+				q = q->next;
+				psize++;
+			}
+			qsize = k;
+
+			while ((psize > 0) || (qsize > 0 && q)) {
+				if (qsize == 0 || !q) {
+					e = p;
+					p = p->next;
+					psize--;
+				} else if (psize == 0) {
+					e = q;
+					q = q->next;
+					qsize--;
+				} else if (strcmp(q->name, p->name) < 0) {
+					e = q;
+					q = q->next;
+					qsize--;
+				} else {
+					e = p;
+					p = p->next;
+					psize--;
+				}
+
+				e->next = NULL;
+
+				if (l)
+					l->next = e;
+				if (!new_list)
+					new_list = e;
+				l = e;
+			}
+
+			p = q;
+		};
+
+		k = k * 2;
+	} while ((last_merge_count != merge_count) || (last_merge_count != 1));
+
+	return new_list;
 }
 
 /*
@@ -142,7 +195,7 @@ static void read_packed_refs(FILE *f, struct cached_refs *cached_refs)
 		    !get_sha1_hex(refline + 1, sha1))
 			hashcpy(last->peeled, sha1);
 	}
-	cached_refs->packed = list;
+	cached_refs->packed = sort_ref_list(list);
 }
 
 static struct ref_list *get_packed_refs(void)
@@ -201,7 +254,7 @@ static struct ref_list *get_ref_dir(const char *base, struct ref_list *list)
 		free(ref);
 		closedir(dir);
 	}
-	return list;
+	return sort_ref_list(list);
 }
 
 static struct ref_list *get_loose_refs(void)
-- 
1.5.1.1

             reply	other threads:[~2007-04-17  1:48 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2007-04-17  1:42 Julian Phillips [this message]
2007-04-17 16:03 ` [PATCH] refs.c: add a function to sort a ref list, rather then sorting on add Linus Torvalds
2007-04-17 20:17   ` Julian Phillips
2007-04-17 20:51     ` Linus Torvalds
2007-04-17 22:03       ` Julian Phillips
2007-04-17 22:00   ` Junio C Hamano
2007-04-17 22:43     ` Julian Phillips
2007-04-18 19:36       ` Junio C Hamano
2007-04-18 21:27         ` [PATCH] refs.c: drop duplicate entries in sort_ref_list Julian Phillips
2007-04-19  8:19           ` Johannes Sixt
2007-04-19  8:49             ` Julian Phillips
2007-04-17 21:01 ` [PATCH] refs.c: add a function to sort a ref list, rather then sorting on add Julian Phillips

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20070417014307.12486.28930.julian@quantumfyre.co.uk \
    --to=julian@quantumfyre.co.uk \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).