[PATCH] Optimization for git-rev-list --objects

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* [PATCH] Optimization for git-rev-list --objects
@ 2006-02-11 19:14 Alexandre Julliard
  2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
  2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
  0 siblings, 2 replies; 21+ messages in thread
From: Alexandre Julliard @ 2006-02-11 19:14 UTC (permalink / raw
  To: git


When building a large list of objects, most of the time is spent in
created_object() inserting the objects in the sorted list. This patch
splits the list in 256 sublists to reduce the impact of the O(n^2)
list insertion.

On the WineHQ repository (220,000 objects), the patch reduces the time
needed for a 'git-rev-list --objects HEAD' from 2 minutes to 20 seconds.

Signed-off-by: Alexandre Julliard <julliard@winehq.org>

---

 fsck-objects.c |   66 +++++++++++++++++++++++++++++---------------------------
 name-rev.c     |    9 ++++----
 object.c       |   24 ++++++++++----------
 object.h       |    4 ++-
 4 files changed, 53 insertions(+), 50 deletions(-)

68f8fcb7f5883ecc44a80e822e0b78ee8efd9eb9
diff --git a/fsck-objects.c b/fsck-objects.c
index 9950be2..5bdb21e 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -58,45 +58,47 @@ static int objwarning(struct object *obj
 
 static void check_connectivity(void)
 {
-	int i;
+	int i, j;
 
 	/* Look up all the requirements, warn about missing objects.. */
-	for (i = 0; i < nr_objs; i++) {
-		struct object *obj = objs[i];
+	for (i = 0; i < 256; i++) {
+		for (j = 0; j < nr_objs[i]; j++) {
+			struct object *obj = objs[i][j];
 
-		if (!obj->parsed) {
-			if (!standalone && has_sha1_file(obj->sha1))
-				; /* it is in pack */
-			else
-				printf("missing %s %s\n",
-				       obj->type, sha1_to_hex(obj->sha1));
-			continue;
-		}
+			if (!obj->parsed) {
+				if (!standalone && has_sha1_file(obj->sha1))
+					; /* it is in pack */
+				else
+					printf("missing %s %s\n",
+					       obj->type, sha1_to_hex(obj->sha1));
+				continue;
+			}
 
-		if (obj->refs) {
-			const struct object_refs *refs = obj->refs;
-			unsigned j;
-			for (j = 0; j < refs->count; j++) {
-				struct object *ref = refs->ref[j];
-				if (ref->parsed ||
-				    (!standalone && has_sha1_file(ref->sha1)))
-					continue;
-				printf("broken link from %7s %s\n",
-				       obj->type, sha1_to_hex(obj->sha1));
-				printf("              to %7s %s\n",
-				       ref->type, sha1_to_hex(ref->sha1));
+			if (obj->refs) {
+				const struct object_refs *refs = obj->refs;
+				unsigned k;
+				for (k = 0; k < refs->count; k++) {
+					struct object *ref = refs->ref[k];
+					if (ref->parsed ||
+					    (!standalone && has_sha1_file(ref->sha1)))
+						continue;
+					printf("broken link from %7s %s\n",
+					       obj->type, sha1_to_hex(obj->sha1));
+					printf("              to %7s %s\n",
+					       ref->type, sha1_to_hex(ref->sha1));
+				}
 			}
-		}
 
-		if (show_unreachable && !(obj->flags & REACHABLE)) {
-			printf("unreachable %s %s\n",
-			       obj->type, sha1_to_hex(obj->sha1));
-			continue;
-		}
+			if (show_unreachable && !(obj->flags & REACHABLE)) {
+				printf("unreachable %s %s\n",
+				       obj->type, sha1_to_hex(obj->sha1));
+				continue;
+			}
 
-		if (!obj->used) {
-			printf("dangling %s %s\n", obj->type, 
-			       sha1_to_hex(obj->sha1));
+			if (!obj->used) {
+				printf("dangling %s %s\n", obj->type, 
+				       sha1_to_hex(obj->sha1));
+			}
 		}
 	}
 }
diff --git a/name-rev.c b/name-rev.c
index bbadb91..9a8d086 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -230,11 +230,12 @@ int main(int argc, char **argv)
 				fwrite(p_start, p - p_start, 1, stdout);
 		}
 	} else if (all) {
-		int i;
+		int i, j;
 
-		for (i = 0; i < nr_objs; i++)
-			printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
-					get_rev_name(objs[i]));
+		for (i = 0; i < 256; i++)
+			for (j = 0; j < nr_objs[i]; j++)
+				printf("%s %s\n", sha1_to_hex(objs[i][j]->sha1),
+						get_rev_name(objs[i][j]));
 	} else
 		for ( ; revs; revs = revs->next)
 			printf("%s %s\n", revs->name, get_rev_name(revs->item));
diff --git a/object.c b/object.c
index 1577f74..fcd4d0c 100644
--- a/object.c
+++ b/object.c
@@ -5,19 +5,19 @@
 #include "commit.h"
 #include "tag.h"
 
-struct object **objs;
-int nr_objs;
-static int obj_allocs;
+struct object **objs[256];
+int nr_objs[256];
+static int obj_allocs[256];
 
 int track_object_refs = 1;
 
 static int find_object(const unsigned char *sha1)
 {
-	int first = 0, last = nr_objs;
+	int first = 0, last = nr_objs[*sha1];
 
         while (first < last) {
                 int next = (first + last) / 2;
-                struct object *obj = objs[next];
+                struct object *obj = objs[*sha1][next];
                 int cmp;
 
                 cmp = memcmp(sha1, obj->sha1, 20);
@@ -36,7 +36,7 @@ struct object *lookup_object(const unsig
 {
 	int pos = find_object(sha1);
 	if (pos >= 0)
-		return objs[pos];
+		return objs[*sha1][pos];
 	return NULL;
 }
 
@@ -54,17 +54,17 @@ void created_object(const unsigned char 
 		die("Inserting %s twice\n", sha1_to_hex(sha1));
 	pos = -pos-1;
 
-	if (obj_allocs == nr_objs) {
-		obj_allocs = alloc_nr(obj_allocs);
-		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
+	if (obj_allocs[*sha1] == nr_objs[*sha1]) {
+		obj_allocs[*sha1] = alloc_nr(obj_allocs[*sha1]);
+		objs[*sha1] = xrealloc(objs[*sha1], obj_allocs[*sha1] * sizeof(struct object *));
 	}
 
 	/* Insert it into the right place */
-	memmove(objs + pos + 1, objs + pos, (nr_objs - pos) * 
+	memmove(objs[*sha1] + pos + 1, objs[*sha1] + pos, (nr_objs[*sha1] - pos) * 
 		sizeof(struct object *));
 
-	objs[pos] = obj;
-	nr_objs++;
+	objs[*sha1][pos] = obj;
+	nr_objs[*sha1]++;
 }
 
 struct object_refs *alloc_object_refs(unsigned count)
diff --git a/object.h b/object.h
index 0e76182..dcd6ac4 100644
--- a/object.h
+++ b/object.h
@@ -23,8 +23,8 @@ struct object {
 };
 
 extern int track_object_refs;
-extern int nr_objs;
-extern struct object **objs;
+extern int nr_objs[256];
+extern struct object **objs[256];
 
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
-- 
1.1.6.g68f8


-- 
Alexandre Julliard
julliard@winehq.org

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-11 19:14 [PATCH] Optimization for git-rev-list --objects Alexandre Julliard
@ 2006-02-12  1:57 ` Johannes Schindelin
  2006-02-12  2:46   ` Junio C Hamano
  2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
  1 sibling, 1 reply; 21+ messages in thread
From: Johannes Schindelin @ 2006-02-12  1:57 UTC (permalink / raw
  To: Alexandre Julliard; +Cc: git


In a simple test, this brings down the CPU time from 47 sec to 22 sec.

Signed-off-by: Johannes Schindelin <Johannes.Schindelin@gmx.de>

---

	On Sat, 11 Feb 2006, Alexandre Julliard wrote:

	> When building a large list of objects, most of the time is spent in
	> created_object() inserting the objects in the sorted list. This patch
	> splits the list in 256 sublists to reduce the impact of the O(n^2)
	> list insertion.

	Your patch brought down the CPU time to 27 sec in my test.

 fsck-objects.c |    5 +++-
 name-rev.c     |    7 +++---
 object.c       |   67 +++++++++++++++++++++++++++++++++-----------------------
 object.h       |    2 +-
 4 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/fsck-objects.c b/fsck-objects.c
index 9950be2..6439d55 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -61,9 +61,12 @@ static void check_connectivity(void)
 	int i;
 
 	/* Look up all the requirements, warn about missing objects.. */
-	for (i = 0; i < nr_objs; i++) {
+	for (i = 0; i < obj_allocs; i++) {
 		struct object *obj = objs[i];
 
+		if (!obj)
+			continue;
+
 		if (!obj->parsed) {
 			if (!standalone && has_sha1_file(obj->sha1))
 				; /* it is in pack */
diff --git a/name-rev.c b/name-rev.c
index bdaa59b..08faa54 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -303,9 +303,10 @@ int main(int argc, char **argv)
 	} else if (all) {
 		int i;
 
-		for (i = 0; i < nr_objs; i++)
-			printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
-					get_rev_name(objs[i]));
+		for (i = 0; i < obj_allocs; i++)
+			if (objs[i])
+				printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
+						get_rev_name(objs[i]));
 	} else
 		for ( ; revs; revs = revs->next)
 			printf("%s %s\n", revs->name, get_rev_name(revs->item));
diff --git a/object.c b/object.c
index 1577f74..3259862 100644
--- a/object.c
+++ b/object.c
@@ -6,30 +6,32 @@
 #include "tag.h"
 
 struct object **objs;
-int nr_objs;
-static int obj_allocs;
+static int nr_objs;
+int obj_allocs;
 
 int track_object_refs = 1;
 
+static int hashtable_index(const unsigned char *sha1)
+{
+	unsigned int i = *(unsigned int *)sha1;
+	return (int)(i % obj_allocs);
+}
+
 static int find_object(const unsigned char *sha1)
 {
-	int first = 0, last = nr_objs;
+	int i = hashtable_index(sha1);
+
+	if (!objs)
+		return -1;
 
-        while (first < last) {
-                int next = (first + last) / 2;
-                struct object *obj = objs[next];
-                int cmp;
-
-                cmp = memcmp(sha1, obj->sha1, 20);
-                if (!cmp)
-                        return next;
-                if (cmp < 0) {
-                        last = next;
-                        continue;
-                }
-                first = next+1;
-        }
-        return -first-1;
+	while (objs[i]) {
+		if (memcmp(sha1, objs[i]->sha1, 20) == 0)
+			return i;
+		i++;
+		if (i == obj_allocs)
+			i = 0;
+	}
+	return -1 - i;
 }
 
 struct object *lookup_object(const unsigned char *sha1)
@@ -42,7 +44,7 @@ struct object *lookup_object(const unsig
 
 void created_object(const unsigned char *sha1, struct object *obj)
 {
-	int pos = find_object(sha1);
+	int pos;
 
 	obj->parsed = 0;
 	memcpy(obj->sha1, sha1, 20);
@@ -50,18 +52,27 @@ void created_object(const unsigned char 
 	obj->refs = NULL;
 	obj->used = 0;
 
-	if (pos >= 0)
-		die("Inserting %s twice\n", sha1_to_hex(sha1));
-	pos = -pos-1;
-
-	if (obj_allocs == nr_objs) {
-		obj_allocs = alloc_nr(obj_allocs);
+	if (obj_allocs - 1 <= nr_objs * 2) {
+		int i, count = obj_allocs;
+		obj_allocs = (obj_allocs < 32 ? 32 : 2 * obj_allocs);
 		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
+		memset(objs + count, 0, (obj_allocs - count)
+				* sizeof(struct object *));
+		for (i = 0; i < count; i++)
+			if (objs[i]) {
+				int j = find_object(objs[i]->sha1);
+				if (j != i) {
+					j = -1 - j;
+					objs[j] = objs[i];
+					objs[i] = NULL;
+				}
+			}
 	}
 
-	/* Insert it into the right place */
-	memmove(objs + pos + 1, objs + pos, (nr_objs - pos) * 
-		sizeof(struct object *));
+	pos = find_object(sha1);
+	if (pos >= 0)
+		die("Inserting %s twice\n", sha1_to_hex(sha1));
+	pos = -pos-1;
 
 	objs[pos] = obj;
 	nr_objs++;
diff --git a/object.h b/object.h
index 0e76182..e08afbd 100644
--- a/object.h
+++ b/object.h
@@ -23,7 +23,7 @@ struct object {
 };
 
 extern int track_object_refs;
-extern int nr_objs;
+extern int obj_allocs;
 extern struct object **objs;
 
 /** Internal only **/

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] Optimization for git-rev-list --objects
  2006-02-11 19:14 [PATCH] Optimization for git-rev-list --objects Alexandre Julliard
  2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
@ 2006-02-12  2:19 ` Linus Torvalds
  2006-02-12  2:39   ` Junio C Hamano
  1 sibling, 1 reply; 21+ messages in thread
From: Linus Torvalds @ 2006-02-12  2:19 UTC (permalink / raw
  To: Alexandre Julliard; +Cc: git



On Sat, 11 Feb 2006, Alexandre Julliard wrote:
> 
> When building a large list of objects, most of the time is spent in
> created_object() inserting the objects in the sorted list. This patch
> splits the list in 256 sublists to reduce the impact of the O(n^2)
> list insertion.
> 
> On the WineHQ repository (220,000 objects), the patch reduces the time
> needed for a 'git-rev-list --objects HEAD' from 2 minutes to 20 seconds.

Goodie. However, I wonder if we care about the sorting at _all_.

The sorting really only helps "find_object()", and the thing is, we could 
certainly do that other ways too. Keeping a sorted list is kind of 
senseless, when the data is so amenable to be kept in a tree.

We could add left/right pointers to each object, and just keep them in a 
tree. We probably don't even need any fancy balancing, since hashing means 
that we could just keep the first object we ever allocate as the root, and 
things should normally be pretty damn balanced.

Then we could do:

	static struct object *root = NULL;

	static struct object **lookup_object_position(unsigned char *sha1)
	{
		struct object **p = &root;

		for (;;) {
			struct object *object = *p;
			int sign;

			if (!object)
				break;
			sign = memcmp(sha1, object->sha1, 20)
			if (!sign)
				break;
			p = &object->left;
			if (sign < 0)
				continue;
			p = &object->right;
		}
		return p;
	}

and now we _trivially_ have:

	struct object *lookup_object(unsigned char *sha1)
	{
		return *lookup_object_position(unsigned char *sha1);
	}

	void created_object(struct object **pos, unsigned char *sha1, struct object *n)
	{
		memcpy(n->sha1, sha1, 20);
		*pos = n;
	}

and the "lookup_or_create()" functions would become

	struct tag *lookup_tag(unsigned char *sha1)
	{
		struct object **p = lookup_object_position(sha1);
		struct object *object;

		object = *p;
		if (!object) {
			struct tag *ret = xmalloc(sizeof(struct tag));
			memset(ret, 0, sizeof(struct tag));
			created_object(p, sha1, &ret->object);
			ret->object.type = tag_type;
			return ret;
		}
		if (!object->type)
			obj->type = tag_type;

		if (object->type != tag_type) {
			error("Object %s is a %s, not a tag",
				sha1_to_hex(sha1), obj->type);
			return NULL;
		}
		return (struct tag *)obj;
	}

etc etc.

Hmm? That would be a lot faster than what we have now, I suspect, and none 
of the ops are expensive.

Anybody want to try this approach?

		Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Optimization for git-rev-list --objects
  2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
@ 2006-02-12  2:39   ` Junio C Hamano
  2006-02-12  4:11     ` [PATCH] binary-tree-based objects Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12  2:39 UTC (permalink / raw
  To: Linus Torvalds; +Cc: git

Linus Torvalds <torvalds@osdl.org> writes:

> Hmm? That would be a lot faster than what we have now, I suspect, and none 
> of the ops are expensive.
>
> Anybody want to try this approach?

I am game.  I was looking at Johannes' patch that uses growing
circular hash and having the same thought.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
@ 2006-02-12  2:46   ` Junio C Hamano
  2006-02-12  8:52     ` Alexandre Julliard
  2006-02-12 11:19     ` Florian Weimer
  0 siblings, 2 replies; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12  2:46 UTC (permalink / raw
  To: Johannes Schindelin, Alexandre Julliard; +Cc: git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> In a simple test, this brings down the CPU time from 47 sec to 22 sec.

I was planning to take Alexandre's patch, but the approach your
patch takes feels more correct -- it scales with the number of
objects you need to handle, instead of having fixed 256
hashbuckets.

BTW, your version dumped core in hashtable_index immediately
after I started "git-rev-list --objects HEAD".  How did you get
_any_ CPU time?

I am not sure expecting that object name pointers are always
(unsigned int *) aligned as your patch does is OK.  We may want
to have something like the attached patch on top of yours.

I am also interested to find out how much the rehashing you do
when you update obj_allocs to a larger value is costing.

Alexandle, if you have a chance, could you try Johannes' patch
on your workload to see if it works OK for you?

-- >8 --
[PATCH] do not assume object name pointers are uint aligned.

Also fix an obvious bug that caused it dump core at my first
attempt.  There might be others but I did not actively look for
them.

Signed-off-by: Junio C Hamano <junkio@cox.net>
---
diff --git a/object.c b/object.c
index 3259862..59e5e36 100644
--- a/object.c
+++ b/object.c
@@ -13,17 +13,24 @@ int track_object_refs = 1;

 static int hashtable_index(const unsigned char *sha1)
 {
-	unsigned int i = *(unsigned int *)sha1;
-	return (int)(i % obj_allocs);
+	int cnt;
+	unsigned int ix = *sha1++;
+
+	for (cnt = 1; cnt < sizeof(unsigned int); cnt++) {
+		ix <<= 8;
+		ix |= *sha1++;
+	}
+	return (int)(ix % obj_allocs);
 }

 static int find_object(const unsigned char *sha1)
 {
-	int i = hashtable_index(sha1);
+	int i;

 	if (!objs)
 		return -1;

+	i = hashtable_index(sha1);
 	while (objs[i]) {
 		if (memcmp(sha1, objs[i]->sha1, 20) == 0)
 			return i;

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH] binary-tree-based objects.
  2006-02-12  2:39   ` Junio C Hamano
@ 2006-02-12  4:11     ` Junio C Hamano
  2006-02-12  5:06       ` Linus Torvalds
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12  4:11 UTC (permalink / raw
  To: Linus Torvalds; +Cc: git

This implements Linus' idea to keep objects in a binary tree,
instead of using the linear array as we currently do.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

 * I haven't benched this seriously yet.  One datapoint:

	time git-rev-list --objects v2.6.15..linus | wc -l

   are 53sec vs 22sec improvement with the same output.

 fsck-objects.c |   36 +++++++++++++++++----------------
 name-rev.c     |   17 +++++++++++-----
 object.c       |   61 +++++++++++++++++++++-----------------------------------
 object.h       |    3 ++-
 4 files changed, 55 insertions(+), 62 deletions(-)

3c160f4d94cf16db5dc9c603e98ebacbe9ac4ca7
diff --git a/fsck-objects.c b/fsck-objects.c
index 9950be2..28a7c1b 100644
--- a/fsck-objects.c
+++ b/fsck-objects.c
@@ -56,23 +56,21 @@ static int objwarning(struct object *obj
 }
 
 
-static void check_connectivity(void)
+static void check_connectivity(struct object *obj)
 {
-	int i;
-
 	/* Look up all the requirements, warn about missing objects.. */
-	for (i = 0; i < nr_objs; i++) {
-		struct object *obj = objs[i];
-
-		if (!obj->parsed) {
-			if (!standalone && has_sha1_file(obj->sha1))
-				; /* it is in pack */
-			else
-				printf("missing %s %s\n",
-				       obj->type, sha1_to_hex(obj->sha1));
-			continue;
-		}
+ again:
+	if (!obj)
+		return;
 
+	if (!obj->parsed) {
+		if (!standalone && has_sha1_file(obj->sha1))
+			; /* it is in pack */
+		else
+			printf("missing %s %s\n",
+			       obj->type, sha1_to_hex(obj->sha1));
+	}
+	else {
 		if (obj->refs) {
 			const struct object_refs *refs = obj->refs;
 			unsigned j;
@@ -91,14 +89,16 @@ static void check_connectivity(void)
 		if (show_unreachable && !(obj->flags & REACHABLE)) {
 			printf("unreachable %s %s\n",
 			       obj->type, sha1_to_hex(obj->sha1));
-			continue;
 		}
-
-		if (!obj->used) {
+		else if (!obj->used) {
 			printf("dangling %s %s\n", obj->type, 
 			       sha1_to_hex(obj->sha1));
 		}
 	}
+	if (obj->left && obj->right)
+		check_connectivity(obj->left);
+	obj = obj->right ? obj->right : obj->left;
+	goto again;
 }
 
 /*
@@ -556,6 +556,6 @@ int main(int argc, char **argv)
 		}
 	}
 
-	check_connectivity();
+	check_connectivity(objs_root);
 	return 0;
 }
diff --git a/name-rev.c b/name-rev.c
index bbadb91..a4fecfb 100644
--- a/name-rev.c
+++ b/name-rev.c
@@ -120,6 +120,17 @@ static const char* get_rev_name(struct o
 	return buffer;
 }
 	
+void show_all_names(struct object *obj)
+{
+	while (obj) {
+		printf("%s %s\n", sha1_to_hex(obj->sha1), get_rev_name(obj));
+		if (obj->left && obj->right)
+			show_all_names(obj->left);
+		obj = obj->right ? obj->right : obj->left;
+	}
+}
+
+
 int main(int argc, char **argv)
 {
 	struct object_list *revs = NULL;
@@ -230,11 +241,7 @@ int main(int argc, char **argv)
 				fwrite(p_start, p - p_start, 1, stdout);
 		}
 	} else if (all) {
-		int i;
-
-		for (i = 0; i < nr_objs; i++)
-			printf("%s %s\n", sha1_to_hex(objs[i]->sha1),
-					get_rev_name(objs[i]));
+		show_all_names(objs_root);
 	} else
 		for ( ; revs; revs = revs->next)
 			printf("%s %s\n", revs->name, get_rev_name(revs->item));
diff --git a/object.c b/object.c
index 1577f74..a1b0729 100644
--- a/object.c
+++ b/object.c
@@ -5,65 +5,50 @@
 #include "commit.h"
 #include "tag.h"
 
-struct object **objs;
+struct object *objs_root;
 int nr_objs;
-static int obj_allocs;
 
 int track_object_refs = 1;
 
-static int find_object(const unsigned char *sha1)
+static struct object **lookup_object_position(const unsigned char *sha1)
 {
-	int first = 0, last = nr_objs;
+	struct object **p = &objs_root;
 
-        while (first < last) {
-                int next = (first + last) / 2;
-                struct object *obj = objs[next];
-                int cmp;
-
-                cmp = memcmp(sha1, obj->sha1, 20);
-                if (!cmp)
-                        return next;
-                if (cmp < 0) {
-                        last = next;
-                        continue;
-                }
-                first = next+1;
-        }
-        return -first-1;
+	for (;;) {
+		struct object *object = *p;
+		int sign;
+
+		if (!object)
+			break;
+		sign = memcmp(sha1, object->sha1, 20);
+		if (!sign)
+			break;
+		p = &object->left;
+		if (sign < 0)
+			continue;
+		p = &object->right;
+	}
+	return p;
 }
 
 struct object *lookup_object(const unsigned char *sha1)
 {
-	int pos = find_object(sha1);
-	if (pos >= 0)
-		return objs[pos];
-	return NULL;
+	return *lookup_object_position(sha1);
 }
 
 void created_object(const unsigned char *sha1, struct object *obj)
 {
-	int pos = find_object(sha1);
+	struct object **op = lookup_object_position(sha1);
 
 	obj->parsed = 0;
 	memcpy(obj->sha1, sha1, 20);
 	obj->type = NULL;
 	obj->refs = NULL;
 	obj->used = 0;
-
-	if (pos >= 0)
+	obj->left = obj->right = NULL;
+	if (*op)
 		die("Inserting %s twice\n", sha1_to_hex(sha1));
-	pos = -pos-1;
-
-	if (obj_allocs == nr_objs) {
-		obj_allocs = alloc_nr(obj_allocs);
-		objs = xrealloc(objs, obj_allocs * sizeof(struct object *));
-	}
-
-	/* Insert it into the right place */
-	memmove(objs + pos + 1, objs + pos, (nr_objs - pos) * 
-		sizeof(struct object *));
-
-	objs[pos] = obj;
+	*op = obj;
 	nr_objs++;
 }
 
diff --git a/object.h b/object.h
index 0e76182..32b276d 100644
--- a/object.h
+++ b/object.h
@@ -19,12 +19,13 @@ struct object {
 	unsigned char sha1[20];
 	const char *type;
 	struct object_refs *refs;
+	struct object *left, *right;
 	void *util;
 };
 
 extern int track_object_refs;
 extern int nr_objs;
-extern struct object **objs;
+extern struct object *objs_root;
 
 /** Internal only **/
 struct object *lookup_object(const unsigned char *sha1);
-- 
1.1.6.g69c5

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  4:11     ` [PATCH] binary-tree-based objects Junio C Hamano
@ 2006-02-12  5:06       ` Linus Torvalds
  2006-02-12  5:22         ` Linus Torvalds
  0 siblings, 1 reply; 21+ messages in thread
From: Linus Torvalds @ 2006-02-12  5:06 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git



On Sat, 11 Feb 2006, Junio C Hamano wrote:
> 
>  * I haven't benched this seriously yet.  One datapoint:
> 
> 	time git-rev-list --objects v2.6.15..linus | wc -l
> 
>    are 53sec vs 22sec improvement with the same output.

Another datapoint: doing 

	time git-rev-list --objects HEAD > /dev/null 

three times in a row (to verify that the numbers are stable - they very 
clearly are).

Before:
	real    0m41.322s	user    0m40.612s	sys     0m0.492s
	real    0m40.797s	user    0m40.140s	sys     0m0.468s
	real    0m40.433s	user    0m40.016s	sys     0m0.412s

After:
	real    0m22.542s	user    0m22.080s	sys     0m0.448s
	real    0m22.660s	user    0m22.336s	sys     0m0.312s
	real    0m22.671s	user    0m22.236s	sys     0m0.292s

and doing some trivial oprofile runs shows that the object lookup is no 
longer dominant (my libc's don't have symbol information, so I don't get 
good profile data, but it shows that libc and libz are the biggest issues, 
with memcmp and malloc/free apparently being much bigger issues than the 
object lookup).

			Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  5:06       ` Linus Torvalds
@ 2006-02-12  5:22         ` Linus Torvalds
  2006-02-12  5:23           ` Linus Torvalds
  2006-02-12  7:05           ` Linus Torvalds
  0 siblings, 2 replies; 21+ messages in thread
From: Linus Torvalds @ 2006-02-12  5:22 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

On Sat, 11 Feb 2006, Linus Torvalds wrote:
> 
> Before:
> 	real    0m41.322s	user    0m40.612s	sys     0m0.492s
> 	real    0m40.797s	user    0m40.140s	sys     0m0.468s
> 	real    0m40.433s	user    0m40.016s	sys     0m0.412s
> 
> After:
> 	real    0m22.542s	user    0m22.080s	sys     0m0.448s
> 	real    0m22.660s	user    0m22.336s	sys     0m0.312s
> 	real    0m22.671s	user    0m22.236s	sys     0m0.292s

And just so you wouldn't think that all my machines are slow..

Before:
	real    0m28.645s	user    0m28.366s	sys     0m0.280s
	real    0m28.700s	user    0m28.486s	sys     0m0.212s

After:
	real    0m16.566s	user    0m16.373s	sys     0m0.196s
	real    0m16.512s	user    0m16.277s	sys     0m0.236s

so there (that's all with current kernel HEAD, mostly packed).

Now, I haven't compared it to the other suggested fixes (hashing, and the 
256-way bucket-sorting), but I obviously prefer the tree approach because 
it's my idea (and my ideas are _always_ superior) and because it's so dang 
simple.

If somebody shows that the other approaches are faster, then I guess I'll 
just have to sulk in a corner and grown quietly at people.

		Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  5:22         ` Linus Torvalds
@ 2006-02-12  5:23           ` Linus Torvalds
  2006-02-12  5:48             ` Junio C Hamano
  2006-02-12  7:05           ` Linus Torvalds
  1 sibling, 1 reply; 21+ messages in thread
From: Linus Torvalds @ 2006-02-12  5:23 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git



On Sat, 11 Feb 2006, Linus Torvalds wrote:
> 
> If somebody shows that the other approaches are faster, then I guess I'll 
> just have to sulk in a corner and grown quietly at people.

growl. growL. With an 'L'!

		Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  5:23           ` Linus Torvalds
@ 2006-02-12  5:48             ` Junio C Hamano
  2006-02-12  6:07               ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12  5:48 UTC (permalink / raw
  To: Linus Torvalds; +Cc: git

Linus Torvalds <torvalds@osdl.org> writes:

> On Sat, 11 Feb 2006, Linus Torvalds wrote:
>> 
>> If somebody shows that the other approaches are faster, then I guess I'll 
>> just have to sulk in a corner and grown quietly at people.
>
> growl. growL. With an 'L'!

I do not get it.

But my impression was the circular hash with trivial fixes were
the fastest.  I am benching them now.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  5:48             ` Junio C Hamano
@ 2006-02-12  6:07               ` Junio C Hamano
  2006-02-12  6:53                 ` Linus Torvalds
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12  6:07 UTC (permalink / raw
  To: Linus Torvalds; +Cc: git, Alexandre Julliard, Johannes Schindelin

Junio C Hamano <junkio@cox.net> writes:

> Linus Torvalds <torvalds@osdl.org> writes:
>
>> On Sat, 11 Feb 2006, Linus Torvalds wrote:
>>> 
>>> If somebody shows that the other approaches are faster, then I guess I'll 
>>> just have to sulk in a corner and grown quietly at people.
>>
>> growl. growL. With an 'L'!
>
> I do not get it.
> ...

I first suspected you just meant the typo (s/grown/growl/) but
it probably is that you really meant GROWL (and sulk).

It turns out that Johannes (with my patch to fix possible
unsigned int alignment issue and the initial call to
find_object()) is the clear winner.

        base   - tip of "master"
        lt-obj - the binary tree without balancing
        aj-obj - Alexandre's 256-way buckets
        js-obj - Johannes' circular hash

Although I have _not_ double checked the correctness of them, I
did not see major flaw in any of them.

base/git-rev-list --objects v2.6.14..linus
real	2m32.088s	user	2m2.830s	sys	0m0.890s
real	2m6.614s	user	2m1.860s	sys	0m0.660s
real	2m13.776s	user	2m2.450s	sys	0m0.590s
real	2m6.062s	user	2m2.420s	sys	0m0.690s
real	2m15.567s	user	2m3.170s	sys	0m0.900s

lt-obj/git-rev-list --objects v2.6.14..linus
real	0m42.889s	user	0m40.170s	sys	0m0.570s
real	0m44.247s	user	0m40.320s	sys	0m0.530s
real	0m40.891s	user	0m40.110s	sys	0m0.500s
real	0m41.874s	user	0m40.090s	sys	0m0.530s
real	0m41.596s	user	0m40.050s	sys	0m0.600s

aj-obj/git-rev-list --objects v2.6.14..linus
real	0m36.842s	user	0m36.200s	sys	0m0.490s
real	0m37.178s	user	0m36.740s	sys	0m0.390s
real	0m37.222s	user	0m36.540s	sys	0m0.610s
real	0m36.924s	user	0m36.410s	sys	0m0.360s
real	0m37.341s	user	0m36.150s	sys	0m0.620s

js-obj/git-rev-list --objects v2.6.14..linus
real	0m24.689s	user	0m24.120s	sys	0m0.390s
real	0m24.753s	user	0m24.020s	sys	0m0.360s
real	0m27.650s	user	0m24.470s	sys	0m0.440s
real	0m33.480s	user	0m24.030s	sys	0m0.460s
real	0m25.329s	user	0m24.490s	sys	0m0.390s


base/git-name-rev --all
real	0m4.193s	user	0m4.060s	sys	0m0.130s
real	0m4.179s	user	0m4.100s	sys	0m0.080s
real	0m4.210s	user	0m4.040s	sys	0m0.150s
real	0m4.162s	user	0m4.100s	sys	0m0.060s
real	0m4.697s	user	0m4.100s	sys	0m0.120s

lt-obj/git-name-rev --all
real	0m2.199s	user	0m2.120s	sys	0m0.080s
real	0m2.186s	user	0m2.110s	sys	0m0.080s
real	0m2.187s	user	0m2.150s	sys	0m0.040s
real	0m2.817s	user	0m2.150s	sys	0m0.070s
real	0m2.323s	user	0m2.170s	sys	0m0.050s

aj-obj/git-name-rev --all
real	0m2.136s	user	0m2.050s	sys	0m0.080s
real	0m2.164s	user	0m2.080s	sys	0m0.060s
real	0m2.143s	user	0m2.070s	sys	0m0.070s
real	0m2.141s	user	0m2.080s	sys	0m0.060s
real	0m2.154s	user	0m2.070s	sys	0m0.090s

js-obj/git-name-rev --all
real	0m2.047s	user	0m2.010s	sys	0m0.040s
real	0m2.040s	user	0m1.970s	sys	0m0.070s
real	0m2.025s	user	0m1.970s	sys	0m0.060s
real	0m2.170s	user	0m2.020s	sys	0m0.030s
real	0m2.046s	user	0m2.010s	sys	0m0.030s

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  6:07               ` Junio C Hamano
@ 2006-02-12  6:53                 ` Linus Torvalds
  0 siblings, 0 replies; 21+ messages in thread
From: Linus Torvalds @ 2006-02-12  6:53 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git, Alexandre Julliard, Johannes Schindelin

On Sat, 11 Feb 2006, Junio C Hamano wrote:
> 
> It turns out that Johannes (with my patch to fix possible
> unsigned int alignment issue and the initial call to
> find_object()) is the clear winner.

Having looked at it, I will have to agree. Johannes' approach looks 
pretty clean, and has the same memory overhead mine has (two pointers per 
object in the hash - one used, one empty), but has a lot fewer memcmp() 
calls and pointer chasing.

So I'll growl softly but concur. Johannes' code isn't even very complex.

		Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] binary-tree-based objects.
  2006-02-12  5:22         ` Linus Torvalds
  2006-02-12  5:23           ` Linus Torvalds
@ 2006-02-12  7:05           ` Linus Torvalds
  1 sibling, 0 replies; 21+ messages in thread
From: Linus Torvalds @ 2006-02-12  7:05 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git



On Sat, 11 Feb 2006, Linus Torvalds wrote:
> 
> Before:
> 	real    0m41.322s	user    0m40.612s	sys     0m0.492s
> After:
> 	real    0m22.542s	user    0m22.080s	sys     0m0.448s
Johannes:
	real    0m13.814s	user    0m13.492s	sys     0m0.296s

> And just so you wouldn't think that all my machines are slow..
> 
> Before:
> 	real    0m28.645s	user    0m28.366s	sys     0m0.280s
> After:
> 	real    0m16.566s	user    0m16.373s	sys     0m0.196s
Johannes:
	real    0m10.239s	user    0m10.029s	sys     0m0.208s

So the hashing thing is indeed the clear winner.

Make it so. 

		Linus

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12  2:46   ` Junio C Hamano
@ 2006-02-12  8:52     ` Alexandre Julliard
  2006-02-12 12:11       ` Junio C Hamano
  2006-02-12 11:19     ` Florian Weimer
  1 sibling, 1 reply; 21+ messages in thread
From: Alexandre Julliard @ 2006-02-12  8:52 UTC (permalink / raw
  To: Junio C Hamano; +Cc: Johannes Schindelin, git

Junio C Hamano <junkio@cox.net> writes:

> I am also interested to find out how much the rehashing you do
> when you update obj_allocs to a larger value is costing.
>
> Alexandle, if you have a chance, could you try Johannes' patch
> on your workload to see if it works OK for you?

It works great for me, CPU time is down to 15 sec instead of 20 sec
with my patch.

-- 
Alexandre Julliard
julliard@winehq.org

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12  2:46   ` Junio C Hamano
  2006-02-12  8:52     ` Alexandre Julliard
@ 2006-02-12 11:19     ` Florian Weimer
  2006-02-12 12:08       ` ***DONTUSE*** " Junio C Hamano
  1 sibling, 1 reply; 21+ messages in thread
From: Florian Weimer @ 2006-02-12 11:19 UTC (permalink / raw
  To: git

* Junio C. Hamano:

>  static int hashtable_index(const unsigned char *sha1)
>  {
> -	unsigned int i = *(unsigned int *)sha1;
> -	return (int)(i % obj_allocs);
> +	int cnt;

> +	unsigned int ix = *sha1++;
> +
> +	for (cnt = 1; cnt < sizeof(unsigned int); cnt++) {
> +		ix <<= 8;
> +		ix |= *sha1++;
> +	}

memcpy(&ix, sha1, sizeof(ix));

(GCC should do the rest.)

> +	return (int)(ix % obj_allocs);
>  }

return (int)(ix & (obj_allocs - 1));

AFAICS, obj_allocs is a power of two.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* ***DONTUSE*** Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12 11:19     ` Florian Weimer
@ 2006-02-12 12:08       ` Junio C Hamano
  2006-02-12 13:46         ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12 12:08 UTC (permalink / raw
  To: Florian Weimer; +Cc: git

Florian Weimer <fw@deneb.enyo.de> writes:

> (GCC should do the rest.)
>...
> AFAICS, obj_allocs is a power of two.

Yes, I already have something like these in my tree (the latter
did not help much as far as I could tell, though).

****HOWEVER****

Do not use this (not just my patch but with the whole hashtable
version) in your production repository yet.

I've got a mysterious corruption and bogus output from
fsck-objects, and have been tracking it (see the timestamp of
this message).

1.2.0 will most likely to be be *delayed*.  I have to first make
sure my private repository is sane.  Grrrrrrrr.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12  8:52     ` Alexandre Julliard
@ 2006-02-12 12:11       ` Junio C Hamano
  2006-02-12 14:31         ` Johannes Schindelin
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12 12:11 UTC (permalink / raw
  To: Alexandre Julliard; +Cc: git, Johannes Schindelin, Linus Torvalds

Alexandre Julliard <julliard@winehq.org> writes:

> Junio C Hamano <junkio@cox.net> writes:
>
>> Alexandle, if you have a chance, could you try Johannes' patch
>> on your workload to see if it works OK for you?
>
> It works great for me, CPU time is down to 15 sec instead of 20 sec
> with my patch.

Thanks.  Now we have three independent numbers to back up that
Johannes is the winner....

Grrrrrrr.  Please, DO NOT USE THIS ONE YET.

At least, not with your production repository.

I am trying to nail it down but it appears at least fsck-objects
using this version gives bogus results.  I am first trying to
see if my primary working repository is sane.

Oh, and thanks again for your initial patch, which was what
started this drastic improvement.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* ***DONTUSE*** Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12 12:08       ` ***DONTUSE*** " Junio C Hamano
@ 2006-02-12 13:46         ` Junio C Hamano
  2006-02-12 17:26           ` Johannes Schindelin
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12 13:46 UTC (permalink / raw
  To: git

I've pushed things out to "master" and "next" branch.

Quite a lot of things.

One thing that I expected to be there is not.  It is the
hashtable patch.  It is in "pu".

I once had it in my private "next", but dropped it before
pushing things out.

The problem does not seem to trigger with casual use, but I
found that with a clone from my primary repository with '-l -s'
(that is, a clone that uses alternates mechanism to borrow from
my primary repository), fsck-objects built with the patch seems
to report bogus things "missing".  I have not traced it fully;
instead I ended up spending most of the night (I noticed it at
around 01:30 and now it is 05:30 so that's about four hours)
recovering some of my refs and double checking if my primary
repository is not corrupt X-<.  At least, the primary repository
looks sane now.

With luck, I would muster enough energy to figure it out, but I
need some sleep first.

The problem seems to be very elusive.  I took a snapshot of the
two repositories involved, so that I can use them as an isolated
test case (the one is my primary repository and the other one is
the "-l -s" clone). The problem is repeatable, but the SHA1 of
the file the broken fsck-objects reports to be missing is
different from the one I observed in the first experiment with
the real repositories.  It appears it has something to do with
the directory listing order of fsck-objects, which in turn means
the reproduction of the problem is related to memory allocation
patterns, so maybe valgrind would help.  On the other hand, even
if I published a tarball of these two repositories somewhere,
other people (or myself) who extract the tarball would probably
not see the same SHA1 reported as missing X-<.

Anyway, I've pushed them out before crashing, after I double
checked that versions built from my "master" and "next" do not
seem to show the problem, while with the one in "pu", the first
patch after merging "next" in it being the said patch, exhibits
the problem.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12 12:11       ` Junio C Hamano
@ 2006-02-12 14:31         ` Johannes Schindelin
  0 siblings, 0 replies; 21+ messages in thread
From: Johannes Schindelin @ 2006-02-12 14:31 UTC (permalink / raw
  To: Junio C Hamano; +Cc: Alexandre Julliard, git, Linus Torvalds

Hi,

On Sun, 12 Feb 2006, Junio C Hamano wrote:

> Alexandre Julliard <julliard@winehq.org> writes:
> 
> > Junio C Hamano <junkio@cox.net> writes:
> >
> >> Alexandle, if you have a chance, could you try Johannes' patch
> >> on your workload to see if it works OK for you?
> >
> > It works great for me, CPU time is down to 15 sec instead of 20 sec
> > with my patch.
> 
> Thanks.  Now we have three independent numbers to back up that
> Johannes is the winner....
> 
> Grrrrrrr.  Please, DO NOT USE THIS ONE YET.
> 
> At least, not with your production repository.
> 
> I am trying to nail it down but it appears at least fsck-objects
> using this version gives bogus results.  I am first trying to
> see if my primary working repository is sane.
> 
> Oh, and thanks again for your initial patch, which was what
> started this drastic improvement.

I am sorry! I tested fsck, but only *once*, since I did not think such a 
creepy bug was in there. And then, I had to run to sing Beethoven's Missa 
Solemnis, and missed all the action about this patch.

Just a few remarks around the comments in this thread:

- the doubling of obj_allocs is arbitrary. Originally, I planned to do the 
growing much faster, which would have been helped by the fact. But it 
turned out my thinking was defective. So, you can grow the hashtable by 
whatever you like (doubling is quite effective, though).

- hashtable has expected O(1) insertion, and that is what boosts the 
performance. Since the table growing is linear in the number of objects 
(both size and computing time), and all operations afterwards are linear 
on the table, *and the hash is already computed*, the hashtable is 
preferrable over other data structures (sorted list has O(n) insertion 
time, and tree still O(log n)).

- the bug Junio fixed was not triggered here, since I did all the testing 
on my venerable iBook. The PowerPC architecture evidently aligns 
all pointers to 32-bit, so I could reinterpret the pointer as to an 
unsigned int. Note that there is a small overhead in Junio's version, but 
it is probably not worth the hassle to make that a compile time option. 
But I agree with Florian that memcpy would be more efficient.

- Arithmetic and Boolean operations on 32-bit integers typically are 
handled very efficiently in modern 32-bit CPUs, so there should be no 
reason to use "&" instead of "%" (especially since understanding the code 
wouldn't be helped by that).

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: ***DONTUSE*** Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12 13:46         ` Junio C Hamano
@ 2006-02-12 17:26           ` Johannes Schindelin
  2006-02-12 18:34             ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Johannes Schindelin @ 2006-02-12 17:26 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

Hi,

On Sun, 12 Feb 2006, Junio C Hamano wrote:

> The problem does not seem to trigger with casual use, but I
> found that with a clone from my primary repository with '-l -s'
> (that is, a clone that uses alternates mechanism to borrow from
> my primary repository), fsck-objects built with the patch seems
> to report bogus things "missing".  I have not traced it fully;
> instead I ended up spending most of the night (I noticed it at
> around 01:30 and now it is 05:30 so that's about four hours)
> recovering some of my refs and double checking if my primary
> repository is not corrupt X-<.  At least, the primary repository
> looks sane now.

Could it be the shallow thing?

+
+void for_each_object(void (*fn)(struct object *))
+{
+       int i;
+       for (i = 0; i < nr_objs; i++)
+               fn(objs[i]);
+}

This will not work with the hashtable. It has to be something like


void for_each_object(void (*fn)(struct object *))
{
       int i;
       for (i = 0; i < obj_allocs; i++)
               if (objs[i])
			fn(objs[i]);
}

This error did not trigger here, since I don't use shallow clones.

Again, sorry for the inconvenience, Junio.

Hth,
Dscho

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: ***DONTUSE*** Re: [PATCH] Use a hashtable for objects instead of a sorted list
  2006-02-12 17:26           ` Johannes Schindelin
@ 2006-02-12 18:34             ` Junio C Hamano
  0 siblings, 0 replies; 21+ messages in thread
From: Junio C Hamano @ 2006-02-12 18:34 UTC (permalink / raw
  To: Johannes Schindelin; +Cc: git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Could it be the shallow thing?

No, I do not think so.  In the current "master", "next", "pu"
picture, "pu" merges js/obj with the "8856cc69" commit and then
shallow and bind comes later.  That merged version is what I am
testing, and also its second parent, which is "master" plus
hashtable.  In either case, shallow and bind are not part of the
picture.

> Again, sorry for the inconvenience, Junio.

That is not your fault, and I did not mean to sound I am unhappy
about *you*.  I am indeed unhappy because I did not see anything
obviously or subtly wrong with your patch after I moved call to
hashtable_index() in find_object().

But I think Linus nailed it.  I'll see if I can understand his
explanation.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2006-02-12 18:34 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-02-11 19:14 [PATCH] Optimization for git-rev-list --objects Alexandre Julliard
2006-02-12  1:57 ` [PATCH] Use a hashtable for objects instead of a sorted list Johannes Schindelin
2006-02-12  2:46   ` Junio C Hamano
2006-02-12  8:52     ` Alexandre Julliard
2006-02-12 12:11       ` Junio C Hamano
2006-02-12 14:31         ` Johannes Schindelin
2006-02-12 11:19     ` Florian Weimer
2006-02-12 12:08       ` ***DONTUSE*** " Junio C Hamano
2006-02-12 13:46         ` Junio C Hamano
2006-02-12 17:26           ` Johannes Schindelin
2006-02-12 18:34             ` Junio C Hamano
2006-02-12  2:19 ` [PATCH] Optimization for git-rev-list --objects Linus Torvalds
2006-02-12  2:39   ` Junio C Hamano
2006-02-12  4:11     ` [PATCH] binary-tree-based objects Junio C Hamano
2006-02-12  5:06       ` Linus Torvalds
2006-02-12  5:22         ` Linus Torvalds
2006-02-12  5:23           ` Linus Torvalds
2006-02-12  5:48             ` Junio C Hamano
2006-02-12  6:07               ` Junio C Hamano
2006-02-12  6:53                 ` Linus Torvalds
2006-02-12  7:05           ` Linus Torvalds

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).