git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/3] fast-import: grow tree storage more aggressively
       [not found] <<20070310191515.GA3416@coredump.intra.peff.net>
@ 2007-03-10 19:16 ` Jeff King
  2007-03-10 19:21 ` [PATCH 2/3] fast-import: tree allocation cleanups Jeff King
  2007-03-10 19:21 ` [PATCH 3/3] fast-import: improve efficiency of tree_content_set Jeff King
  2 siblings, 0 replies; 14+ messages in thread
From: Jeff King @ 2007-03-10 19:16 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, spearce

When building up a tree for a commit, fast-import
dynamically allocates memory for the tree entries. When more
space is needed, the allocated memory is increased by a
constant amount. For very large trees, this means
re-allocating and memcpy()ing the memory O(n) times.

To compound this problem, releasing the previous tree
resource does not free the memory; it is kept in a pool
for future trees. This means that each of the O(n)
allocations will consume increasing amounts of memory,
giving O(n^2) memory consumption.

Signed-off-by: Jeff King <peff@peff.net>
---
 fast-import.c |    4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index fda5018..81bc6ea 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1058,7 +1058,7 @@ static void load_tree(struct tree_entry *root)
 		struct tree_entry *e = new_tree_entry();
 
 		if (t->entry_count == t->entry_capacity)
-			root->tree = t = grow_tree_content(t, 8);
+			root->tree = t = grow_tree_content(t, t->entry_count);
 		t->entries[t->entry_count++] = e;
 
 		e->tree = NULL;
@@ -1225,7 +1225,7 @@ static int tree_content_set(
 	}
 
 	if (t->entry_count == t->entry_capacity)
-		root->tree = t = grow_tree_content(t, 8);
+		root->tree = t = grow_tree_content(t, t->entry_count);
 	e = new_tree_entry();
 	e->name = to_atom(p, (unsigned short)n);
 	e->versions[0].mode = 0;
-- 
1.5.0.3.931.g55c05

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 2/3] fast-import: tree allocation cleanups
       [not found] <<20070310191515.GA3416@coredump.intra.peff.net>
  2007-03-10 19:16 ` [PATCH 1/3] fast-import: grow tree storage more aggressively Jeff King
@ 2007-03-10 19:21 ` Jeff King
  2007-03-11  3:21   ` Shawn O. Pearce
  2007-03-10 19:21 ` [PATCH 3/3] fast-import: improve efficiency of tree_content_set Jeff King
  2 siblings, 1 reply; 14+ messages in thread
From: Jeff King @ 2007-03-10 19:21 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, spearce

These cleanups are in preparation for improving the
efficiency of the tree-construction code. There should be no
functionality changes.

1. Insert entries into tree_content _after_ they have a name
   assigned. This means that the insertion can use a smarter
   data structure than an unordered list.

2. Factor out the linear search for existing tree entries in
   tree_content_set, which will make it simpler to change.
   Most of the line changes here are simply indentation changes.

Signed-off-by: Jeff King <peff@peff.net>
---
 fast-import.c |   65 +++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 38 insertions(+), 27 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 81bc6ea..ac14a5a 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -594,6 +594,15 @@ static struct tree_content *grow_tree_content(
 	return r;
 }
 
+static int find_tree_entry(struct tree_content *t, struct atom_str *s)
+{
+	int i;
+	for (i = 0; i < t->entry_count; i++)
+		if (!strcmp(t->entries[i]->name->str_dat, s->str_dat))
+			return i;
+	return -1;
+}
+
 static struct tree_entry *new_tree_entry(void)
 {
 	struct tree_entry *e;
@@ -1059,7 +1068,6 @@ static void load_tree(struct tree_entry *root)
 
 		if (t->entry_count == t->entry_capacity)
 			root->tree = t = grow_tree_content(t, t->entry_count);
-		t->entries[t->entry_count++] = e;
 
 		e->tree = NULL;
 		c = get_mode(c, &e->versions[1].mode);
@@ -1071,6 +1079,8 @@ static void load_tree(struct tree_entry *root)
 		hashcpy(e->versions[0].sha1, (unsigned char*)c);
 		hashcpy(e->versions[1].sha1, (unsigned char*)c);
 		c += 20;
+
+		t->entries[t->entry_count++] = e;
 	}
 	free(buf);
 }
@@ -1187,6 +1197,7 @@ static int tree_content_set(
 	const char *slash1;
 	unsigned int i, n;
 	struct tree_entry *e;
+	struct atom_str *name;
 
 	slash1 = strchr(p, '/');
 	if (slash1)
@@ -1194,40 +1205,40 @@ static int tree_content_set(
 	else
 		n = strlen(p);
 
-	for (i = 0; i < t->entry_count; i++) {
+	name = to_atom(p, n);
+	i = find_tree_entry(t, name);
+	if (i != -1) {
 		e = t->entries[i];
-		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
-			if (!slash1) {
-				if (e->versions[1].mode == mode
-						&& !hashcmp(e->versions[1].sha1, sha1))
-					return 0;
-				e->versions[1].mode = mode;
-				hashcpy(e->versions[1].sha1, sha1);
-				if (e->tree) {
-					release_tree_content_recursive(e->tree);
-					e->tree = NULL;
-				}
-				hashclr(root->versions[1].sha1);
-				return 1;
-			}
-			if (!S_ISDIR(e->versions[1].mode)) {
-				e->tree = new_tree_content(8);
-				e->versions[1].mode = S_IFDIR;
-			}
-			if (!e->tree)
-				load_tree(e);
-			if (tree_content_set(e, slash1 + 1, sha1, mode)) {
-				hashclr(root->versions[1].sha1);
-				return 1;
+		if (!slash1) {
+			if (e->versions[1].mode == mode
+					&& !hashcmp(e->versions[1].sha1, sha1))
+				return 0;
+			e->versions[1].mode = mode;
+			hashcpy(e->versions[1].sha1, sha1);
+			if (e->tree) {
+				release_tree_content_recursive(e->tree);
+				e->tree = NULL;
 			}
-			return 0;
+			hashclr(root->versions[1].sha1);
+			return 1;
+		}
+		if (!S_ISDIR(e->versions[1].mode)) {
+			e->tree = new_tree_content(8);
+			e->versions[1].mode = S_IFDIR;
+		}
+		if (!e->tree)
+			load_tree(e);
+		if (tree_content_set(e, slash1 + 1, sha1, mode)) {
+			hashclr(root->versions[1].sha1);
+			return 1;
 		}
+		return 0;
 	}
 
 	if (t->entry_count == t->entry_capacity)
 		root->tree = t = grow_tree_content(t, t->entry_count);
 	e = new_tree_entry();
-	e->name = to_atom(p, (unsigned short)n);
+	e->name = name;
 	e->versions[0].mode = 0;
 	hashclr(e->versions[0].sha1);
 	t->entries[t->entry_count++] = e;
-- 
1.5.0.3.931.g55c05

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* [PATCH 3/3] fast-import: improve efficiency of tree_content_set
       [not found] <<20070310191515.GA3416@coredump.intra.peff.net>
  2007-03-10 19:16 ` [PATCH 1/3] fast-import: grow tree storage more aggressively Jeff King
  2007-03-10 19:21 ` [PATCH 2/3] fast-import: tree allocation cleanups Jeff King
@ 2007-03-10 19:21 ` Jeff King
  2007-03-10 19:23   ` Jeff King
  2 siblings, 1 reply; 14+ messages in thread
From: Jeff King @ 2007-03-10 19:21 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, spearce

This patch keeps the tree entries of a tree_content in
order, sorted by entry name. The lookup in find_tree_entry
now performs a binary search instead of a linear search.

Signed-off-by: Jeff King <peff@peff.net>
---
 fast-import.c |   48 +++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index ac14a5a..4cd01ee 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -594,13 +594,47 @@ static struct tree_content *grow_tree_content(
 	return r;
 }
 
+static int find_tree_entry_bsearch(struct tree_content *t,
+				   struct atom_str *s,
+				   int *found)
+{
+	int high, low, mid=0, cmp=-1;
+	low = 0;
+	high = t->entry_count - 1;
+	while (low <= high) {
+		mid = low + ((high - low) / 2);
+		cmp = strcmp(s->str_dat, t->entries[mid]->name->str_dat);
+		if (cmp < 0)
+			high = mid - 1;
+		else if (cmp > 0)
+			low = mid + 1;
+		else {
+			*found = 1;
+			return mid;
+		}
+	}
+	*found = 0;
+	return cmp < 0 ? mid : mid + 1;
+}
+
 static int find_tree_entry(struct tree_content *t, struct atom_str *s)
 {
-	int i;
-	for (i = 0; i < t->entry_count; i++)
-		if (!strcmp(t->entries[i]->name->str_dat, s->str_dat))
-			return i;
-	return -1;
+	int i, found;
+	i = find_tree_entry_bsearch(t, s, &found);
+	return found ? i : -1;
+}
+
+static void insert_tree_content(struct tree_content *t, struct tree_entry *e)
+{
+	int i, found;
+
+	i = find_tree_entry_bsearch(t, e->name, &found);
+	if (found)
+		die("BUG: duplicate tree content");
+	memmove(t->entries + i + 1, t->entries + i,
+			sizeof(t->entries[0]) * (t->entry_count - i));
+	t->entries[i] = e;
+	t->entry_count++;
 }
 
 static struct tree_entry *new_tree_entry(void)
@@ -1080,7 +1114,7 @@ static void load_tree(struct tree_entry *root)
 		hashcpy(e->versions[1].sha1, (unsigned char*)c);
 		c += 20;
 
-		t->entries[t->entry_count++] = e;
+		insert_tree_content(t, e);
 	}
 	free(buf);
 }
@@ -1241,7 +1275,7 @@ static int tree_content_set(
 	e->name = name;
 	e->versions[0].mode = 0;
 	hashclr(e->versions[0].sha1);
-	t->entries[t->entry_count++] = e;
+	insert_tree_content(t, e);
 	if (slash1) {
 		e->tree = new_tree_content(8);
 		e->versions[1].mode = S_IFDIR;
-- 
1.5.0.3.931.g55c05

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 3/3] fast-import: improve efficiency of tree_content_set
  2007-03-10 19:21 ` [PATCH 3/3] fast-import: improve efficiency of tree_content_set Jeff King
@ 2007-03-10 19:23   ` Jeff King
  2007-03-10 19:40     ` [PATCH] fast-import: use binary search in tree_content_remove Jeff King
  0 siblings, 1 reply; 14+ messages in thread
From: Jeff King @ 2007-03-10 19:23 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, spearce

On Sat, Mar 10, 2007 at 02:21:31PM -0500, Jeff King wrote:

> This patch keeps the tree entries of a tree_content in
> order, sorted by entry name. The lookup in find_tree_entry
> now performs a binary search instead of a linear search.

I should note that this patch doesn't handle removing entries from
tree_content structs; I will work up a patch for that momentarily.

-Peff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH] fast-import: use binary search in tree_content_remove
  2007-03-10 19:23   ` Jeff King
@ 2007-03-10 19:40     ` Jeff King
  2007-03-11  3:38       ` Shawn O. Pearce
  0 siblings, 1 reply; 14+ messages in thread
From: Jeff King @ 2007-03-10 19:40 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, spearce


Signed-off-by: Jeff King <peff@peff.net>
---
> I should note that this patch doesn't handle removing entries from
> tree_content structs; I will work up a patch for that momentarily.

And here it is. However, I should note that this patch is _not_
necessary. I had originally thought that removal might destroy the
sorting that I added in the last patch, but it looks like the entry
isn't actually removed. Shawn, can you sanity check this?

This patch still has value, though, because it improves performance when
deleting entries from a large tree (I just didn't do that in my test
case, so it wasn't a problem before).

 fast-import.c |   33 +++++++++++++++++----------------
 1 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 4cd01ee..4322216 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1295,6 +1295,7 @@ static int tree_content_remove(struct tree_entry *root, const char *p)
 	const char *slash1;
 	unsigned int i, n;
 	struct tree_entry *e;
+	struct atom_str *name;
 
 	slash1 = strchr(p, '/');
 	if (slash1)
@@ -1302,24 +1303,24 @@ static int tree_content_remove(struct tree_entry *root, const char *p)
 	else
 		n = strlen(p);
 
-	for (i = 0; i < t->entry_count; i++) {
-		e = t->entries[i];
-		if (e->name->str_len == n && !strncmp(p, e->name->str_dat, n)) {
-			if (!slash1 || !S_ISDIR(e->versions[1].mode))
-				goto del_entry;
-			if (!e->tree)
-				load_tree(e);
-			if (tree_content_remove(e, slash1 + 1)) {
-				for (n = 0; n < e->tree->entry_count; n++) {
-					if (e->tree->entries[n]->versions[1].mode) {
-						hashclr(root->versions[1].sha1);
-						return 1;
-					}
-				}
-				goto del_entry;
+	name = to_atom(p, n);
+	i = find_tree_entry(t, name);
+	if(i == -1)
+		return 0;
+	e = t->entries[i];
+
+	if (!slash1 || !S_ISDIR(e->versions[1].mode))
+		goto del_entry;
+	if (!e->tree)
+		load_tree(e);
+	if (tree_content_remove(e, slash1 + 1)) {
+		for (n = 0; n < e->tree->entry_count; n++) {
+			if (e->tree->entries[n]->versions[1].mode) {
+				hashclr(root->versions[1].sha1);
+				return 1;
 			}
-			return 0;
 		}
+		goto del_entry;
 	}
 	return 0;
 
-- 
1.5.0.3.931.g55c05

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] fast-import: tree allocation cleanups
  2007-03-10 19:21 ` [PATCH 2/3] fast-import: tree allocation cleanups Jeff King
@ 2007-03-11  3:21   ` Shawn O. Pearce
  2007-03-11 15:51     ` Jeff King
  0 siblings, 1 reply; 14+ messages in thread
From: Shawn O. Pearce @ 2007-03-11  3:21 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano

A nicely done series.  Thanks for fixing this performance bug.

Jeff King <peff@peff.net> wrote:
> @@ -1059,7 +1068,6 @@ static void load_tree(struct tree_entry *root)
>  
>  		if (t->entry_count == t->entry_capacity)
>  			root->tree = t = grow_tree_content(t, t->entry_count);
> -		t->entries[t->entry_count++] = e;
>  
>  		e->tree = NULL;
>  		c = get_mode(c, &e->versions[1].mode);
> @@ -1071,6 +1079,8 @@ static void load_tree(struct tree_entry *root)
>  		hashcpy(e->versions[0].sha1, (unsigned char*)c);
>  		hashcpy(e->versions[1].sha1, (unsigned char*)c);
>  		c += 20;
> +
> +		t->entries[t->entry_count++] = e;
>  	}
>  	free(buf);
>  }

This I wouldn't have bothered to do in this patch.  It is just
unecessary code churning, as you turn around and change this again
in the next patch.  I actually dropped these two hunks from this
patch (but left the dang commit message comment in, whoops) and
moved the first hunk to the next patch.

> @@ -1194,40 +1205,40 @@ static int tree_content_set(
>  	else
>  		n = strlen(p);
>  
> -	for (i = 0; i < t->entry_count; i++) {
> +	name = to_atom(p, n);

You missed an unsigned short cast here.

> -	e->name = to_atom(p, (unsigned short)n);
> +	e->name = name;

See?  ;-) I put the correct cast into the patch when I applied it.


Your series is applied on top of master and is now in my
fast-import.git fork on repo.or.cz.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] fast-import: use binary search in tree_content_remove
  2007-03-10 19:40     ` [PATCH] fast-import: use binary search in tree_content_remove Jeff King
@ 2007-03-11  3:38       ` Shawn O. Pearce
  2007-03-11 16:34         ` Jeff King
  0 siblings, 1 reply; 14+ messages in thread
From: Shawn O. Pearce @ 2007-03-11  3:38 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano

Jeff King <peff@peff.net> wrote:
> And here it is. However, I should note that this patch is _not_
> necessary.

Well, its not necessary for you, as you aren't trying to remove
something from your huge tree.  ;-)

> I had originally thought that removal might destroy the
> sorting that I added in the last patch, but it looks like the entry
> isn't actually removed. Shawn, can you sanity check this?

Your patch is fine.  fast-import takes an "optimization" here and
does not bother to actually delete entries from a tree until *after*
we have written the tree data out to the packfile.  The reason
is we need to retain the version 0 tree entry objects to recreate
the delta base.  Since these are combined with the version 1 data
(the "new" data) we have to delay the actual deletes until after
we store the tree and have its SHA-1.

It turns out however that your entire series was broken. I had to
commit the following on top of it to fix it:

-->8--
From 0af148553a94f7e856089fa68395524932240145 Mon Sep 17 00:00:00 2001
From: Shawn O. Pearce <spearce@spearce.org>
Date: Sat, 10 Mar 2007 22:34:12 -0500
Subject: [PATCH] fast-import: Brown paper bag fix tree sorting

Jeff King's recent changes to sort trees by strictly name (and binary
search to locate an entry) works OK up until we have to write a
tree out that uses the funny name/mode sorting that native Git uses:

  b.
  b/
  ba

Here the subtree "b" must sort between files "b." and "ba", but
Jeff's changes have it sorting before "b.".  This means we would fail
to find entries during future modifications to that tree as Jeff's
binary search algorithm won't find subtree "b" between b.  and ba.

I'm plastering over the problem by resorting a tree strictly by
name after it has been written out and the deleted entries have
been filtered out.

Signed-off-by: Shawn O. Pearce <spearce@spearce.org>
---
 fast-import.c |    8 ++++++++
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index 716819f..fa3b766 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -1136,6 +1136,13 @@ static int tecmp1 (const void *_a, const void *_b)
 		b->name->str_dat, b->name->str_len, b->versions[1].mode);
 }
 
+static int nmcmp (const void *_a, const void *_b)
+{
+	struct tree_entry *a = *((struct tree_entry**)_a);
+	struct tree_entry *b = *((struct tree_entry**)_b);
+	return strcmp(a->name->str_dat, b->name->str_dat);
+}
+
 static void mktree(struct tree_content *t,
 	int v,
 	unsigned long *szp,
@@ -1218,6 +1225,7 @@ static void store_tree(struct tree_entry *root)
 		}
 	}
 	t->entry_count -= del;
+	qsort(t->entries, t->entry_count, sizeof(t->entries[0]), nmcmp);
 }
 
 static int tree_content_set(
-- 
1.5.0.3.942.g299f

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] fast-import: tree allocation cleanups
  2007-03-11  3:21   ` Shawn O. Pearce
@ 2007-03-11 15:51     ` Jeff King
  2007-03-11 15:59       ` Jeff King
  2007-03-12 19:16       ` Shawn O. Pearce
  0 siblings, 2 replies; 14+ messages in thread
From: Jeff King @ 2007-03-11 15:51 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git

On Sat, Mar 10, 2007 at 10:21:47PM -0500, Shawn O. Pearce wrote:

> > +
> > +		t->entries[t->entry_count++] = e;
> This I wouldn't have bothered to do in this patch.  It is just
> unecessary code churning, as you turn around and change this again
> in the next patch.  I actually dropped these two hunks from this
> patch (but left the dang commit message comment in, whoops) and
> moved the first hunk to the next patch.

OK. There are actually two changes: moving the insertion until after
e->name is set up (which has no functionality impact) and changing the
manner of insertion. I split them up to try to make them more readable,
but clearly you figured out what I was going for.

> > +	name = to_atom(p, n);
> [...]
> > -	e->name = to_atom(p, (unsigned short)n);
> 
> You missed an unsigned short cast here.

Actually, I removed it intentionally (though clearly I should have
documented it). It's casting from an unsigned int to an unsigned short.
Such a cast is at best pointless (since the compiler performs the exact
same cast implicitly -- see C99 6.5.2.2, paragraph 7), and at worst
masks an error (e.g., if the type of n is changed).

Is there some subtle issue I'm missing here?

-Peff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] fast-import: tree allocation cleanups
  2007-03-11 15:51     ` Jeff King
@ 2007-03-11 15:59       ` Jeff King
  2007-03-12 19:16       ` Shawn O. Pearce
  1 sibling, 0 replies; 14+ messages in thread
From: Jeff King @ 2007-03-11 15:59 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git

On Sun, Mar 11, 2007 at 11:51:38AM -0400, Jeff King wrote:

> OK. There are actually two changes: moving the insertion until after
> e->name is set up (which has no functionality impact) and changing the
> manner of insertion. I split them up to try to make them more readable,
> but clearly you figured out what I was going for.

BTW, the commit message for your d4239be9 is now inaccurate (you moved
the hunk for my point "1" to the next patch). Probably not worth fixing,
but I thought I would mention it.

-Peff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] fast-import: use binary search in tree_content_remove
  2007-03-11  3:38       ` Shawn O. Pearce
@ 2007-03-11 16:34         ` Jeff King
  2007-03-11 16:54           ` Jeff King
  2007-03-12 19:13           ` Shawn O. Pearce
  0 siblings, 2 replies; 14+ messages in thread
From: Jeff King @ 2007-03-11 16:34 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git, Junio C Hamano

On Sat, Mar 10, 2007 at 10:38:33PM -0500, Shawn O. Pearce wrote:

> Well, its not necessary for you, as you aren't trying to remove
> something from your huge tree.  ;-)

Heh, I meant necessary as in "Oops, I probably just completely broke
fast-import". But I see I managed to do that anyway. :)

> Your patch is fine.  fast-import takes an "optimization" here and
> does not bother to actually delete entries from a tree until *after*

OK, that was my reading, as well; thanks for confirming.

> Jeff King's recent changes to sort trees by strictly name (and binary
> search to locate an entry) works OK up until we have to write a
> tree out that uses the funny name/mode sorting that native Git uses:

And here is a test that I believe triggers the problem (fails with my
patches, succeeds with your fix):

diff --git a/t/t9300-fast-import.sh b/t/t9300-fast-import.sh
index 2e1a09f..2ec68b0 100755
--- a/t/t9300-fast-import.sh
+++ b/t/t9300-fast-import.sh
@@ -501,4 +501,54 @@ test_expect_success \
     'test `git-rev-parse --verify branch^1` \
 		= `git-rev-parse --verify K^1`'
 
+###
+### series L
+###
+
+cat >input <<INPUT_END
+blob
+mark :1
+data <<EOF
+some data
+EOF
+
+blob
+mark :2
+data <<EOF
+other data
+EOF
+
+commit refs/heads/L
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+data <<COMMIT
+create L
+COMMIT
+
+M 644 :1 b.
+M 644 :1 b/other
+M 644 :1 ba
+
+commit refs/heads/L
+committer $GIT_COMMITTER_NAME <$GIT_COMMITTER_EMAIL> $GIT_COMMITTER_DATE
+data <<COMMIT
+update L
+COMMIT
+
+M 644 :2 b.
+M 644 :2 b/other
+M 644 :2 ba
+INPUT_END
+
+cat >expect <<EXPECT_END
+:100644 100644 4268632... 55d3a52... M	b.
+:040000 040000 0ae5cac... 443c768... M	b
+:100644 100644 4268632... 55d3a52... M	ba
+EXPECT_END
+
+test_expect_success \
+    'L: verify internal tree sorting' \
+	'git-fast-import <input &&
+	 git-diff --raw L^ L >output &&
+	 diff -u expect output'
+
 test_done

> I'm plastering over the problem by resorting a tree strictly by
> name after it has been written out and the deleted entries have
> been filtered out.

I wonder if we could make this a bit cleaner by actually using the git
sort in the first place. I will take a look...

-Peff

^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] fast-import: use binary search in tree_content_remove
  2007-03-11 16:34         ` Jeff King
@ 2007-03-11 16:54           ` Jeff King
  2007-03-11 20:19             ` Shawn O. Pearce
  2007-03-12 19:13           ` Shawn O. Pearce
  1 sibling, 1 reply; 14+ messages in thread
From: Jeff King @ 2007-03-11 16:54 UTC (permalink / raw)
  To: Shawn O. Pearce; +Cc: git, Junio C Hamano

On Sun, Mar 11, 2007 at 12:34:13PM -0400, Jeff King wrote:

> > I'm plastering over the problem by resorting a tree strictly by
> > name after it has been written out and the deleted entries have
> > been filtered out.
> I wonder if we could make this a bit cleaner by actually using the git
> sort in the first place. I will take a look...

Hrm, it's not that hard to pass the mode around and use
base_name_compare, but I don't think that's enough. Any time we turn an
entry into a tree, we'll have to resort. I think your patch is simpler
and less error prone.

-Peff

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] fast-import: use binary search in tree_content_remove
  2007-03-11 16:54           ` Jeff King
@ 2007-03-11 20:19             ` Shawn O. Pearce
  0 siblings, 0 replies; 14+ messages in thread
From: Shawn O. Pearce @ 2007-03-11 20:19 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano

Jeff King <peff@peff.net> wrote:
> On Sun, Mar 11, 2007 at 12:34:13PM -0400, Jeff King wrote:
> 
> > > I'm plastering over the problem by resorting a tree strictly by
> > > name after it has been written out and the deleted entries have
> > > been filtered out.
> > I wonder if we could make this a bit cleaner by actually using the git
> > sort in the first place. I will take a look...
> 
> Hrm, it's not that hard to pass the mode around and use
> base_name_compare, but I don't think that's enough. Any time we turn an
> entry into a tree, we'll have to resort. I think your patch is simpler
> and less error prone.

Yes, but it gets worse.  We delete an entry by setting the mode to
0 in version 1, but leaving the entry in the tree so that the entry
will show up in version 0 during delta generation.

So what happens if we delete the tree entry, then modify it in
the same commit?  We can't find it if we are searching with mode
included.

Sidenote: I have the same sorting problems in jgit.  It all comes
down to how annoying the tree format is, in that entries are sorted
by both name and mode, but only name makes the entry be unique.

Right now I'm not sure how to resolve this, short of the bandaid
patch I put onto the end of your series.   :-(

-- 
Shawn.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] fast-import: use binary search in tree_content_remove
  2007-03-11 16:34         ` Jeff King
  2007-03-11 16:54           ` Jeff King
@ 2007-03-12 19:13           ` Shawn O. Pearce
  1 sibling, 0 replies; 14+ messages in thread
From: Shawn O. Pearce @ 2007-03-12 19:13 UTC (permalink / raw)
  To: Jeff King; +Cc: git, Junio C Hamano

Jeff King <peff@peff.net> wrote:
> And here is a test that I believe triggers the problem (fails with my
> patches, succeeds with your fix):

Thanks, this is a good test to have. Since the current "stable"
version passes without your patches I'm applying the test first;
that way we can see if/when a change breaks this ordering requirement
and address it immediately in that patch.
 
> > I'm plastering over the problem by resorting a tree strictly by
> > name after it has been written out and the deleted entries have
> > been filtered out.
> 
> I wonder if we could make this a bit cleaner by actually using the git
> sort in the first place. I will take a look...

Good luck.  I'm not sure its easily done.  Which is why I'm not
attempting to do it right now.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH 2/3] fast-import: tree allocation cleanups
  2007-03-11 15:51     ` Jeff King
  2007-03-11 15:59       ` Jeff King
@ 2007-03-12 19:16       ` Shawn O. Pearce
  1 sibling, 0 replies; 14+ messages in thread
From: Shawn O. Pearce @ 2007-03-12 19:16 UTC (permalink / raw)
  To: Jeff King; +Cc: git

Jeff King <peff@peff.net> wrote:
> On Sat, Mar 10, 2007 at 10:21:47PM -0500, Shawn O. Pearce wrote:
> > > +	name = to_atom(p, n);
> > [...]
> > > -	e->name = to_atom(p, (unsigned short)n);
> > 
> > You missed an unsigned short cast here.
> 
> Actually, I removed it intentionally (though clearly I should have
> documented it). It's casting from an unsigned int to an unsigned short.
> Such a cast is at best pointless (since the compiler performs the exact
> same cast implicitly -- see C99 6.5.2.2, paragraph 7), and at worst
> masks an error (e.g., if the type of n is changed).

Hmm.  You are probably right.  I had put that cast into place before
because (if I recall correctly) I was getting compiler errors.
But today looking at it I'm not, even if I remove the casts.  So uh,
yea... they should probably come out.

-- 
Shawn.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2007-03-12 19:16 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <<20070310191515.GA3416@coredump.intra.peff.net>
2007-03-10 19:16 ` [PATCH 1/3] fast-import: grow tree storage more aggressively Jeff King
2007-03-10 19:21 ` [PATCH 2/3] fast-import: tree allocation cleanups Jeff King
2007-03-11  3:21   ` Shawn O. Pearce
2007-03-11 15:51     ` Jeff King
2007-03-11 15:59       ` Jeff King
2007-03-12 19:16       ` Shawn O. Pearce
2007-03-10 19:21 ` [PATCH 3/3] fast-import: improve efficiency of tree_content_set Jeff King
2007-03-10 19:23   ` Jeff King
2007-03-10 19:40     ` [PATCH] fast-import: use binary search in tree_content_remove Jeff King
2007-03-11  3:38       ` Shawn O. Pearce
2007-03-11 16:34         ` Jeff King
2007-03-11 16:54           ` Jeff King
2007-03-11 20:19             ` Shawn O. Pearce
2007-03-12 19:13           ` Shawn O. Pearce

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).