[PATCH] diff-delta.c: pack the index structure

git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed

* [PATCH] diff-delta.c: pack the index structure
@ 2007-09-07 23:38 David Kastrup
  2007-09-08  2:34 ` Nicolas Pitre
  0 siblings, 1 reply; 7+ messages in thread
From: David Kastrup @ 2007-09-07 23:38 UTC (permalink / raw)
  To: git

In normal use cases, the performance wins are not overly impressive:
we get something like 5-10% due to the slightly better locality of
memory accesses using the packed structure.

However, since the data structure for index entries saves 33% of
memory on 32-bit platforms and 40% on 64-bit platforms, the behavior
when memory gets limited should be nicer.

This is a rather well-contained change.  One obvious improvement would
be sorting the elements in one bucket according to their hash, then
using binary probing to find the elements with the right hash value.

As it stands, the output should be strictly the same as previously
unless one uses the option for limiting the amount of used memory, in
which case the created packs might be better.

Signed-off-by: David Kastrup <dak@gnu.org>
---
 diff-delta.c |   62 +++++++++++++++++++++++++++++++++++++++++++---------------
 1 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/diff-delta.c b/diff-delta.c
index 0dde2f2..cf0136f 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -115,9 +115,13 @@ static const unsigned int U[256] = {
 struct index_entry {
 	const unsigned char *ptr;
 	unsigned int val;
-	struct index_entry *next;
 };
 
+struct unpacked_index_entry {
+	struct index_entry entry;
+	struct unpacked_index_entry *next;
+};	
+
 struct delta_index {
 	unsigned long memsize;
 	const void *src_buf;
@@ -131,7 +135,8 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
 	const unsigned char *data, *buffer = buf;
 	struct delta_index *index;
-	struct index_entry *entry, **hash;
+	struct unpacked_index_entry *entry, **hash;
+	struct index_entry *aentry, **ahash;
 	void *mem;
 	unsigned long memsize;
 
@@ -148,28 +153,21 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	hmask = hsize - 1;
 
 	/* allocate lookup index */
-	memsize = sizeof(*index) +
-		  sizeof(*hash) * hsize +
+	memsize = sizeof(*hash) * hsize +
 		  sizeof(*entry) * entries;
 	mem = malloc(memsize);
 	if (!mem)
 		return NULL;
-	index = mem;
-	mem = index + 1;
 	hash = mem;
 	mem = hash + hsize;
 	entry = mem;
 
-	index->memsize = memsize;
-	index->src_buf = buf;
-	index->src_size = bufsize;
-	index->hash_mask = hmask;
 	memset(hash, 0, hsize * sizeof(*hash));
 
 	/* allocate an array to count hash entries */
 	hash_count = calloc(hsize, sizeof(*hash_count));
 	if (!hash_count) {
-		free(index);
+		free(hash);
 		return NULL;
 	}
 
@@ -183,12 +181,13 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
 		if (val == prev_val) {
 			/* keep the lowest of consecutive identical blocks */
-			entry[-1].ptr = data + RABIN_WINDOW;
+			entry[-1].entry.ptr = data + RABIN_WINDOW;
+			--entries;
 		} else {
 			prev_val = val;
 			i = val & hmask;
-			entry->ptr = data + RABIN_WINDOW;
-			entry->val = val;
+			entry->entry.ptr = data + RABIN_WINDOW;
+			entry->entry.val = val;
 			entry->next = hash[i];
 			hash[i] = entry++;
 			hash_count[i]++;
@@ -212,15 +211,46 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 			continue;
 		entry = hash[i];
 		do {
-			struct index_entry *keep = entry;
+			struct unpacked_index_entry *keep = entry;
 			int skip = hash_count[i] / HASH_LIMIT;
 			do {
+				--entries;
 				entry = entry->next;
 			} while(--skip && entry);
+			++entries;
 			keep->next = entry;
 		} while(entry);
 	}
 	free(hash_count);
+	memsize = sizeof(*index)
+		+sizeof(*ahash) * (hsize+1) +
+		+sizeof(*aentry) * entries;
+	mem=malloc(memsize);
+
+	if (!mem) {
+		free(hash);
+		return NULL;
+	}
+
+	index = mem;
+	index->memsize = memsize;
+	index->src_buf = buf;
+	index->src_size = bufsize;
+	index->hash_mask = hmask;
+
+	mem = index+1;
+	ahash = mem;
+	mem = ahash + (hsize+1);
+	aentry = mem;
+
+	for (i=0; i<hsize; i++) {
+		ahash[i] = aentry;
+		for (entry=hash[i]; entry; entry=entry->next)
+			*aentry++ = entry->entry;
+	}
+	ahash[hsize] = aentry;
+	assert(aentry-(struct index_entry *)mem == entries);
+	free(hash);
 
 	return index;
 }
@@ -302,7 +332,7 @@ create_delta(const struct delta_index *index,
 			val ^= U[data[-RABIN_WINDOW]];
 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
 			i = val & index->hash_mask;
-			for (entry = index->hash[i]; entry; entry = entry->next) {
+			for (entry = index->hash[i]; entry<index->hash[i+1]; entry++) {
 				const unsigned char *ref = entry->ptr;
 				const unsigned char *src = data;
 				unsigned int ref_size = ref_top - ref;
-- 
1.5.3.GIT

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] diff-delta.c: pack the index structure
  2007-09-07 23:38 [PATCH] diff-delta.c: pack the index structure David Kastrup
@ 2007-09-08  2:34 ` Nicolas Pitre
  2007-09-08  6:48   ` David Kastrup
  2007-09-08  8:36   ` David Kastrup
  0 siblings, 2 replies; 7+ messages in thread
From: Nicolas Pitre @ 2007-09-08  2:34 UTC (permalink / raw)
  To: David Kastrup; +Cc: git

On Sat, 8 Sep 2007, David Kastrup wrote:

> In normal use cases, the performance wins are not overly impressive:
> we get something like 5-10% due to the slightly better locality of
> memory accesses using the packed structure.

The gain is probably counterbalanced by the fact that you're copying the 
whole index when packing it, which is unfortunate.

Also, could you provide actual test results backing your performance 
claim?  5-10% is still not negligible.

> -	struct index_entry *entry, **hash;
> +	struct unpacked_index_entry *entry, **hash;
> +	struct index_entry *aentry, **ahash;

What does the "a" stand for?

> +	mem = index+1;
[...]
> +	for (i=0; i<hsize; i++) {
[...]
> +		for (entry=hash[i]; entry; entry=entry->next)

Minor style nit: please add spaces around "+", "=", "<", etc. for 
consistency.

Otherwise that looks fine to me.

Nicolas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] diff-delta.c: pack the index structure
  2007-09-08  2:34 ` Nicolas Pitre
@ 2007-09-08  6:48   ` David Kastrup
  2007-09-08  8:36   ` David Kastrup
  1 sibling, 0 replies; 7+ messages in thread
From: David Kastrup @ 2007-09-08  6:48 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: git

Nicolas Pitre <nico@cam.org> writes:

> On Sat, 8 Sep 2007, David Kastrup wrote:
>
>> In normal use cases, the performance wins are not overly impressive:
>> we get something like 5-10% due to the slightly better locality of
>> memory accesses using the packed structure.
>
> The gain is probably counterbalanced by the fact that you're copying
> the whole index when packing it, which is unfortunate.

It was a design choice (I don't particularly like it myself).  An
index is created once, used a dozen times.  Doing the packing in-place
implies using realloc on the finished index.  I consider the
likelihood of permanent memory fragmentation higher when doing that
rather than when allocating a fresh area of the same size.

Also a repack in-place is going to cost more operations and have quite
more complicated code.

> Also, could you provide actual test results backing your performance 
> claim?  5-10% is still not negligible.

I did in my git repository

for i in .git/objects/[0-9a-f][0-9a-f]/[0-9a-f]*;do echo $i;done|sed 's+.*ts/\(..\)/+\1+' > /tmp/objlist

and then something like

dak@lola:/usr/local/tmp/git$ time ./git-pack-objects </tmp/objlist --stdout|dd of=/dev/null
4099+2 records in
4100+1 records out
2099205 bytes (2.1 MB) copied, 3.99295 seconds, 526 kB/s

real	0m4.023s
user	0m3.812s
sys	0m0.168s
dak@lola:/usr/local/tmp/git$ time git-pack-objects </tmp/objlist --stdout|dd of=/dev/null
4099+2 records in
4100+1 records out
2099205 bytes (2.1 MB) copied, 4.18734 seconds, 501 kB/s

real	0m4.218s
user	0m4.012s
sys	0m0.160s
dak@lola:/usr/local/tmp/git$ 

repeatedly on a warm cache.  Results were pretty much comparable:
consistently my version was faster, but never more than 10%.

>> -	struct index_entry *entry, **hash;
>> +	struct unpacked_index_entry *entry, **hash;
>> +	struct index_entry *aentry, **ahash;
>
> What does the "a" stand for?

array (as opposed to linked list).  Was the first thing coming into my
mind.  Maybe I should have gone for "p" for packed, but I shied from
it because it often is meant to imply "pointer".

Alternatively I could replace "entry" with "uentry", but that affects
more lines.

What do you propose?

>> +	mem = index+1;
> [...]
>> +	for (i=0; i<hsize; i++) {
> [...]
>> +		for (entry=hash[i]; entry; entry=entry->next)
>
> Minor style nit: please add spaces around "+", "=", "<", etc. for 
> consistency.

Can do.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] diff-delta.c: pack the index structure
  2007-09-08  2:34 ` Nicolas Pitre
  2007-09-08  6:48   ` David Kastrup
@ 2007-09-08  8:36   ` David Kastrup
  2007-09-08 20:50     ` David Kastrup
  1 sibling, 1 reply; 7+ messages in thread
From: David Kastrup @ 2007-09-08  8:36 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: git

I answered this post already, but the answer seems to have disappeared
into a black hole.  What a nuisance.

Nicolas Pitre <nico@cam.org> writes:

> On Sat, 8 Sep 2007, David Kastrup wrote:
>
>> In normal use cases, the performance wins are not overly impressive:
>> we get something like 5-10% due to the slightly better locality of
>> memory accesses using the packed structure.
>
> The gain is probably counterbalanced by the fact that you're copying
> the whole index when packing it, which is unfortunate.

The alternative would be packing in-place.  That's actually a rather
contorted operation, and the gains from packing would then have to be
claimed by "realloc".  I think that realloc is more likely to leave a
fragmented address space (giving the reclaimed memory to the heap
rather than back to the system), but I'd have to check glibc for
details at least on GNU/Linux.

Another option avoiding the realloc would be not to use linked lists
at all but just collect all entries sequentually in "packed" form, and
sort them into order afterwards.  That's an O(n lg n) operation with a
large n.  Even if one has a sorting algorithm with good memory
locality, I doubt that the locality would compensate for the lg n
factor, even when taking into account that we save ourselves the
copying.  And that is not even taken the possibility of having to cull
some buckets into account, another O(n) operation (which amounts to
copying everything once, too).

> Also, could you provide actual test results backing your performance 
> claim?  5-10% is still not negligible.

dak@lola:/usr/local/tmp/git$ for i in .git/objects/[0-9a-f][0-9a-f]/[0-9a-f]*;do echo $i;done|sed 's+.*ts/\(..\)/+\1+' > /tmp/objlist
dak@lola:/usr/local/tmp/git$ time ./git-pack-objects </tmp/objlist --stdout|dd of=/dev/null
4099+2 records in
4100+1 records out
2099205 bytes (2.1 MB) copied, 4.01225 seconds, 523 kB/s

real	0m4.043s
user	0m3.836s
sys	0m0.164s
dak@lola:/usr/local/tmp/git$ time git-pack-objects </tmp/objlist --stdout|dd of=/dev/null
4099+2 records in
4100+1 records out
2099205 bytes (2.1 MB) copied, 4.34936 seconds, 483 kB/s

real	0m4.381s
user	0m4.056s
sys	0m0.176s
dak@lola:/usr/local/tmp/git$ 

Of course, on a warm file system.

>> -	struct index_entry *entry, **hash;
>> +	struct unpacked_index_entry *entry, **hash;
>> +	struct index_entry *aentry, **ahash;
>
> What does the "a" stand for?

array (rather than linked list).  Should I rather rename entry and
hash to uentry and uhash (for unpacked)?

>> +	mem = index+1;
> [...]
>> +	for (i=0; i<hsize; i++) {
> [...]
>> +		for (entry=hash[i]; entry; entry=entry->next)
>
> Minor style nit: please add spaces around "+", "=", "<", etc. for 
> consistency.

Will do.

-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH] diff-delta.c: pack the index structure
@ 2007-09-08  9:31 David Kastrup
  0 siblings, 0 replies; 7+ messages in thread
From: David Kastrup @ 2007-09-08  9:31 UTC (permalink / raw)
  To: git

In normal use cases, the performance wins are not overly impressive:
we get something like 5-10% due to the slightly better locality of
memory accesses using the packed structure.

However, since the data structure for index entries saves 33% of
memory on 32-bit platforms and 40% on 64-bit platforms, the behavior
when memory gets limited should be nicer.

This is a rather well-contained change.  One obvious improvement would
be sorting the elements in one bucket according to their hash, then
using binary probing to find the elements with the right hash value.

As it stands, the output should be strictly the same as previously
unless one uses the option for limiting the amount of used memory, in
which case the created packs might be better.

Signed-off-by: David Kastrup <dak@gnu.org>
---
 diff-delta.c |   74 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/diff-delta.c b/diff-delta.c
index 0dde2f2..1b4b1c1 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -115,9 +115,13 @@ static const unsigned int U[256] = {
 struct index_entry {
 	const unsigned char *ptr;
 	unsigned int val;
-	struct index_entry *next;
 };
 
+struct unpacked_index_entry {
+	struct index_entry entry;
+	struct unpacked_index_entry *next;
+};	
+
 struct delta_index {
 	unsigned long memsize;
 	const void *src_buf;
@@ -131,7 +135,8 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
 	const unsigned char *data, *buffer = buf;
 	struct delta_index *index;
-	struct index_entry *entry, **hash;
+	struct unpacked_index_entry *entry, **hash;
+	struct index_entry *aentry, **ahash;
 	void *mem;
 	unsigned long memsize;
 
@@ -148,28 +153,21 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	hmask = hsize - 1;
 
 	/* allocate lookup index */
-	memsize = sizeof(*index) +
-		  sizeof(*hash) * hsize +
+	memsize = sizeof(*hash) * hsize +
 		  sizeof(*entry) * entries;
 	mem = malloc(memsize);
 	if (!mem)
 		return NULL;
-	index = mem;
-	mem = index + 1;
 	hash = mem;
 	mem = hash + hsize;
 	entry = mem;
 
-	index->memsize = memsize;
-	index->src_buf = buf;
-	index->src_size = bufsize;
-	index->hash_mask = hmask;
 	memset(hash, 0, hsize * sizeof(*hash));
 
 	/* allocate an array to count hash entries */
 	hash_count = calloc(hsize, sizeof(*hash_count));
 	if (!hash_count) {
-		free(index);
+		free(hash);
 		return NULL;
 	}
 
@@ -183,12 +181,13 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
 		if (val == prev_val) {
 			/* keep the lowest of consecutive identical blocks */
-			entry[-1].ptr = data + RABIN_WINDOW;
+			entry[-1].entry.ptr = data + RABIN_WINDOW;
+			--entries;
 		} else {
 			prev_val = val;
 			i = val & hmask;
-			entry->ptr = data + RABIN_WINDOW;
-			entry->val = val;
+			entry->entry.ptr = data + RABIN_WINDOW;
+			entry->entry.val = val;
 			entry->next = hash[i];
 			hash[i] = entry++;
 			hash_count[i]++;
@@ -212,16 +211,59 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 			continue;
 		entry = hash[i];
 		do {
-			struct index_entry *keep = entry;
+			struct unpacked_index_entry *keep = entry;
 			int skip = hash_count[i] / HASH_LIMIT;
 			do {
+				--entries;
 				entry = entry->next;
 			} while(--skip && entry);
+			++entries;
 			keep->next = entry;
 		} while(entry);
 	}
 	free(hash_count);
 
+	/* Now create the packed index in array form rather than
+	 * linked lists */
+
+	memsize = sizeof(*index)
+		+ sizeof(*ahash) * (hsize+1)
+		+ sizeof(*aentry) * entries;
+
+	mem = malloc(memsize);
+
+	if (!mem) {
+		free(hash);
+		return NULL;
+	}
+
+	index = mem;
+	index->memsize = memsize;
+	index->src_buf = buf;
+	index->src_size = bufsize;
+	index->hash_mask = hmask;
+
+	mem = index + 1;
+	ahash = mem;
+	mem = ahash + (hsize+1);
+	aentry = mem;
+
+	/* Coalesce all entries belonging to one linked list into
+	 * consecutive array entries */
+
+	for (i = 0; i < hsize; i++) {
+		ahash[i] = aentry;
+		for (entry = hash[i]; entry; entry = entry->next)
+			*aentry++ = entry->entry;
+	}
+
+	/* Sentinel value to indicate the length of the last hash
+	 * bucket */
+
+	ahash[hsize] = aentry;
+	assert(aentry - (struct index_entry *)mem == entries);
+	free(hash);
+
 	return index;
 }
 
@@ -302,7 +344,7 @@ create_delta(const struct delta_index *index,
 			val ^= U[data[-RABIN_WINDOW]];
 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
 			i = val & index->hash_mask;
-			for (entry = index->hash[i]; entry; entry = entry->next) {
+			for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
 				const unsigned char *ref = entry->ptr;
 				const unsigned char *src = data;
 				unsigned int ref_size = ref_top - ref;
-- 
1.5.3.GIT

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [PATCH] diff-delta.c: pack the index structure
  2007-09-08  8:36   ` David Kastrup
@ 2007-09-08 20:50     ` David Kastrup
  0 siblings, 0 replies; 7+ messages in thread
From: David Kastrup @ 2007-09-08 20:50 UTC (permalink / raw)
  To: Nicolas Pitre; +Cc: git

[-- Attachment #1: Type: text/plain, Size: 1794 bytes --]

David Kastrup <dak@gnu.org> writes:

> Another option avoiding the realloc would be not to use linked lists
> at all but just collect all entries sequentually in "packed" form,
> and sort them into order afterwards.  That's an O(n lg n) operation
> with a large n.  Even if one has a sorting algorithm with good
> memory locality, I doubt that the locality would compensate for the
> lg n factor, even when taking into account that we save ourselves
> the copying.  And that is not even taken the possibility of having
> to cull some buckets into account, another O(n) operation (which
> amounts to copying everything once, too).

As an illustration, I have implemented a proof-of-concept.  This is a
patch (not formatted for submission) against the current master.  It
takes about a 20% performance hit on the whole git-pack-objects run,
so the diffing code itself is likely affected worse.  For the
proof-of-concept I did not do the hash bucket collision culling, nor
did I realloc to the possibly reduced size.  So a fairer comparison
would likely involve removing the culling code from master (or
implementing the culling in the proof-of-concept, but I am too lazy
for that).

I hope this addresses the not-in-place complaint.  It is conceivable
that under tight memory conditions, this approach might actually work
out better.  However, the glibc qsort implementation (at least it did
so at one time) reverts to a mergesort for large numbers, and
allocates a temporary buffer of the same size as the original data.
If that is still the case, the memory impact would actually be worse
here.  One would need to implement a custom _real_ in place sorter to
get around this.  All in all, I don't think this approach is worth the
trouble: the sorting overhead factor of lg n is just too much.


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: Type: text/x-patch, Size: 4158 bytes --]

diff --git a/diff-delta.c b/diff-delta.c
index 0dde2f2..f6e2416 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -115,7 +115,6 @@ static const unsigned int U[256] = {
 struct index_entry {
 	const unsigned char *ptr;
 	unsigned int val;
-	struct index_entry *next;
 };
 
 struct delta_index {
@@ -126,12 +125,24 @@ struct delta_index {
 	struct index_entry *hash[FLEX_ARRAY];
 };
 
+static unsigned int cmp_hmask;
+
+static int cmp_index_entry(const void *va, const void *vb)
+{
+	const struct index_entry *a = va, *b = vb;
+	if ((a->val & cmp_hmask) != (b->val & cmp_hmask))
+		return (a->val & cmp_hmask) < (b->val & cmp_hmask) ? -1 : 1;
+	if (a->val != b->val)
+		return a->val < b->val ? -1 : 1;
+	return a->ptr < b->ptr ? -1 : 1;
+}
+
 struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 {
-	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
+	unsigned int i, hsize, hmask, entries, prev_val, ihash;
 	const unsigned char *data, *buffer = buf;
 	struct delta_index *index;
-	struct index_entry *entry, **hash;
+	struct index_entry *entry, **hash, *entry_base;
 	void *mem;
 	unsigned long memsize;
 
@@ -149,7 +160,7 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 
 	/* allocate lookup index */
 	memsize = sizeof(*index) +
-		  sizeof(*hash) * hsize +
+		  sizeof(*hash) * (hsize + 1) +
 		  sizeof(*entry) * entries;
 	mem = malloc(memsize);
 	if (!mem)
@@ -157,21 +168,14 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	index = mem;
 	mem = index + 1;
 	hash = mem;
-	mem = hash + hsize;
-	entry = mem;
+	mem = hash + hsize + 1;
+	entry_base = mem;
+	entry = entry_base;
 
 	index->memsize = memsize;
 	index->src_buf = buf;
 	index->src_size = bufsize;
 	index->hash_mask = hmask;
-	memset(hash, 0, hsize * sizeof(*hash));
-
-	/* allocate an array to count hash entries */
-	hash_count = calloc(hsize, sizeof(*hash_count));
-	if (!hash_count) {
-		free(index);
-		return NULL;
-	}
 
 	/* then populate the index */
 	prev_val = ~0;
@@ -184,29 +188,33 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 		if (val == prev_val) {
 			/* keep the lowest of consecutive identical blocks */
 			entry[-1].ptr = data + RABIN_WINDOW;
+			--entries;
 		} else {
 			prev_val = val;
 			i = val & hmask;
 			entry->ptr = data + RABIN_WINDOW;
 			entry->val = val;
-			entry->next = hash[i];
-			hash[i] = entry++;
-			hash_count[i]++;
+			entry++;
 		}
 	}
+	cmp_hmask = hmask;
+	qsort(entry_base, entries, sizeof(struct index_entry), cmp_index_entry);
+	ihash = 0;
+	
+	for (i=0; i<entries; i++) {
+		entry = &entry_base[i];
+		if ((entry->val & hmask) >= ihash) {
+			/* cull here */
+			do {
+				hash[ihash++] = entry;
+			} while ((entry->val & hmask) >= ihash);
+		}
+	}
+	while (ihash <= hsize) {
+		hash[ihash++] = &entry_base[entries];
+	}
 
-	/*
-	 * Determine a limit on the number of entries in the same hash
-	 * bucket.  This guards us against pathological data sets causing
-	 * really bad hash distribution with most entries in the same hash
-	 * bucket that would bring us to O(m*n) computing costs (m and n
-	 * corresponding to reference and target buffer sizes).
-	 *
-	 * Make sure none of the hash buckets has more entries than
-	 * we're willing to test.  Otherwise we cull the entry list
-	 * uniformly to still preserve a good repartition across
-	 * the reference buffer.
-	 */
+#if 0
 	for (i = 0; i < hsize; i++) {
 		if (hash_count[i] < HASH_LIMIT)
 			continue;
@@ -221,6 +229,7 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 		} while(entry);
 	}
 	free(hash_count);
+#endif
 
 	return index;
 }
@@ -302,7 +311,7 @@ create_delta(const struct delta_index *index,
 			val ^= U[data[-RABIN_WINDOW]];
 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
 			i = val & index->hash_mask;
-			for (entry = index->hash[i]; entry; entry = entry->next) {
+			for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
 				const unsigned char *ref = entry->ptr;
 				const unsigned char *src = data;
 				unsigned int ref_size = ref_top - ref;

[-- Attachment #3: Type: text/plain, Size: 52 bytes --]



-- 
David Kastrup, Kriemhildstr. 15, 44793 Bochum

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [PATCH] diff-delta.c: pack the index structure
@ 2007-09-08 21:17 David Kastrup
  0 siblings, 0 replies; 7+ messages in thread
From: David Kastrup @ 2007-09-08 21:17 UTC (permalink / raw)
  To: git

In normal use cases, the performance wins are not overly impressive:
we get something like 5-10% due to the slightly better locality of
memory accesses using the packed structure.

However, since the data structure for index entries saves 33% of
memory on 32-bit platforms and 40% on 64-bit platforms, the behavior
when memory gets limited should be nicer.

This is a rather well-contained change.  One obvious improvement would
be sorting the elements in one bucket according to their hash, then
using binary probing to find the elements with the right hash value.

As it stands, the output should be strictly the same as previously
unless one uses the option for limiting the amount of used memory, in
which case the created packs might be better.

Signed-off-by: David Kastrup <dak@gnu.org>
Acked-by: Nicolas Pitre <nico@cam.org>
---
 diff-delta.c |   74 +++++++++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/diff-delta.c b/diff-delta.c
index 0dde2f2..d355e5e 100644
--- a/diff-delta.c
+++ b/diff-delta.c
@@ -115,9 +115,13 @@ static const unsigned int U[256] = {
 struct index_entry {
 	const unsigned char *ptr;
 	unsigned int val;
-	struct index_entry *next;
 };
 
+struct unpacked_index_entry {
+	struct index_entry entry;
+	struct unpacked_index_entry *next;
+};	
+
 struct delta_index {
 	unsigned long memsize;
 	const void *src_buf;
@@ -131,7 +135,8 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	unsigned int i, hsize, hmask, entries, prev_val, *hash_count;
 	const unsigned char *data, *buffer = buf;
 	struct delta_index *index;
-	struct index_entry *entry, **hash;
+	struct unpacked_index_entry *entry, **hash;
+	struct index_entry *packed_entry, **packed_hash;
 	void *mem;
 	unsigned long memsize;
 
@@ -148,28 +153,21 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 	hmask = hsize - 1;
 
 	/* allocate lookup index */
-	memsize = sizeof(*index) +
-		  sizeof(*hash) * hsize +
+	memsize = sizeof(*hash) * hsize +
 		  sizeof(*entry) * entries;
 	mem = malloc(memsize);
 	if (!mem)
 		return NULL;
-	index = mem;
-	mem = index + 1;
 	hash = mem;
 	mem = hash + hsize;
 	entry = mem;
 
-	index->memsize = memsize;
-	index->src_buf = buf;
-	index->src_size = bufsize;
-	index->hash_mask = hmask;
 	memset(hash, 0, hsize * sizeof(*hash));
 
 	/* allocate an array to count hash entries */
 	hash_count = calloc(hsize, sizeof(*hash_count));
 	if (!hash_count) {
-		free(index);
+		free(hash);
 		return NULL;
 	}
 
@@ -183,12 +181,13 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 			val = ((val << 8) | data[i]) ^ T[val >> RABIN_SHIFT];
 		if (val == prev_val) {
 			/* keep the lowest of consecutive identical blocks */
-			entry[-1].ptr = data + RABIN_WINDOW;
+			entry[-1].entry.ptr = data + RABIN_WINDOW;
+			--entries;
 		} else {
 			prev_val = val;
 			i = val & hmask;
-			entry->ptr = data + RABIN_WINDOW;
-			entry->val = val;
+			entry->entry.ptr = data + RABIN_WINDOW;
+			entry->entry.val = val;
 			entry->next = hash[i];
 			hash[i] = entry++;
 			hash_count[i]++;
@@ -212,16 +211,59 @@ struct delta_index * create_delta_index(const void *buf, unsigned long bufsize)
 			continue;
 		entry = hash[i];
 		do {
-			struct index_entry *keep = entry;
+			struct unpacked_index_entry *keep = entry;
 			int skip = hash_count[i] / HASH_LIMIT;
 			do {
+				--entries;
 				entry = entry->next;
 			} while(--skip && entry);
+			++entries;
 			keep->next = entry;
 		} while(entry);
 	}
 	free(hash_count);
 
+	/* Now create the packed index in array form rather than
+	 * linked lists */
+
+	memsize = sizeof(*index)
+		+ sizeof(*packed_hash) * (hsize+1)
+		+ sizeof(*packed_entry) * entries;
+
+	mem = malloc(memsize);
+
+	if (!mem) {
+		free(hash);
+		return NULL;
+	}
+
+	index = mem;
+	index->memsize = memsize;
+	index->src_buf = buf;
+	index->src_size = bufsize;
+	index->hash_mask = hmask;
+
+	mem = index + 1;
+	packed_hash = mem;
+	mem = packed_hash + (hsize+1);
+	packed_entry = mem;
+
+	/* Coalesce all entries belonging to one linked list into
+	 * consecutive array entries */
+
+	for (i = 0; i < hsize; i++) {
+		packed_hash[i] = packed_entry;
+		for (entry = hash[i]; entry; entry = entry->next)
+			*packed_entry++ = entry->entry;
+	}
+
+	/* Sentinel value to indicate the length of the last hash
+	 * bucket */
+
+	packed_hash[hsize] = packed_entry;
+	assert(packed_entry - (struct index_entry *)mem == entries);
+	free(hash);
+
 	return index;
 }
 
@@ -302,7 +344,7 @@ create_delta(const struct delta_index *index,
 			val ^= U[data[-RABIN_WINDOW]];
 			val = ((val << 8) | *data) ^ T[val >> RABIN_SHIFT];
 			i = val & index->hash_mask;
-			for (entry = index->hash[i]; entry; entry = entry->next) {
+			for (entry = index->hash[i]; entry < index->hash[i+1]; entry++) {
 				const unsigned char *ref = entry->ptr;
 				const unsigned char *src = data;
 				unsigned int ref_size = ref_top - ref;
-- 
1.5.3.GIT

^ permalink raw reply related	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2007-09-08 21:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2007-09-07 23:38 [PATCH] diff-delta.c: pack the index structure David Kastrup
2007-09-08  2:34 ` Nicolas Pitre
2007-09-08  6:48   ` David Kastrup
2007-09-08  8:36   ` David Kastrup
2007-09-08 20:50     ` David Kastrup
  -- strict thread matches above, loose matches on Subject: below --
2007-09-08  9:31 David Kastrup
2007-09-08 21:17 David Kastrup

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).