From: Nicolas Pitre <nico@cam.org>
To: "Shawn O. Pearce" <spearce@spearce.org>
Cc: Jon Smirl <jonsmirl@gmail.com>,
Martin Koegler <mkoegler@auto.tuwien.ac.at>,
Git Mailing List <git@vger.kernel.org>
Subject: Re: performance on repack
Date: Thu, 30 Aug 2007 00:36:13 -0400 (EDT) [thread overview]
Message-ID: <alpine.LFD.0.999.0708300033540.16727@xanadu.home> (raw)
In-Reply-To: <alpine.LFD.0.999.0708300005110.16727@xanadu.home>
On Thu, 30 Aug 2007, Nicolas Pitre wrote:
> Well, here is a quick implementation of this idea for those who would
> like to give it a try.
[...]
Well, it would help if I provided the full diff, of course.
---
diff --git a/Makefile b/Makefile
index 4eb4637..c3c6e68 100644
--- a/Makefile
+++ b/Makefile
@@ -122,6 +122,9 @@ all::
# If not set it defaults to the bare 'wish'. If it is set to the empty
# string then NO_TCLTK will be forced (this is used by configure script).
#
+# Define THREADED_DELTA_SEARCH if you have pthreads and wish to exploit
+# parallel delta searching when packing objects.
+#
GIT-VERSION-FILE: .FORCE-GIT-VERSION-FILE
@$(SHELL_PATH) ./GIT-VERSION-GEN
@@ -662,6 +665,11 @@ ifdef NO_HSTRERROR
COMPAT_OBJS += compat/hstrerror.o
endif
+ifdef THREADED_DELTA_SEARCH
+ BASIC_CFLAGS += -DTHREADED_DELTA_SEARCH
+ EXTLIBS += -lpthread
+endif
+
ifeq ($(TCLTK_PATH),)
NO_TCLTK=NoThanks
endif
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 9b3ef94..7d68f82 100644
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
@@ -15,6 +15,10 @@
#include "list-objects.h"
#include "progress.h"
+#ifdef THREADED_DELTA_SEARCH
+#include <pthread.h>
+#endif
+
static const char pack_usage[] = "\
git-pack-objects [{ -q | --progress | --all-progress }] \n\
[--max-pack-size=N] [--local] [--incremental] \n\
@@ -78,7 +82,6 @@ static unsigned long delta_cache_size = 0;
static unsigned long max_delta_cache_size = 0;
static unsigned long cache_max_small_delta_size = 1000;
-static unsigned long window_memory_usage = 0;
static unsigned long window_memory_limit = 0;
/*
@@ -1291,6 +1294,20 @@ static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
return 0;
}
+#ifdef THREADED_DELTA_SEARCH
+
+static pthread_mutex_t read_sha1_file_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+#define read_lock() pthread_mutex_lock(&read_sha1_file_mutex)
+#define read_unlock() pthread_mutex_unlock(&read_sha1_file_mutex)
+
+#else
+
+#define read_lock() 0
+#define read_unlock() 0
+
+#endif
+
/*
* We search for deltas _backwards_ in a list sorted by type and
* by size, so that we see progressively smaller and smaller files.
@@ -1300,7 +1317,7 @@ static int delta_cacheable(unsigned long src_size, unsigned long trg_size,
* one.
*/
static int try_delta(struct unpacked *trg, struct unpacked *src,
- unsigned max_depth)
+ unsigned max_depth, unsigned long *mem_usage)
{
struct object_entry *trg_entry = trg->entry;
struct object_entry *src_entry = src->entry;
@@ -1313,12 +1330,6 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
if (trg_entry->type != src_entry->type)
return -1;
- /* We do not compute delta to *create* objects we are not
- * going to pack.
- */
- if (trg_entry->preferred_base)
- return -1;
-
/*
* We do not bother to try a delta that we discarded
* on an earlier try, but only when reusing delta data.
@@ -1355,24 +1366,28 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
/* Load data if not already done */
if (!trg->data) {
+ read_lock();
trg->data = read_sha1_file(trg_entry->idx.sha1, &type, &sz);
+ read_unlock();
if (!trg->data)
die("object %s cannot be read",
sha1_to_hex(trg_entry->idx.sha1));
if (sz != trg_size)
die("object %s inconsistent object length (%lu vs %lu)",
sha1_to_hex(trg_entry->idx.sha1), sz, trg_size);
- window_memory_usage += sz;
+ *mem_usage += sz;
}
if (!src->data) {
+ read_lock();
src->data = read_sha1_file(src_entry->idx.sha1, &type, &sz);
+ read_unlock();
if (!src->data)
die("object %s cannot be read",
sha1_to_hex(src_entry->idx.sha1));
if (sz != src_size)
die("object %s inconsistent object length (%lu vs %lu)",
sha1_to_hex(src_entry->idx.sha1), sz, src_size);
- window_memory_usage += sz;
+ *mem_usage += sz;
}
if (!src->index) {
src->index = create_delta_index(src->data, src_size);
@@ -1382,7 +1397,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
warning("suboptimal pack - out of memory");
return 0;
}
- window_memory_usage += sizeof_delta_index(src->index);
+ *mem_usage += sizeof_delta_index(src->index);
}
delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
@@ -1425,68 +1440,59 @@ static unsigned int check_delta_limit(struct object_entry *me, unsigned int n)
return m;
}
-static void free_unpacked(struct unpacked *n)
+static unsigned long free_unpacked(struct unpacked *n)
{
- window_memory_usage -= sizeof_delta_index(n->index);
+ unsigned long freed_mem = sizeof_delta_index(n->index);
free_delta_index(n->index);
n->index = NULL;
if (n->data) {
+ freed_mem += n->entry->size;
free(n->data);
n->data = NULL;
- window_memory_usage -= n->entry->size;
}
n->entry = NULL;
n->depth = 0;
+ return freed_mem;
}
-static void find_deltas(struct object_entry **list, int window, int depth)
+static void find_deltas(struct object_entry **list, unsigned list_size,
+ unsigned nr_deltas, int window, int depth)
{
- uint32_t i = nr_objects, idx = 0, count = 0, processed = 0;
+ uint32_t i = list_size, idx = 0, count = 0, processed = 0;
unsigned int array_size = window * sizeof(struct unpacked);
struct unpacked *array;
- int max_depth;
+ unsigned long mem_usage = 0;
- if (!nr_objects)
- return;
array = xmalloc(array_size);
memset(array, 0, array_size);
if (progress)
- start_progress(&progress_state, "Deltifying %u objects...", "", nr_result);
+ start_progress(&progress_state, "Deltifying %u objects...", "", nr_deltas);
do {
struct object_entry *entry = list[--i];
struct unpacked *n = array + idx;
- int j;
-
- if (!entry->preferred_base)
- processed++;
+ int j, max_depth;
- if (progress)
- display_progress(&progress_state, processed);
-
- if (entry->delta)
- /* This happens if we decided to reuse existing
- * delta from a pack. "!no_reuse_delta &&" is implied.
- */
- continue;
-
- if (entry->size < 50)
- continue;
-
- if (entry->no_try_delta)
- continue;
-
- free_unpacked(n);
+ mem_usage -= free_unpacked(n);
n->entry = entry;
while (window_memory_limit &&
- window_memory_usage > window_memory_limit &&
+ mem_usage > window_memory_limit &&
count > 1) {
uint32_t tail = (idx + window - count) % window;
- free_unpacked(array + tail);
+ mem_usage -= free_unpacked(array + tail);
count--;
}
+ /* We do not compute delta to *create* objects we are not
+ * going to pack.
+ */
+ if (entry->preferred_base)
+ goto next;
+
+ if (progress)
+ display_progress(&progress_state, ++processed);
+
/*
* If the current object is at pack edge, take the depth the
* objects that depend on the current object into account
@@ -1508,7 +1514,7 @@ static void find_deltas(struct object_entry **list, int window, int depth)
m = array + other_idx;
if (!m->entry)
break;
- if (try_delta(n, m, max_depth) < 0)
+ if (try_delta(n, m, max_depth, &mem_usage) < 0)
break;
}
@@ -1537,21 +1543,94 @@ static void find_deltas(struct object_entry **list, int window, int depth)
free(array);
}
+#ifdef THREADED_DELTA_SEARCH
+
+struct thread_params {
+ pthread_t thread;
+ struct object_entry **list;
+ unsigned list_size;
+ unsigned nr_deltas;
+ int window;
+ int depth;
+};
+
+static void *threaded_find_deltas(void *arg)
+{
+ struct thread_params *p = arg;
+ if (p->list_size)
+ find_deltas(p->list, p->list_size, p->nr_deltas,
+ p->window, p->depth);
+ return NULL;
+}
+
+static void ll_find_deltas(struct object_entry **list, unsigned list_size,
+ unsigned nr_deltas, int window, int depth)
+{
+ struct thread_params p[4];
+ int i, ret;
+
+ for (i = 0; i < 4; i++) {
+ unsigned sublist_size = list_size / (4 - i);
+ p[i].list = list;
+ p[i].list_size = sublist_size;
+ p[i].nr_deltas = nr_deltas;
+ p[i].window = window;
+ p[i].depth = depth;
+ ret = pthread_create(&p[i].thread, NULL,
+ threaded_find_deltas, &p[i]);
+ if (ret)
+ die("unable to create thread: %s", strerror(ret));
+ list += sublist_size;
+ list_size -= sublist_size;
+ }
+
+ for (i = 0; i < 4; i++) {
+ pthread_join(p[i].thread, NULL);
+ }
+}
+
+#else
+#define ll_find_deltas find_deltas
+#endif
+
static void prepare_pack(int window, int depth)
{
struct object_entry **delta_list;
- uint32_t i;
+ uint32_t i, n, nr_deltas;
get_object_details();
- if (!window || !depth)
+ if (!nr_objects || !window || !depth)
return;
delta_list = xmalloc(nr_objects * sizeof(*delta_list));
- for (i = 0; i < nr_objects; i++)
- delta_list[i] = objects + i;
- qsort(delta_list, nr_objects, sizeof(*delta_list), type_size_sort);
- find_deltas(delta_list, window+1, depth);
+ nr_deltas = n = 0;
+
+ for (i = 0; i < nr_objects; i++) {
+ struct object_entry *entry = objects + i;
+
+ if (entry->delta)
+ /* This happens if we decided to reuse existing
+ * delta from a pack. "!no_reuse_delta &&" is implied.
+ */
+ continue;
+
+ if (entry->size < 50)
+ continue;
+
+ if (entry->no_try_delta)
+ continue;
+
+ if (!entry->preferred_base)
+ nr_deltas++;
+
+ delta_list[n++] = entry;
+ }
+
+ if (nr_deltas) {
+ qsort(delta_list, n, sizeof(*delta_list), type_size_sort);
+ ll_find_deltas(delta_list, n, nr_deltas, window+1, depth);
+ }
free(delta_list);
}
next prev parent reply other threads:[~2007-08-30 4:36 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2007-08-11 21:12 performance on repack Jon Smirl
2007-08-11 22:09 ` David Kastrup
2007-08-11 22:34 ` Linus Torvalds
2007-08-11 23:21 ` Jon Smirl
2007-08-12 10:33 ` Martin Koegler
2007-08-12 13:49 ` Jon Smirl
2007-08-14 3:12 ` Shawn O. Pearce
2007-08-14 4:10 ` Jon Smirl
2007-08-14 5:13 ` Shawn O. Pearce
2007-08-14 5:57 ` Jon Smirl
2007-08-14 14:52 ` Nicolas Pitre
2007-08-14 21:41 ` Nicolas Pitre
2007-08-15 1:20 ` Jon Smirl
2007-08-15 1:59 ` Nicolas Pitre
2007-08-15 5:32 ` Shawn O. Pearce
2007-08-15 15:08 ` Jon Smirl
2007-08-15 17:11 ` Martin Koegler
2007-08-15 18:38 ` Jon Smirl
2007-08-15 19:00 ` Nicolas Pitre
2007-08-15 19:42 ` Jon Smirl
2007-08-16 8:10 ` David Kastrup
2007-08-16 15:34 ` Nicolas Pitre
2007-08-16 16:13 ` Jon Smirl
2007-08-16 16:21 ` Nicolas Pitre
2007-08-15 21:05 ` Nicolas Pitre
2007-08-15 20:49 ` Nicolas Pitre
2007-08-30 4:27 ` Nicolas Pitre
2007-08-30 4:36 ` Nicolas Pitre [this message]
2007-08-30 16:17 ` Jon Smirl
2007-09-01 21:54 ` Jon Smirl
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=alpine.LFD.0.999.0708300033540.16727@xanadu.home \
--to=nico@cam.org \
--cc=git@vger.kernel.org \
--cc=jonsmirl@gmail.com \
--cc=mkoegler@auto.tuwien.ac.at \
--cc=spearce@spearce.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).