From: Jeff King <peff@peff.net>
To: git@vger.kernel.org
Cc: Duy Nguyen <pclouds@gmail.com>, "Shawn O. Pearce" <spearce@spearce.org>
Subject: [PATCH 4/6] introduce a commit metapack
Date: Tue, 29 Jan 2013 04:16:11 -0500 [thread overview]
Message-ID: <20130129091610.GD9999@sigill.intra.peff.net> (raw)
In-Reply-To: <20130129091434.GA6975@sigill.intra.peff.net>
When we are doing a commit traversal that does not need to
look at the commit messages themselves (e.g., rev-list,
merge-base, etc.), we spend a lot of time accessing,
decompressing, and parsing the commit objects just to find
the parent and timestamp information. We can make a
space-time tradeoff by caching that information on disk in a
compact, uncompressed format.
TODO: document on-disk format in Documentation/technical
TODO: document API
Signed-off-by: Jeff King <peff@peff.net>
---
Makefile | 2 +
commit-metapack.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
commit-metapack.h | 12 ++++
3 files changed, 189 insertions(+)
create mode 100644 commit-metapack.c
create mode 100644 commit-metapack.h
diff --git a/Makefile b/Makefile
index 3e4ae1b..6ca5320 100644
--- a/Makefile
+++ b/Makefile
@@ -619,6 +619,7 @@ LIB_H += column.h
LIB_H += cache.h
LIB_H += color.h
LIB_H += column.h
+LIB_H += commit-metapack.h
LIB_H += commit.h
LIB_H += compat/bswap.h
LIB_H += compat/cygwin.h
@@ -730,6 +731,7 @@ LIB_OBJS += combine-diff.o
LIB_OBJS += color.o
LIB_OBJS += column.o
LIB_OBJS += combine-diff.o
+LIB_OBJS += commit-metapack.o
LIB_OBJS += commit.o
LIB_OBJS += compat/obstack.o
LIB_OBJS += compat/terminal.o
diff --git a/commit-metapack.c b/commit-metapack.c
new file mode 100644
index 0000000..2c19f48
--- /dev/null
+++ b/commit-metapack.c
@@ -0,0 +1,175 @@
+#include "cache.h"
+#include "commit-metapack.h"
+#include "metapack.h"
+#include "commit.h"
+#include "sha1-lookup.h"
+
+struct commit_metapack { /* one loaded "commits" metapack */
+ struct metapack mp;
+ uint32_t nr; /* entry count, read from the first 4 bytes of mp.data */
+ unsigned char *index; /* nr 20-byte sha1s, starting at mp.data + 4 */
+ unsigned char *data; /* 64-byte records, starting right after index */
+ struct commit_metapack *next; /* next element of commit_metapacks */
+};
+static struct commit_metapack *commit_metapacks; /* list head */
+
+/*
+ * Open the "commits" metapack for `pack` and validate its size.
+ * Returns a newly allocated entry, or NULL if the metapack cannot be
+ * opened, has a version other than 1, or is too short for its count.
+ */
+static struct commit_metapack *alloc_commit_metapack(struct packed_git *pack)
+{
+	struct commit_metapack *it = xcalloc(1, sizeof(*it));
+	uint32_t version;
+
+	if (metapack_init(&it->mp, pack, "commits", &version) < 0) {
+		free(it);
+		return NULL;
+	}
+	/*
+	 * A different version presumably comes from a more recent git.
+	 * Don't warn; we just fall back to reading the commits.
+	 */
+	if (version != 1)
+		goto close_and_fail;
+
+	if (it->mp.len < 4)
+		goto truncated;
+	memcpy(&it->nr, it->mp.data, 4);
+	it->nr = ntohl(it->nr);
+
+	/*
+	 * We need 84 bytes for each entry: sha1(20), date(4), tree(20),
+	 * parents(40). Divide rather than multiply so that a huge count
+	 * cannot wrap around in 32-bit arithmetic and pass the check.
+	 */
+	if ((it->mp.len - 4) / 84 < it->nr)
+		goto truncated;
+
+	it->index = it->mp.data + 4;
+	it->data = it->index + (size_t)20 * it->nr;
+	return it;
+
+truncated:
+	warning("commit metapack for '%s' is truncated", pack->pack_name);
+close_and_fail:
+	metapack_close(&it->mp);
+	free(it);
+	return NULL;
+}
+
+/* Append each pack's commit metapack (if it loads) to the list; runs once. */
+static void prepare_commit_metapacks(void)
+{
+	static int initialized;
+	struct commit_metapack **link = &commit_metapacks;
+	struct packed_git *pack;
+
+	if (initialized)
+		return;
+
+	prepare_packed_git();
+	for (pack = packed_git; pack; pack = pack->next) {
+		struct commit_metapack *found = alloc_commit_metapack(pack);
+		if (!found)
+			continue;
+		*link = found;
+		link = &found->next;
+	}
+
+	initialized = 1;
+}
+
+/*
+ * Look up `sha1` in the commit metapacks. On a hit, return 0 and point
+ * tree/parent1/parent2 into the metapack data (not copies); the writer
+ * stores null_sha1 for a missing second parent. Return -1 on a miss.
+ */
+int commit_metapack(unsigned char *sha1,
+		    uint32_t *timestamp,
+		    unsigned char **tree,
+		    unsigned char **parent1,
+		    unsigned char **parent2)
+{
+	struct commit_metapack *p;
+
+	prepare_commit_metapacks();
+	for (p = commit_metapacks; p; p = p->next) {
+		unsigned char *data;
+		int pos = sha1_entry_pos(p->index, 20, 0, 0, p->nr, p->nr, sha1);
+		if (pos < 0)
+			continue;
+
+		/* timestamp(4) + tree(20) + parents(40); may be unaligned */
+		data = p->data + (size_t)64 * pos;
+		memcpy(timestamp, data, 4);
+		*timestamp = ntohl(*timestamp);
+		*tree = data + 4;
+		*parent1 = data + 24;
+		*parent2 = data + 44;
+		return 0;
+	}
+	return -1;
+}
+
+/*
+ * metapack_writer_foreach() callback: append the commit `sha1` to the
+ * list whose tail pointer `data` points at. Non-commits are ignored.
+ * Root commits and merges with more than two parents are skipped: the
+ * fixed-size record cannot hold them, so readers use the real object.
+ */
+static void get_commits(struct metapack_writer *mw,
+			const unsigned char *sha1,
+			void *data)
+{
+	struct commit_list ***tail = data;
+	struct commit_list *parents;
+	struct commit *c;
+
+	if (sha1_object_info(sha1, NULL) != OBJ_COMMIT)
+		return;
+
+	c = lookup_commit(sha1);
+	if (!c || parse_commit(c))
+		die("unable to read commit %s", sha1_to_hex(sha1));
+
+	parents = c->parents;
+	if (!parents || (parents->next && parents->next->next))
+		return;
+	*tail = &commit_list_insert(c, *tail)->next;
+}
+
+/*
+ * Write the "commits" metapack for `idx`: a uint32 count, the commit
+ * sha1s, then one date/tree/parent1/parent2 record per commit.
+ */
+void commit_metapack_write(const char *idx)
+{
+	struct metapack_writer mw;
+	struct commit_list *commits = NULL, **tail = &commits;
+	struct commit_list *entry;
+	uint32_t nr = 0;
+
+	metapack_writer_init(&mw, idx, "commits", 1);
+	metapack_writer_foreach(&mw, get_commits, &tail);
+	for (entry = commits; entry; entry = entry->next)
+		nr++;
+	metapack_writer_add_uint32(&mw, nr);
+
+	for (entry = commits; entry; entry = entry->next)
+		metapack_writer_add(&mw, entry->item->object.sha1, 20);
+
+	for (entry = commits; entry; entry = entry->next) {
+		struct commit *c = entry->item;
+		struct commit_list *second = c->parents->next;
+
+		metapack_writer_add_uint32(&mw, c->date);
+		metapack_writer_add(&mw, c->tree->object.sha1, 20);
+		metapack_writer_add(&mw, c->parents->item->object.sha1, 20);
+		metapack_writer_add(&mw,
+			second ? second->item->object.sha1 : null_sha1, 20);
+	}
+
+	metapack_writer_finish(&mw);
+}
diff --git a/commit-metapack.h b/commit-metapack.h
new file mode 100644
index 0000000..4684573
--- /dev/null
+++ b/commit-metapack.h
@@ -0,0 +1,12 @@
+#ifndef METAPACK_COMMIT_H
+#define METAPACK_COMMIT_H
+
+int commit_metapack(unsigned char *sha1, /* in: 20-byte commit id to look up */
+ uint32_t *timestamp, /* out: stored commit date, host byte order */
+ unsigned char **tree, /* out: points at the stored 20-byte tree sha1 */
+ unsigned char **parent1, /* out: points at the stored first parent sha1 */
+ unsigned char **parent2); /* out: second parent; returns 0 if found, else -1 */
+
+void commit_metapack_write(const char *idx_file); /* write the "commits" metapack */
+
+#endif
--
1.8.0.2.16.g72e2fc9
next prev parent reply other threads:[~2013-01-29 9:16 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-01-29 9:14 [PATCH/RFC 0/6] commit caching Jeff King
2013-01-29 9:15 ` [PATCH 1/6] csum-file: make sha1write const-correct Jeff King
2013-01-29 9:15 ` [PATCH 2/6] strbuf: add string-chomping functions Jeff King
2013-01-29 10:15 ` Michael Haggerty
2013-01-29 11:10 ` Jeff King
2013-01-30 5:00 ` Michael Haggerty
2013-01-29 9:15 ` [PATCH 3/6] introduce pack metadata cache files Jeff King
2013-01-29 17:35 ` Junio C Hamano
2013-01-30 6:47 ` Jeff King
2013-01-30 1:30 ` Duy Nguyen
2013-01-30 6:50 ` Jeff King
2013-01-29 9:16 ` Jeff King [this message]
2013-01-29 10:24 ` [PATCH 4/6] introduce a commit metapack Michael Haggerty
2013-01-29 11:13 ` Jeff King
2013-01-29 17:38 ` Junio C Hamano
2013-01-29 18:08 ` Junio C Hamano
2013-01-30 7:12 ` Jeff King
2013-01-30 7:17 ` Junio C Hamano
2013-02-01 9:21 ` Jeff King
2013-01-30 15:56 ` Junio C Hamano
2013-01-31 17:03 ` Shawn Pearce
2013-02-01 9:42 ` Jeff King
2013-02-02 17:49 ` Junio C Hamano
2013-01-30 7:07 ` Jeff King
2013-01-30 3:36 ` Duy Nguyen
2013-01-30 7:12 ` Jeff King
2013-01-30 13:56 ` Duy Nguyen
2013-01-30 14:16 ` Duy Nguyen
2013-01-31 11:06 ` Duy Nguyen
2013-02-01 10:15 ` Jeff King
2013-02-02 9:49 ` Duy Nguyen
2013-02-01 10:40 ` Jeff King
2013-03-17 13:21 ` Duy Nguyen
2013-03-18 12:20 ` Jeff King
2013-02-01 10:00 ` Jeff King
2013-01-29 9:16 ` [PATCH 5/6] add git-metapack command Jeff King
2013-01-29 9:16 ` [PATCH 6/6] commit: look up commit info in metapack Jeff King
2013-01-30 3:31 ` [PATCH/RFC 0/6] commit caching Duy Nguyen
2013-01-30 7:18 ` Jeff King
2013-01-30 8:32 ` Duy Nguyen
2013-01-31 17:14 ` Shawn Pearce
2013-02-01 9:11 ` Jeff King
2013-02-02 10:04 ` Shawn Pearce
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20130129091610.GD9999@sigill.intra.peff.net \
--to=peff@peff.net \
--cc=git@vger.kernel.org \
--cc=pclouds@gmail.com \
--cc=spearce@spearce.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).