From: Nicolas Pitre <nico@cam.org>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Git Mailing List <git@vger.kernel.org>
Subject: Re: kernel.org and GIT tree rebuilding
Date: Tue, 28 Jun 2005 23:55:46 -0400 (EDT) [thread overview]
Message-ID: <Pine.LNX.4.63.0506282314320.1667@localhost.localdomain> (raw)
In-Reply-To: <Pine.LNX.4.58.0506281424420.19755@ppc970.osdl.org>
On Tue, 28 Jun 2005, Linus Torvalds wrote:
>
>
> On Tue, 28 Jun 2005, Nicolas Pitre wrote:
> >
> > OK. New patch below.
>
> Dammit, I wasted all that time doing it myself.
>
> I just committed and pushed out my version. But mine also does sha1_file.c
> right, so that you can use a packed archive in .git/objects/pack. Yours
> has some other cleanups, so..
>
> Can you double-check my version (it hasn't mirrored out yet, it seems, but
> it should be there soon).
OK... See below the cleanups I merged from my version on top of yours:
pack-objects.c | 70 ++++++++++++++-----------------------------------------
pack.h | 17 ++++++++-----
unpack-objects.c | 66 +++++++++++++++++++++++++--------------------------
3 files changed, 63 insertions(+), 90 deletions(-)
I also restored my original object header size ordering (little endian)
for two reasons:
- it is much simpler to generate and therefore allows for removing
quite a bit of code
- it allows for stable bit positions, which makes it much easier to look
at a hex dump of the binary data for manual debugging
Also a few code optimizations and one error return fix.
Signed-off-by: Nicolas Pitre <nico@cam.org>
diff --git a/pack-objects.c b/pack-objects.c
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -34,7 +34,7 @@ static void *delta_against(void *buf, un
if (!otherbuf)
die("unable to read %s", sha1_to_hex(entry->delta->sha1));
delta_buf = diff_delta(otherbuf, othersize,
- buf, size, &delta_size, ~0UL);
+ buf, size, &delta_size, 0UL);
if (!delta_buf || delta_size != entry->delta_size)
die("delta size changed");
free(buf);
@@ -42,54 +42,13 @@ static void *delta_against(void *buf, un
return delta_buf;
}
-/*
- * The per-object header is a pretty dense thing, which is
- * - first byte: low four bits are "size", then three bits of "type",
- * and the high bit is "size continues".
- * - each byte afterwards: low seven bits are size continuation,
- * with the high bit being "size continues"
- */
-static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
-{
- int n = 1, i;
- unsigned char c;
-
- if (type < OBJ_COMMIT || type > OBJ_DELTA)
- die("bad type %d", type);
-
- /*
- * Shift the size up by 7 bits at a time,
- * until you get bits in the "high four".
- * That will be our beginning. We'll have
- * four size bits in 28..31, then groups
- * of seven in 21..27, 14..20, 7..13 and
- * finally 0..6.
- */
- if (size) {
- n = 5;
- while (!(size & 0xfe000000)) {
- size <<= 7;
- n--;
- }
- }
- c = (type << 4) | (size >> 28);
- for (i = 1; i < n; i++) {
- *hdr++ = c | 0x80;
- c = (size >> 21) & 0x7f;
- size <<= 7;
- }
- *hdr = c;
- return n;
-}
-
static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
{
unsigned long size;
char type[10];
void *buf = read_sha1_file(entry->sha1, type, &size);
- unsigned char header[10];
+ char header[25];
unsigned hdrlen, datalen;
- enum object_type obj_type;
if (!buf)
die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -97,22 +56,31 @@ static unsigned long write_object(struct
die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
/*
- * The object header is a byte of 'type' followed by zero or
- * more bytes of length. For deltas, the 20 bytes of delta sha1
- * follows that.
+ * The low 3 bits of the first header byte hold the object type and
+ * bits 3-6 hold the low 4 bits of the object size. Bit 7 of each byte
+ * is set when another size byte (7 more bits, little endian) follows.
+ * For delta objects the sha1 of the reference object is also appended.
*/
- obj_type = entry->type;
if (entry->delta) {
+ header[0] = OBJ_DELTA;
buf = delta_against(buf, size, entry);
size = entry->delta_size;
- obj_type = OBJ_DELTA;
+ } else
+ header[0] = entry->type;
+ header[0] |= size << 3;
+ hdrlen = 1;
+ datalen = size >> 4;
+ while (datalen) {
+ header[hdrlen - 1] |= 0x80;
+ header[hdrlen++] = datalen;
+ datalen >>= 7;
}
- hdrlen = encode_header(obj_type, size, header);
- sha1write(f, header, hdrlen);
if (entry->delta) {
- sha1write(f, entry->delta, 20);
+ memcpy(header+hdrlen, entry->delta, 20);
hdrlen += 20;
}
+
+ sha1write(f, header, hdrlen);
datalen = sha1write_compressed(f, buf, size);
free(buf);
return hdrlen + datalen;
diff --git a/pack.h b/pack.h
--- a/pack.h
+++ b/pack.h
@@ -1,13 +1,18 @@
#ifndef PACK_H
#define PACK_H
+/*
+ * The packed object type is stored in the low 3 bits of a byte.
+ * The type value 0 is reserved as a prefix in case more than 7 object
+ * types, or any other future format extensions, are ever needed.
+ */
enum object_type {
- OBJ_NONE,
- OBJ_COMMIT,
- OBJ_TREE,
- OBJ_BLOB,
- OBJ_TAG,
- OBJ_DELTA,
+ OBJ_EXT = 0,
+ OBJ_COMMIT = 1,
+ OBJ_TREE = 2,
+ OBJ_BLOB = 3,
+ OBJ_TAG = 4,
+ OBJ_DELTA = 7
};
/*
diff --git a/unpack-objects.c b/unpack-objects.c
--- a/unpack-objects.c
+++ b/unpack-objects.c
@@ -13,6 +13,14 @@ struct pack_entry {
unsigned char sha1[20];
};
+static char *type_string[] = {
+ [OBJ_COMMIT] = "commit",
+ [OBJ_TREE] = "tree",
+ [OBJ_BLOB] = "blob",
+ [OBJ_TAG] = "tag",
+ [OBJ_DELTA] = "delta"
+};
+
static void *pack_base;
static unsigned long pack_size;
static void *index_base;
@@ -93,7 +101,7 @@ static int check_index(void)
}
static int unpack_non_delta_entry(struct pack_entry *entry,
- enum object_type kind,
+ char *type,
unsigned char *data,
unsigned long size,
unsigned long left)
@@ -102,9 +110,8 @@ static int unpack_non_delta_entry(struct
z_stream stream;
char *buffer;
unsigned char sha1[20];
- char *type;
- printf("%s %c %lu\n", sha1_to_hex(entry->sha1), ".CTBGD"[kind], size);
+ printf("%s %s %lu\n", sha1_to_hex(entry->sha1), type, size);
if (dry_run)
return 0;
@@ -121,13 +128,6 @@ static int unpack_non_delta_entry(struct
inflateEnd(&stream);
if ((st != Z_STREAM_END) || stream.total_out != size)
goto err_finish;
- switch (kind) {
- case OBJ_COMMIT: type = "commit"; break;
- case OBJ_TREE: type = "tree"; break;
- case OBJ_BLOB: type = "blob"; break;
- case OBJ_TAG: type = "tag"; break;
- default: goto err_finish;
- }
if (write_sha1_file(buffer, size, type, sha1) < 0)
die("failed to write %s (%s)",
sha1_to_hex(entry->sha1), type);
@@ -135,8 +135,8 @@ static int unpack_non_delta_entry(struct
if (memcmp(sha1, entry->sha1, 20))
die("resulting %s have wrong SHA1", type);
- finish:
st = 0;
+ finish:
free(buffer);
return st;
err_finish:
@@ -185,15 +185,13 @@ static int unpack_delta_entry(struct pac
die("truncated pack file");
data = base_sha1 + 20;
data_size = left - 20;
- printf("%s D %lu", sha1_to_hex(entry->sha1), delta_size);
+ printf("%s delta %lu", sha1_to_hex(entry->sha1), delta_size);
printf(" %s\n", sha1_to_hex(base_sha1));
if (dry_run)
return 0;
- /* pack+5 is the base sha1, unless we have it, we need to
- * unpack it first.
- */
+ /* unless we have the base sha1, we need to unpack it first. */
if (!has_sha1_file(base_sha1)) {
struct pack_entry *base;
if (!find_pack_entry(base_sha1, &base))
@@ -238,8 +236,9 @@ static int unpack_delta_entry(struct pac
static void unpack_entry(struct pack_entry *entry)
{
unsigned long offset, size, left;
- unsigned char *pack, c;
- int type;
+ unsigned char c, *pack = pack_base;
+ int i;
+ enum object_type type;
/* Have we done this one already due to deltas based on it? */
if (lookup_object(entry->sha1))
@@ -247,20 +246,17 @@ static void unpack_entry(struct pack_ent
offset = ntohl(entry->offset);
if (offset >= pack_size)
- goto bad;
-
- pack = pack_base + offset;
- c = *pack++;
- offset++;
- type = (c >> 4) & 7;
- size = (c & 15);
+ goto out_of_bound;
+ c = pack[offset++];
+ type = c & 0x07;
+ size = (c & ~0x80) >> 3;
+ i = 4;
while (c & 0x80) {
if (offset >= pack_size)
- goto bad;
- offset++;
- c = *pack++;
- size = (size << 7) + (c & 0x7f);
-
+ goto out_of_bound;
+ c = pack[offset++];
+ size |= (c & ~0x80) << i;
+ i += 7;
}
left = pack_size - offset;
switch (type) {
@@ -268,14 +264,18 @@ static void unpack_entry(struct pack_ent
case OBJ_TREE:
case OBJ_BLOB:
case OBJ_TAG:
- unpack_non_delta_entry(entry, type, pack, size, left);
+ unpack_non_delta_entry(entry, type_string[type],
+ pack+offset, size, left);
return;
case OBJ_DELTA:
- unpack_delta_entry(entry, pack, size, left);
+ unpack_delta_entry(entry, pack+offset, size, left);
return;
+ default:
+ die("corrupted pack file(unknown object type %d)", type);
}
-bad:
- die("corrupted pack file");
+
+ out_of_bound:
+ die("corrupted pack file (object offset out of bound)");
}
/*
next prev parent reply other threads:[~2005-06-29 3:50 UTC|newest]
Thread overview: 39+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-06-25 4:20 kernel.org and GIT tree rebuilding David S. Miller
2005-06-25 4:40 ` Jeff Garzik
2005-06-25 5:23 ` Linus Torvalds
2005-06-25 5:48 ` Jeff Garzik
2005-06-25 6:16 ` Linus Torvalds
2005-06-26 16:41 ` Linus Torvalds
2005-06-26 18:39 ` Junio C Hamano
2005-06-26 19:19 ` Linus Torvalds
2005-06-26 19:45 ` Junio C Hamano
[not found] ` <7v1x6om6o5.fsf@assigned-by-dhcp.cox.net>
[not found] ` <Pine.LNX.4.58.0506271227160.19755@ppc970.osdl.org>
[not found] ` <7v64vzyqyw.fsf_-_@assigned-by-dhcp.cox.net>
2005-06-28 6:56 ` [PATCH] Obtain sha1_file_info() for deltified pack entry properly Junio C Hamano
2005-06-28 6:58 ` Junio C Hamano
2005-06-28 6:58 ` [PATCH 2/3] git-cat-file: use sha1_object_info() on '-t' Junio C Hamano
2005-06-28 6:59 ` [PATCH 3/3] git-cat-file: '-s' to find out object size Junio C Hamano
2005-06-26 20:52 ` kernel.org and GIT tree rebuilding Chris Mason
2005-06-26 21:03 ` Chris Mason
2005-06-26 21:40 ` Linus Torvalds
2005-06-26 22:34 ` Linus Torvalds
2005-06-28 18:06 ` Nicolas Pitre
2005-06-28 19:28 ` Linus Torvalds
2005-06-28 21:08 ` Nicolas Pitre
2005-06-28 21:27 ` Linus Torvalds
2005-06-28 21:55 ` [PATCH] Bugfix: initialize pack_base to NULL Junio C Hamano
2005-06-29 3:55 ` Nicolas Pitre [this message]
2005-06-29 5:16 ` kernel.org and GIT tree rebuilding Nicolas Pitre
2005-06-29 5:43 ` Linus Torvalds
2005-06-29 5:54 ` Linus Torvalds
2005-06-29 7:16 ` Last mile for 1.0 again Junio C Hamano
2005-06-29 9:51 ` [PATCH] Add git-verify-pack command Junio C Hamano
2005-06-29 16:15 ` Linus Torvalds
2005-07-04 21:40 ` Last mile for 1.0 again Daniel Barkalow
2005-07-04 21:45 ` Junio C Hamano
2005-07-04 21:59 ` Linus Torvalds
2005-07-04 22:41 ` Daniel Barkalow
2005-07-04 23:06 ` Junio C Hamano
2005-07-05 1:54 ` Daniel Barkalow
2005-07-05 6:24 ` Junio C Hamano
2005-07-05 13:34 ` Marco Costalba
2005-06-25 5:04 ` kernel.org and GIT tree rebuilding Junio C Hamano
-- strict thread matches above, loose matches on Subject: below --
2005-07-03 2:51 linux
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Pine.LNX.4.63.0506282314320.1667@localhost.localdomain \
--to=nico@cam.org \
--cc=git@vger.kernel.org \
--cc=torvalds@osdl.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).