git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Nicolas Pitre <nico@cam.org>
To: Linus Torvalds <torvalds@osdl.org>
Cc: Git Mailing List <git@vger.kernel.org>
Subject: Re: kernel.org and GIT tree rebuilding
Date: Tue, 28 Jun 2005 23:55:46 -0400 (EDT)	[thread overview]
Message-ID: <Pine.LNX.4.63.0506282314320.1667@localhost.localdomain> (raw)
In-Reply-To: <Pine.LNX.4.58.0506281424420.19755@ppc970.osdl.org>

On Tue, 28 Jun 2005, Linus Torvalds wrote:

> 
> 
> On Tue, 28 Jun 2005, Nicolas Pitre wrote:
> > 
> > OK.  New patch below.
> 
> Dammit, I wasted all that time doing it myself.
> 
> I just committed and pushed out my version. But mine also does sha1_file.c 
> right, so that you can use a packed archive in .git/objects/pack. Yours 
> has some other cleanups, so..
> 
> Can you double-check my version (it hasn't mirrored out yet, it seems, but 
> it should be there soon).

OK... See below the cleanups I merged from my version on top of yours:

 pack-objects.c   |   70 ++++++++++++++-----------------------------------------
 pack.h           |   17 ++++++++-----
 unpack-objects.c |   66 +++++++++++++++++++++++++--------------------------
 3 files changed, 63 insertions(+), 90 deletions(-)

I also restored my original object header size ordering (little endian) 
for two reasons:

 - it is much simpler to generate and therefore allows removing 
   quite a bit of code

 - it allows for stable bit positions, which makes it much easier to look 
   at a hex dump of the binary data for manual debugging

Also a few code optimizations and one error return fix.

Signed-off-by: Nicolas Pitre <nico@cam.org>

diff --git a/pack-objects.c b/pack-objects.c
--- a/pack-objects.c
+++ b/pack-objects.c
@@ -34,7 +34,7 @@ static void *delta_against(void *buf, un
 	if (!otherbuf)
 		die("unable to read %s", sha1_to_hex(entry->delta->sha1));
         delta_buf = diff_delta(otherbuf, othersize,
-			       buf, size, &delta_size, ~0UL);
+			       buf, size, &delta_size, 0UL);
         if (!delta_buf || delta_size != entry->delta_size)
         	die("delta size changed");
         free(buf);
@@ -42,54 +42,13 @@ static void *delta_against(void *buf, un
 	return delta_buf;
 }
 
-/*
- * The per-object header is a pretty dense thing, which is
- *  - first byte: low four bits are "size", then three bits of "type",
- *    and the high bit is "size continues".
- *  - each byte afterwards: low seven bits are size continuation,
- *    with the high bit being "size continues"
- */
-static int encode_header(enum object_type type, unsigned long size, unsigned char *hdr)
-{
-	int n = 1, i;
-	unsigned char c;
-
-	if (type < OBJ_COMMIT || type > OBJ_DELTA)
-		die("bad type %d", type);
-
-	/*
-	 * Shift the size up by 7 bits at a time,
-	 * until you get bits in the "high four".
-	 * That will be our beginning. We'll have
-	 * four size bits in 28..31, then groups
-	 * of seven in 21..27, 14..20, 7..13 and
-	 * finally 0..6.
-	 */
-	if (size) {
-		n = 5;
-		while (!(size & 0xfe000000)) {
-			size <<= 7;
-			n--;
-		}
-	}
-	c = (type << 4) | (size >> 28);
-	for (i = 1; i < n; i++) {
-		*hdr++ = c | 0x80;
-		c = (size >> 21) & 0x7f;
-		size <<= 7;
-	}
-	*hdr = c;
-	return n;
-}
-
 static unsigned long write_object(struct sha1file *f, struct object_entry *entry)
 {
 	unsigned long size;
 	char type[10];
 	void *buf = read_sha1_file(entry->sha1, type, &size);
-	unsigned char header[10];
+	char header[25];
 	unsigned hdrlen, datalen;
-	enum object_type obj_type;
 
 	if (!buf)
 		die("unable to read %s", sha1_to_hex(entry->sha1));
@@ -97,22 +56,31 @@ static unsigned long write_object(struct
 		die("object %s size inconsistency (%lu vs %lu)", sha1_to_hex(entry->sha1), size, entry->size);
 
 	/*
-	 * The object header is a byte of 'type' followed by zero or
-	 * more bytes of length.  For deltas, the 20 bytes of delta sha1
-	 * follows that.
+	 * The object header first byte has its low 3 bits representing the
+	 * object type, the next 4 bits the low size bits, and the high bit
+	 * flagging that more size bytes (7 bits each, LSB first) follow.
+	 * For delta objects the sha1 of the reference object is also appended.
 	 */
-	obj_type = entry->type;
 	if (entry->delta) {
+		header[0] = OBJ_DELTA;
 		buf = delta_against(buf, size, entry);
 		size = entry->delta_size;
-		obj_type = OBJ_DELTA;
+	} else
+		header[0] = entry->type;
+	header[0] |= size << 3;
+	hdrlen = 1;
+	datalen = size >> 4;
+	while (datalen) {
+		header[hdrlen - 1] |= 0x80;
+		header[hdrlen++] = datalen;
+		datalen >>= 7;
 	}
-	hdrlen = encode_header(obj_type, size, header);
-	sha1write(f, header, hdrlen);
 	if (entry->delta) {
-		sha1write(f, entry->delta, 20);
+		memcpy(header+hdrlen, entry->delta, 20);
 		hdrlen += 20;
 	}
+
+	sha1write(f, header, hdrlen);
 	datalen = sha1write_compressed(f, buf, size);
 	free(buf);
 	return hdrlen + datalen;
diff --git a/pack.h b/pack.h
--- a/pack.h
+++ b/pack.h
@@ -1,13 +1,18 @@
 #ifndef PACK_H
 #define PACK_H
 
+/*
+ * The packed object type is stored in the low 3 bits of a byte.
+ * The type value 0 is a reserved prefix if ever there is more than 7
+ * object types, or any future format extensions.
+ */
 enum object_type {
-	OBJ_NONE,
-	OBJ_COMMIT,
-	OBJ_TREE,
-	OBJ_BLOB,
-	OBJ_TAG,
-	OBJ_DELTA,
+	OBJ_EXT = 0,
+	OBJ_COMMIT = 1,
+	OBJ_TREE = 2,
+	OBJ_BLOB = 3,
+	OBJ_TAG = 4,
+	OBJ_DELTA = 7
 };
 
 /*
diff --git a/unpack-objects.c b/unpack-objects.c
--- a/unpack-objects.c
+++ b/unpack-objects.c
@@ -13,6 +13,14 @@ struct pack_entry {
 	unsigned char sha1[20];
 };
 
+static char *type_string[] = {
+	[OBJ_COMMIT]	= "commit",
+	[OBJ_TREE]	= "tree",
+	[OBJ_BLOB]	= "blob",
+	[OBJ_TAG]	= "tag",
+	[OBJ_DELTA]	= "delta"
+};
+
 static void *pack_base;
 static unsigned long pack_size;
 static void *index_base;
@@ -93,7 +101,7 @@ static int check_index(void)
 }
 
 static int unpack_non_delta_entry(struct pack_entry *entry,
-				  enum object_type kind,
+				  char *type,
 				  unsigned char *data,
 				  unsigned long size,
 				  unsigned long left)
@@ -102,9 +110,8 @@ static int unpack_non_delta_entry(struct
 	z_stream stream;
 	char *buffer;
 	unsigned char sha1[20];
-	char *type;
 
-	printf("%s %c %lu\n", sha1_to_hex(entry->sha1), ".CTBGD"[kind], size);
+	printf("%s %s %lu\n", sha1_to_hex(entry->sha1), type, size);
 	if (dry_run)
 		return 0;
 
@@ -121,13 +128,6 @@ static int unpack_non_delta_entry(struct
 	inflateEnd(&stream);
 	if ((st != Z_STREAM_END) || stream.total_out != size)
 		goto err_finish;
-	switch (kind) {
-	case OBJ_COMMIT: type = "commit"; break;
-	case OBJ_TREE:   type = "tree"; break;
-	case OBJ_BLOB:   type = "blob"; break;
-	case OBJ_TAG:    type = "tag"; break;
-	default: goto err_finish;
-	}
 	if (write_sha1_file(buffer, size, type, sha1) < 0)
 		die("failed to write %s (%s)",
 		    sha1_to_hex(entry->sha1), type);
@@ -135,8 +135,8 @@ static int unpack_non_delta_entry(struct
 	if (memcmp(sha1, entry->sha1, 20))
 		die("resulting %s have wrong SHA1", type);
 
- finish:
 	st = 0;
+ finish:
 	free(buffer);
 	return st;
  err_finish:
@@ -185,15 +185,13 @@ static int unpack_delta_entry(struct pac
 		die("truncated pack file");
 	data = base_sha1 + 20;
 	data_size = left - 20;
-	printf("%s D %lu", sha1_to_hex(entry->sha1), delta_size);
+	printf("%s delta %lu", sha1_to_hex(entry->sha1), delta_size);
 	printf(" %s\n", sha1_to_hex(base_sha1));
 
 	if (dry_run)
 		return 0;
 
-	/* pack+5 is the base sha1, unless we have it, we need to
-	 * unpack it first.
-	 */
+	/* unless we have the base sha1, we need to unpack it first. */
 	if (!has_sha1_file(base_sha1)) {
 		struct pack_entry *base;
 		if (!find_pack_entry(base_sha1, &base))
@@ -238,8 +236,9 @@ static int unpack_delta_entry(struct pac
 static void unpack_entry(struct pack_entry *entry)
 {
 	unsigned long offset, size, left;
-	unsigned char *pack, c;
-	int type;
+	unsigned char c, *pack = pack_base;
+	int i;
+	enum object_type type;
 
 	/* Have we done this one already due to deltas based on it? */
 	if (lookup_object(entry->sha1))
@@ -247,20 +246,17 @@ static void unpack_entry(struct pack_ent
 
 	offset = ntohl(entry->offset);
 	if (offset >= pack_size)
-		goto bad;
-
-	pack = pack_base + offset;
-	c = *pack++;
-	offset++;
-	type = (c >> 4) & 7;
-	size = (c & 15);
+		goto out_of_bound;
+	c = pack[offset++];
+	type = c & 0x07;
+	size = (c & ~0x80) >> 3;
+	i = 4;
 	while (c & 0x80) {
 		if (offset >= pack_size)
-			goto bad;
-		offset++;
-		c = *pack++;
-		size = (size << 7) + (c & 0x7f);
-		
+			goto out_of_bound;
+		c = pack[offset++];
+		size |= (c & ~0x80) << i;
+		i += 7;
 	}
 	left = pack_size - offset;
 	switch (type) {
@@ -268,14 +264,18 @@ static void unpack_entry(struct pack_ent
 	case OBJ_TREE:
 	case OBJ_BLOB:
 	case OBJ_TAG:
-		unpack_non_delta_entry(entry, type, pack, size, left);
+		unpack_non_delta_entry(entry, type_string[type],
+				       pack+offset, size, left);
 		return;
 	case OBJ_DELTA:
-		unpack_delta_entry(entry, pack, size, left);
+		unpack_delta_entry(entry, pack+offset, size, left);
 		return;
+	default:
+		die("corrupted pack file(unknown object type %d)", type);
 	}
-bad:
-	die("corrupted pack file");
+
+ out_of_bound:
+	die("corrupted pack file (object offset out of bound)");
 }
 
 /*

  parent reply	other threads:[~2005-06-29  3:50 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-06-25  4:20 kernel.org and GIT tree rebuilding David S. Miller
2005-06-25  4:40 ` Jeff Garzik
2005-06-25  5:23   ` Linus Torvalds
2005-06-25  5:48     ` Jeff Garzik
2005-06-25  6:16       ` Linus Torvalds
2005-06-26 16:41         ` Linus Torvalds
2005-06-26 18:39           ` Junio C Hamano
2005-06-26 19:19             ` Linus Torvalds
2005-06-26 19:45               ` Junio C Hamano
     [not found]                 ` <7v1x6om6o5.fsf@assigned-by-dhcp.cox.net>
     [not found]                   ` <Pine.LNX.4.58.0506271227160.19755@ppc970.osdl.org>
     [not found]                     ` <7v64vzyqyw.fsf_-_@assigned-by-dhcp.cox.net>
2005-06-28  6:56                       ` [PATCH] Obtain sha1_file_info() for deltified pack entry properly Junio C Hamano
2005-06-28  6:58                         ` Junio C Hamano
2005-06-28  6:58                         ` [PATCH 2/3] git-cat-file: use sha1_object_info() on '-t' Junio C Hamano
2005-06-28  6:59                         ` [PATCH 3/3] git-cat-file: '-s' to find out object size Junio C Hamano
2005-06-26 20:52           ` kernel.org and GIT tree rebuilding Chris Mason
2005-06-26 21:03             ` Chris Mason
2005-06-26 21:40             ` Linus Torvalds
2005-06-26 22:34               ` Linus Torvalds
2005-06-28 18:06           ` Nicolas Pitre
2005-06-28 19:28             ` Linus Torvalds
2005-06-28 21:08               ` Nicolas Pitre
2005-06-28 21:27                 ` Linus Torvalds
2005-06-28 21:55                   ` [PATCH] Bugfix: initialize pack_base to NULL Junio C Hamano
2005-06-29  3:55                   ` Nicolas Pitre [this message]
2005-06-29  5:16                     ` kernel.org and GIT tree rebuilding Nicolas Pitre
2005-06-29  5:43                       ` Linus Torvalds
2005-06-29  5:54                         ` Linus Torvalds
2005-06-29  7:16                           ` Last mile for 1.0 again Junio C Hamano
2005-06-29  9:51                             ` [PATCH] Add git-verify-pack command Junio C Hamano
2005-06-29 16:15                               ` Linus Torvalds
2005-07-04 21:40                             ` Last mile for 1.0 again Daniel Barkalow
2005-07-04 21:45                               ` Junio C Hamano
2005-07-04 21:59                               ` Linus Torvalds
2005-07-04 22:41                                 ` Daniel Barkalow
2005-07-04 23:06                                   ` Junio C Hamano
2005-07-05  1:54                                     ` Daniel Barkalow
2005-07-05  6:24                                       ` Junio C Hamano
2005-07-05 13:34                                         ` Marco Costalba
2005-06-25  5:04 ` kernel.org and GIT tree rebuilding Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2005-07-03  2:51 linux

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Pine.LNX.4.63.0506282314320.1667@localhost.localdomain \
    --to=nico@cam.org \
    --cc=git@vger.kernel.org \
    --cc=torvalds@osdl.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).