git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 1/3] unpack-objects: prevent writing of inconsistent objects
@ 2008-03-05 10:09 Junio C Hamano
  2008-03-05 10:09 ` [PATCH 2/3] t5300: add test for "unpack-objects --strict" Junio C Hamano
  0 siblings, 1 reply; 3+ messages in thread
From: Junio C Hamano @ 2008-03-05 10:09 UTC (permalink / raw)
  To: git; +Cc: Martin Koegler

From: Martin Koegler <mkoegler@auto.tuwien.ac.at>

This patch introduces a strict mode, which ensures that:
- no malformed object will be written
- no object with broken links will be written

The patch ensures this by delaying the write of all non-blob objects.
These objects are written after all objects they link to are written.

An error can only result in unreferenced objects.

Signed-off-by: Martin Koegler <mkoegler@auto.tuwien.ac.at>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * This is a resurrection of the reverted patch, as-is.

 Documentation/git-unpack-objects.txt |    3 +
 builtin-unpack-objects.c             |  110 +++++++++++++++++++++++++++++++--
 2 files changed, 106 insertions(+), 7 deletions(-)

diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt
index b79be3f..3697896 100644
--- a/Documentation/git-unpack-objects.txt
+++ b/Documentation/git-unpack-objects.txt
@@ -40,6 +40,9 @@ OPTIONS
 	and make the best effort to recover as many objects as
 	possible.
 
+--strict::
+	Don't write objects with broken content or links.
+
 
 Author
 ------
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index 50e07fa..9d2a854 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -7,11 +7,13 @@
 #include "commit.h"
 #include "tag.h"
 #include "tree.h"
+#include "tree-walk.h"
 #include "progress.h"
 #include "decorate.h"
+#include "fsck.h"
 
-static int dry_run, quiet, recover, has_errors;
-static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file";
+static int dry_run, quiet, recover, has_errors, strict;
+static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] [--strict] < pack-file";
 
 /* We always read in 4kB chunks. */
 static unsigned char buffer[4096];
@@ -31,6 +33,16 @@ static struct obj_buffer *lookup_object_buffer(struct object *base)
 	return lookup_decoration(&obj_decorate, base);
 }
 
+static void add_object_buffer(struct object *object, char *buffer, unsigned long size)
+{
+	struct obj_buffer *obj;
+	obj = xcalloc(1, sizeof(struct obj_buffer));
+	obj->buffer = buffer;
+	obj->size = size;
+	if (add_decoration(&obj_decorate, object, obj))
+		die("object %s tried to add buffer twice!", sha1_to_hex(object->sha1));
+}
+
 /*
  * Make sure at least "min" bytes are available in the buffer, and
  * return the pointer to the buffer.
@@ -134,9 +146,58 @@ static void add_delta_to_list(unsigned nr, unsigned const char *base_sha1,
 struct obj_info {
 	off_t offset;
 	unsigned char sha1[20];
+	struct object *obj;
 };
 
+#define FLAG_OPEN (1u<<20)
+#define FLAG_WRITTEN (1u<<21)
+
 static struct obj_info *obj_list;
+unsigned nr_objects;
+
+static void write_cached_object(struct object *obj)
+{
+	unsigned char sha1[20];
+	struct obj_buffer *obj_buf = lookup_object_buffer(obj);
+	if (write_sha1_file(obj_buf->buffer, obj_buf->size, typename(obj->type), sha1) < 0)
+		die("failed to write object %s", sha1_to_hex(obj->sha1));
+	obj->flags |= FLAG_WRITTEN;
+}
+
+static int check_object(struct object *obj, int type, void *data)
+{
+	if (!obj)
+		return 0;
+
+	if (obj->flags & FLAG_WRITTEN)
+		return 1;
+
+	if (type != OBJ_ANY && obj->type != type)
+		die("object type mismatch");
+
+	if (!(obj->flags & FLAG_OPEN)) {
+		unsigned long size;
+		int type = sha1_object_info(obj->sha1, &size);
+		if (type != obj->type || type <= 0)
+			die("object of unexpected type");
+		obj->flags |= FLAG_WRITTEN;
+		return 1;
+	}
+
+	if (fsck_object(obj, 1, fsck_error_function))
+		die("Error in object");
+	if (!fsck_walk(obj, check_object, 0))
+		die("Error on reachable objects of %s", sha1_to_hex(obj->sha1));
+	write_cached_object(obj);
+	return 1;
+}
+
+static void write_rest(void)
+{
+	unsigned i;
+	for (i = 0; i < nr_objects; i++)
+		check_object(obj_list[i].obj, OBJ_ANY, 0);
+}
 
 static void added_object(unsigned nr, enum object_type type,
 			 void *data, unsigned long size);
@@ -144,9 +205,36 @@ static void added_object(unsigned nr, enum object_type type,
 static void write_object(unsigned nr, enum object_type type,
 			 void *buf, unsigned long size)
 {
-	if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
-		die("failed to write object");
 	added_object(nr, type, buf, size);
+	if (!strict) {
+		if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
+			die("failed to write object");
+		free(buf);
+		obj_list[nr].obj = 0;
+	} else if (type == OBJ_BLOB) {
+		struct blob *blob;
+		if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
+			die("failed to write object");
+		free(buf);
+
+		blob = lookup_blob(obj_list[nr].sha1);
+		if (blob)
+			blob->object.flags |= FLAG_WRITTEN;
+		else
+			die("invalid blob object");
+		obj_list[nr].obj = 0;
+	} else {
+		struct object *obj;
+		int eaten;
+		hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
+		obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten);
+		if (!obj)
+			die("invalid %s", typename(type));
+		/* buf is stored via add_object_buffer and in obj, if its a tree or commit */
+		add_object_buffer(obj, buf, size);
+		obj->flags |= FLAG_OPEN;
+		obj_list[nr].obj = obj;
+	}
 }
 
 static void resolve_delta(unsigned nr, enum object_type type,
@@ -163,7 +251,6 @@ static void resolve_delta(unsigned nr, enum object_type type,
 		die("failed to apply delta");
 	free(delta);
 	write_object(nr, type, result, result_size);
-	free(result);
 }
 
 static void added_object(unsigned nr, enum object_type type,
@@ -193,7 +280,8 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 
 	if (!dry_run && buf)
 		write_object(nr, type, buf, size);
-	free(buf);
+	else
+		free(buf);
 }
 
 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
@@ -336,7 +424,8 @@ static void unpack_all(void)
 	int i;
 	struct progress *progress = NULL;
 	struct pack_header *hdr = fill(sizeof(struct pack_header));
-	unsigned nr_objects = ntohl(hdr->hdr_entries);
+
+	nr_objects = ntohl(hdr->hdr_entries);
 
 	if (ntohl(hdr->hdr_signature) != PACK_SIGNATURE)
 		die("bad pack file");
@@ -347,6 +436,7 @@ static void unpack_all(void)
 	if (!quiet)
 		progress = start_progress("Unpacking objects", nr_objects);
 	obj_list = xmalloc(nr_objects * sizeof(*obj_list));
+	memset(obj_list, 0, nr_objects * sizeof(*obj_list));
 	for (i = 0; i < nr_objects; i++) {
 		unpack_one(i);
 		display_progress(progress, i + 1);
@@ -382,6 +472,10 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 				recover = 1;
 				continue;
 			}
+			if (!strcmp(arg, "--strict")) {
+				strict = 1;
+				continue;
+			}
 			if (!prefixcmp(arg, "--pack_header=")) {
 				struct pack_header *hdr;
 				char *c;
@@ -407,6 +501,8 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix)
 	unpack_all();
 	SHA1_Update(&ctx, buffer, offset);
 	SHA1_Final(sha1, &ctx);
+	if (strict)
+		write_rest();
 	if (hashcmp(fill(20), sha1))
 		die("final sha1 did not match");
 	use(20);
-- 
1.5.4.3.529.gb25fb


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 2/3] t5300: add test for "unpack-objects --strict"
  2008-03-05 10:09 [PATCH 1/3] unpack-objects: prevent writing of inconsistent objects Junio C Hamano
@ 2008-03-05 10:09 ` Junio C Hamano
  2008-03-05 10:09   ` [PATCH 3/3] unpack-objects: fix --strict handling Junio C Hamano
  0 siblings, 1 reply; 3+ messages in thread
From: Junio C Hamano @ 2008-03-05 10:09 UTC (permalink / raw)
  To: git; +Cc: Martin Koegler, Sergey Vlasov

This adds a test for unpacking deltified objects with the --strict option.

 - unpacking full trees with --strict should pass;

 - unpacking only trees with --strict should be rejected due to
   missing blobs;

 - unpacking only trees with --strict into an existing
   repository with necessary blobs should succeed.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * Third time lucky.

 t/t5300-pack-object.sh |   49 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 49 insertions(+), 0 deletions(-)

diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index cd3c149..b297393 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -274,4 +274,53 @@ test_expect_success \
      packname_4=$(git pack-objects test-4 <obj-list) &&
      test 3 = $(ls test-4-*.pack | wc -l)'
 
+test_expect_failure 'unpacking with --strict' '
+
+	git config --unset pack.packsizelimit &&
+	COPYING=$(git hash-object -w ../../COPYING) &&
+	for j in a b c d e f g
+	do
+		for i in 0 1 2 3 4 5 6 7 8 9
+		do
+			o=$(echo $j$i | git hash-object -w --stdin) &&
+			echo "100644 $o	0 $j$i"
+		done
+	done >LIST &&
+	rm -f .git/index &&
+	git update-index --index-info <LIST &&
+	LIST=$(git write-tree) &&
+	rm -f .git/index &&
+	head -n 10 LIST | git update-index --index-info &&
+	LI=$(git write-tree) &&
+	rm -f .git/index &&
+	tail -n 10 LIST | git update-index --index-info &&
+	ST=$(git write-tree) &&
+	PACK5=$( git rev-list --objects "$LIST" "$LI" "$ST" | \
+		git pack-objects test-5 ) &&
+	PACK6=$( (
+			echo "$LIST"
+			echo "$LI"
+			echo "$ST"
+		 ) | git pack-objects test-6 ) &&
+	test_create_repo test-5 &&
+	(
+		cd test-5 &&
+		git unpack-objects --strict <../test-5-$PACK5.pack &&
+		git ls-tree -r $LIST &&
+		git ls-tree -r $LI &&
+		git ls-tree -r $ST
+	) &&
+	test_create_repo test-6 &&
+	(
+		# tree-only into empty repo -- many unreachables
+		cd test-6 &&
+		test_must_fail git unpack-objects --strict <../test-6-$PACK6.pack
+	) &&
+	(
+		# already populated -- no unreachables
+		cd test-5 &&
+		git unpack-objects --strict <../test-6-$PACK6.pack
+	)
+'
+
 test_done
-- 
1.5.4.3.529.gb25fb


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH 3/3] unpack-objects: fix --strict handling
  2008-03-05 10:09 ` [PATCH 2/3] t5300: add test for "unpack-objects --strict" Junio C Hamano
@ 2008-03-05 10:09   ` Junio C Hamano
  0 siblings, 0 replies; 3+ messages in thread
From: Junio C Hamano @ 2008-03-05 10:09 UTC (permalink / raw)
  To: git; +Cc: Martin Koegler, Sergey Vlasov

The earlier attempt (which was reverted) called added_object() (by the way,
the function should be renamed to resolve_dependents() --- it is called
when we have the complete object data, and is responsible for resolving pending
deltified objects that use this object as their delta base object) without
updating obj_list[nr].sha1 with the correct value.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---

 * I still doubt that FLAG_OPEN is necessary, but at least this
   seems to fix the issue for me.  I am sick and tired of having
   spent two nights looking at this issue, and I ran out of time
   to deal with other topics I looked at on the list.

   The moral of the story is _NOT_ "do not trust Martin's
   patches", but "I should ignore topics unless I have enough
   time to read them line-by-line.  Queuing them, hoping that
   problems are caught by somebody while they are in 'next',
   would NOT work".

 builtin-unpack-objects.c |   73 ++++++++++++++++++++++++++++++++++++----------
 t/t5300-pack-object.sh   |    2 +-
 2 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index 9d2a854..fecf0be 100644
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
@@ -21,6 +21,11 @@ static unsigned int offset, len;
 static off_t consumed_bytes;
 static SHA_CTX ctx;
 
+/*
+ * When running under --strict mode, objects whose reachability are
+ * suspect are kept in core without getting written in the object
+ * store.
+ */
 struct obj_buffer {
 	char *buffer;
 	unsigned long size;
@@ -155,6 +160,10 @@ struct obj_info {
 static struct obj_info *obj_list;
 unsigned nr_objects;
 
+/*
+ * Called only from check_object() after it verified this object
+ * is Ok.
+ */
 static void write_cached_object(struct object *obj)
 {
 	unsigned char sha1[20];
@@ -164,6 +173,11 @@ static void write_cached_object(struct object *obj)
 	obj->flags |= FLAG_WRITTEN;
 }
 
+/*
+ * At the very end of the processing, write_rest() scans the objects
+ * that have reachability requirements and calls this function.
+ * Verify its reachability and validity recursively and write it out.
+ */
 static int check_object(struct object *obj, int type, void *data)
 {
 	if (!obj)
@@ -202,19 +216,25 @@ static void write_rest(void)
 static void added_object(unsigned nr, enum object_type type,
 			 void *data, unsigned long size);
 
+/*
+ * Write out nr-th object from the list, now we know the contents
+ * of it.  Under --strict, this buffers structured objects in-core,
+ * to be checked at the end.
+ */
 static void write_object(unsigned nr, enum object_type type,
 			 void *buf, unsigned long size)
 {
-	added_object(nr, type, buf, size);
 	if (!strict) {
 		if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
 			die("failed to write object");
+		added_object(nr, type, buf, size);
 		free(buf);
-		obj_list[nr].obj = 0;
+		obj_list[nr].obj = NULL;
 	} else if (type == OBJ_BLOB) {
 		struct blob *blob;
 		if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0)
 			die("failed to write object");
+		added_object(nr, type, buf, size);
 		free(buf);
 
 		blob = lookup_blob(obj_list[nr].sha1);
@@ -222,15 +242,15 @@ static void write_object(unsigned nr, enum object_type type,
 			blob->object.flags |= FLAG_WRITTEN;
 		else
 			die("invalid blob object");
-		obj_list[nr].obj = 0;
+		obj_list[nr].obj = NULL;
 	} else {
 		struct object *obj;
 		int eaten;
 		hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1);
+		added_object(nr, type, buf, size);
 		obj = parse_object_buffer(obj_list[nr].sha1, type, size, buf, &eaten);
 		if (!obj)
 			die("invalid %s", typename(type));
-		/* buf is stored via add_object_buffer and in obj, if its a tree or commit */
 		add_object_buffer(obj, buf, size);
 		obj->flags |= FLAG_OPEN;
 		obj_list[nr].obj = obj;
@@ -253,6 +273,10 @@ static void resolve_delta(unsigned nr, enum object_type type,
 	write_object(nr, type, result, result_size);
 }
 
+/*
+ * We now know the contents of an object (which is nr-th in the pack);
+ * resolve all the deltified objects that are based on it.
+ */
 static void added_object(unsigned nr, enum object_type type,
 			 void *data, unsigned long size)
 {
@@ -284,13 +308,28 @@ static void unpack_non_delta_entry(enum object_type type, unsigned long size,
 		free(buf);
 }
 
+static int resolve_against_held(unsigned nr, const unsigned char *base,
+				void *delta_data, unsigned long delta_size)
+{
+	struct object *obj;
+	struct obj_buffer *obj_buffer;
+	obj = lookup_object(base);
+	if (!obj)
+		return 0;
+	obj_buffer = lookup_object_buffer(obj);
+	if (!obj_buffer)
+		return 0;
+	resolve_delta(nr, obj->type, obj_buffer->buffer,
+		      obj_buffer->size, delta_data, delta_size);
+	return 1;
+}
+
 static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 			       unsigned nr)
 {
 	void *delta_data, *base;
 	unsigned long base_size;
 	unsigned char base_sha1[20];
-	struct object *obj;
 
 	if (type == OBJ_REF_DELTA) {
 		hashcpy(base_sha1, fill(20));
@@ -300,7 +339,13 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 			free(delta_data);
 			return;
 		}
-		if (!has_sha1_file(base_sha1)) {
+		if (has_sha1_file(base_sha1))
+			; /* Ok we have this one */
+		else if (resolve_against_held(nr, base_sha1,
+					      delta_data, delta_size))
+			return; /* we are done */
+		else {
+			/* cannot resolve yet --- queue it */
 			hashcpy(obj_list[nr].sha1, null_sha1);
 			add_delta_to_list(nr, base_sha1, 0, delta_data, delta_size);
 			return;
@@ -346,22 +391,18 @@ static void unpack_delta_entry(enum object_type type, unsigned long delta_size,
 			}
 		}
 		if (!base_found) {
-			/* The delta base object is itself a delta that
-			   has not been	resolved yet. */
+			/*
+			 * The delta base object is itself a delta that
+			 * has not been resolved yet.
+			 */
 			hashcpy(obj_list[nr].sha1, null_sha1);
 			add_delta_to_list(nr, null_sha1, base_offset, delta_data, delta_size);
 			return;
 		}
 	}
 
-	obj = lookup_object(base_sha1);
-	if (obj) {
-		struct obj_buffer *obj_buf = lookup_object_buffer(obj);
-		if (obj_buf) {
-			resolve_delta(nr, obj->type, obj_buf->buffer, obj_buf->size, delta_data, delta_size);
-			return;
-		}
-	}
+	if (resolve_against_held(nr, base_sha1, delta_data, delta_size))
+		return;
 
 	base = read_sha1_file(base_sha1, &type, &base_size);
 	if (!base) {
diff --git a/t/t5300-pack-object.sh b/t/t5300-pack-object.sh
index b297393..b7610d1 100755
--- a/t/t5300-pack-object.sh
+++ b/t/t5300-pack-object.sh
@@ -274,7 +274,7 @@ test_expect_success \
      packname_4=$(git pack-objects test-4 <obj-list) &&
      test 3 = $(ls test-4-*.pack | wc -l)'
 
-test_expect_failure 'unpacking with --strict' '
+test_expect_success 'unpacking with --strict' '
 
 	git config --unset pack.packsizelimit &&
 	COPYING=$(git hash-object -w ../../COPYING) &&
-- 
1.5.4.3.529.gb25fb


^ permalink raw reply related	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2008-03-05 10:10 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2008-03-05 10:09 [PATCH 1/3] unpack-objects: prevent writing of inconsistent objects Junio C Hamano
2008-03-05 10:09 ` [PATCH 2/3] t5300: add test for "unpack-objects --strict" Junio C Hamano
2008-03-05 10:09   ` [PATCH 3/3] unpack-objects: fix --strict handling Junio C Hamano

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).