git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org, Junio C Hamano <gitster@pobox.com>
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 2/2] Make hash-object more robust against malformed objects
Date: Sat,  5 Feb 2011 17:52:21 +0700	[thread overview]
Message-ID: <1296903141-27075-2-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1296903141-27075-1-git-send-email-pclouds@gmail.com>

Commits, trees and tags have structure. Don't let users feed git
malformed ones; sooner or later git will die() when it encounters
them.

Note that this patch does not check semantics. A tree that points
to non-existent objects is perfectly OK (and should be so: users
may, for example, choose to add a commit first and then its
associated tree).

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/hash-object.c  |    2 +-
 cache.h                |    2 +-
 read-cache.c           |    2 +-
 sha1_file.c            |   54 +++++++++++++++++++++++++++++++++++++++++------
 t/t1007-hash-object.sh |   13 +++++++++++
 5 files changed, 63 insertions(+), 10 deletions(-)

diff --git a/builtin/hash-object.c b/builtin/hash-object.c
index 080af1a..c90acdd 100644
--- a/builtin/hash-object.c
+++ b/builtin/hash-object.c
@@ -15,7 +15,7 @@ static void hash_fd(int fd, const char *type, int write_object, const char *path
 	struct stat st;
 	unsigned char sha1[20];
 	if (fstat(fd, &st) < 0 ||
-	    index_fd(sha1, fd, &st, write_object, type_from_string(type), path))
+	    index_fd(sha1, fd, &st, write_object, type_from_string(type), path, 1))
 		die(write_object
 		    ? "Unable to add %s to database"
 		    : "Unable to hash %s", path);
diff --git a/cache.h b/cache.h
index d83d68c..9186a56 100644
--- a/cache.h
+++ b/cache.h
@@ -501,7 +501,7 @@ extern int ie_match_stat(const struct index_state *, struct cache_entry *, struc
 extern int ie_modified(const struct index_state *, struct cache_entry *, struct stat *, unsigned int);
 
 extern int ce_path_match(const struct cache_entry *ce, const char **pathspec);
-extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path);
+extern int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object, enum object_type type, const char *path, int format_check);
 extern int index_path(unsigned char *sha1, const char *path, struct stat *st, int write_object);
 extern void fill_stat_cache_info(struct cache_entry *ce, struct stat *st);
 
diff --git a/read-cache.c b/read-cache.c
index 4f2e890..fbc12f3 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -92,7 +92,7 @@ static int ce_compare_data(struct cache_entry *ce, struct stat *st)
 
 	if (fd >= 0) {
 		unsigned char sha1[20];
-		if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name))
+		if (!index_fd(sha1, fd, st, 0, OBJ_BLOB, ce->name, 0))
 			match = hashcmp(sha1, ce->sha1);
 		/* index_fd() closed the file descriptor already */
 	}
diff --git a/sha1_file.c b/sha1_file.c
index d86a8db..58ca858 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -13,6 +13,7 @@
 #include "commit.h"
 #include "tag.h"
 #include "tree.h"
+#include "tree-walk.h"
 #include "refs.h"
 #include "pack-revindex.h"
 #include "sha1-lookup.h"
@@ -2471,8 +2472,37 @@ int has_sha1_file(const unsigned char *sha1)
 	return has_loose_object(sha1);
 }
 
+static void check_tree(const void *buf, size_t size)
+{
+	struct tree_desc desc;
+	struct name_entry entry;
+
+	init_tree_desc(&desc, buf, size);
+	while (tree_entry(&desc, &entry))
+		/* do nothing
+		 * tree_entry() will die() on malformed entries */
+		;
+}
+
+static void check_commit(const void *buf, size_t size)
+{
+	struct commit c;
+	memset(&c, 0, sizeof(c));
+	if (parse_commit_buffer(&c, buf, size))
+		die("corrupt commit");
+}
+
+static void check_tag(const void *buf, size_t size)
+{
+	struct tag t;
+	memset(&t, 0, sizeof(t));
+	if (parse_tag_buffer(&t, buf, size))
+		die("corrupt tag");
+}
+
 static int index_mem(unsigned char *sha1, void *buf, size_t size,
-		     int write_object, enum object_type type, const char *path)
+		     int write_object, enum object_type type,
+		     const char *path, int format_check)
 {
 	int ret, re_allocated = 0;
 
@@ -2490,6 +2520,14 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 			re_allocated = 1;
 		}
 	}
+	if (format_check) {
+		if (type == OBJ_TREE)
+			check_tree(buf, size);
+		if (type == OBJ_COMMIT)
+			check_commit(buf, size);
+		if (type == OBJ_TAG)
+			check_tag(buf, size);
+	}
 
 	if (write_object)
 		ret = write_sha1_file(buf, size, typename(type), sha1);
@@ -2503,7 +2541,7 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 #define SMALL_FILE_SIZE (32*1024)
 
 int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
-	     enum object_type type, const char *path)
+	     enum object_type type, const char *path, int format_check)
 {
 	int ret;
 	size_t size = xsize_t(st->st_size);
@@ -2512,23 +2550,25 @@ int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 		struct strbuf sbuf = STRBUF_INIT;
 		if (strbuf_read(&sbuf, fd, 4096) >= 0)
 			ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object,
-					type, path);
+					type, path, format_check);
 		else
 			ret = -1;
 		strbuf_release(&sbuf);
 	} else if (!size) {
-		ret = index_mem(sha1, NULL, size, write_object, type, path);
+		ret = index_mem(sha1, NULL, size, write_object, type, path,
+				format_check);
 	} else if (size <= SMALL_FILE_SIZE) {
 		char *buf = xmalloc(size);
 		if (size == read_in_full(fd, buf, size))
 			ret = index_mem(sha1, buf, size, write_object, type,
-					path);
+					path, format_check);
 		else
 			ret = error("short read %s", strerror(errno));
 		free(buf);
 	} else {
 		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-		ret = index_mem(sha1, buf, size, write_object, type, path);
+		ret = index_mem(sha1, buf, size, write_object, type, path,
+				format_check);
 		munmap(buf, size);
 	}
 	close(fd);
@@ -2546,7 +2586,7 @@ int index_path(unsigned char *sha1, const char *path, struct stat *st, int write
 		if (fd < 0)
 			return error("open(\"%s\"): %s", path,
 				     strerror(errno));
-		if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path) < 0)
+		if (index_fd(sha1, fd, st, write_object, OBJ_BLOB, path, 0) < 0)
 			return error("%s: failed to insert into database",
 				     path);
 		break;
diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index dd32432..6d52b82 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -188,4 +188,17 @@ for args in "-w --stdin-paths" "--stdin-paths -w"; do
 	pop_repo
 done
 
+test_expect_success 'corrupt tree' '
+	echo abc >malformed-tree
+	test_must_fail git hash-object -t tree malformed-tree
+'
+
+test_expect_success 'corrupt commit' '
+	test_must_fail git hash-object -t commit --stdin </dev/null
+'
+
+test_expect_success 'corrupt tag' '
+	test_must_fail git hash-object -t tag --stdin </dev/null
+'
+
 test_done
-- 
1.7.3.4.878.g439c7

  reply	other threads:[~2011-02-05 10:54 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-02-05 10:52 [PATCH 1/2] Add const to parse_{commit,tag}_buffer() Nguyễn Thái Ngọc Duy
2011-02-05 10:52 ` Nguyễn Thái Ngọc Duy [this message]
2011-02-12 11:42   ` [PATCH 2/2] Make hash-object more robust against malformed objects Thomas Rast
2011-02-12 14:47     ` Nguyen Thai Ngoc Duy
2011-02-14 13:02       ` [PATCH] parse_tag_buffer(): do not prefixcmp() out of range Nguyễn Thái Ngọc Duy
2011-02-15 21:18         ` Junio C Hamano
2011-02-16  3:39           ` Nguyen Thai Ngoc Duy
2011-02-17 12:43             ` René Scharfe
2011-02-18 12:49               ` [PATCH] parse_tag_buffer(): avoid out of bound access Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1296903141-27075-2-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).