git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <gitster@pobox.com>
To: Dmitry Potapov <dpotapov@gmail.com>
Cc: Zygo Blaxell <zblaxell@esightcorp.com>,
	Ilari Liusvaara <ilari.liusvaara@elisanet.fi>,
	Thomas Rast <trast@student.ethz.ch>,
	Jonathan Nieder <jrnieder@gmail.com>,
	git@vger.kernel.org
Subject: [PATCH] Teach "git add" and friends to be paranoid
Date: Wed, 17 Feb 2010 17:16:23 -0800	[thread overview]
Message-ID: <7vljer1gyg.fsf_-_@alter.siamese.dyndns.org> (raw)
In-Reply-To: <20100214011812.GA2175@dpotapov.dyndns.org> (Dmitry Potapov's message of "Sun\, 14 Feb 2010 04\:18\:12 +0300")

When creating a loose object, we normally mmap(2) the entire file, then
hash it and compress it in two separate passes over the mapping, for
efficiency.

This is perfectly good for the intended use of git---nobody is supposed to
be insane enough to expect that it won't break anything to muck with the
contents of a file after telling git to index it and before getting the
control back from git.

But the nature of breakage caused by such an abuse is rather bad.  We will
end up with loose object files whose names do not match the contents that
are stored in them and recovered when uncompressed.

This teaches the index_mem() codepath to be paranoid and hash and compress
the data after reading it in core.  The contents hashed may not match the
contents of the file in an insane use case, but at least this way the
result will be internally consistent.

Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 sha1_file.c |   81 ++++++++++++++++++++++++++++++++++++++++++++++++-----------
 1 files changed, 66 insertions(+), 15 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 657825e..d8a7722 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2278,7 +2278,8 @@ static int create_tmpfile(char *buffer, size_t bufsiz, const char *filename)
 }
 
 static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
-			      void *buf, unsigned long len, time_t mtime)
+			      void *buf, unsigned long len, time_t mtime,
+			      int paranoid)
 {
 	int fd, ret;
 	size_t size;
@@ -2286,6 +2287,7 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 	z_stream stream;
 	char *filename;
 	static char tmpfile[PATH_MAX];
+	git_SHA_CTX ctx;
 
 	filename = sha1_file_name(sha1);
 	fd = create_tmpfile(tmpfile, sizeof(tmpfile), filename);
@@ -2312,12 +2314,41 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 	stream.next_in = (unsigned char *)hdr;
 	stream.avail_in = hdrlen;
 	while (deflate(&stream, 0) == Z_OK)
-		/* nothing */;
+		; /* nothing */
 
 	/* Then the data itself.. */
-	stream.next_in = buf;
-	stream.avail_in = len;
-	ret = deflate(&stream, Z_FINISH);
+	if (paranoid) {
+		unsigned char stablebuf[262144];
+		char *bufptr = buf;
+		unsigned long remainder = len;
+
+		git_SHA1_Init(&ctx);
+		git_SHA1_Update(&ctx, hdr, hdrlen);
+
+		ret = Z_OK;
+		while (remainder) {
+			unsigned long chunklen = remainder;
+
+			if (sizeof(stablebuf) <= chunklen)
+				chunklen = sizeof(stablebuf);
+			memcpy(stablebuf, bufptr, chunklen);
+			git_SHA1_Update(&ctx, stablebuf, chunklen);
+			stream.next_in = stablebuf;
+			stream.avail_in = chunklen;
+			do {
+				ret = deflate(&stream, Z_NO_FLUSH);
+			} while (ret == Z_OK);
+			bufptr += chunklen;
+			remainder -= chunklen;
+		}
+		if (ret != Z_STREAM_END)
+			ret = deflate(&stream, Z_FINISH);
+	} else {
+		stream.next_in = buf;
+		stream.avail_in = len;
+		ret = deflate(&stream, Z_FINISH);
+	}
+
 	if (ret != Z_STREAM_END)
 		die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
 
@@ -2327,6 +2358,12 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 
 	size = stream.total_out;
 
+	if (paranoid) {
+		unsigned char paranoid_sha1[20];
+		git_SHA1_Final(paranoid_sha1, &ctx);
+		if (hashcmp(paranoid_sha1, sha1))
+			die("hashed file is volatile");
+	}
 	if (write_buffer(fd, compressed, size) < 0)
 		die("unable to write sha1 file");
 	close_sha1_file(fd);
@@ -2344,7 +2381,7 @@ static int write_loose_object(const unsigned char *sha1, char *hdr, int hdrlen,
 	return move_temp_to_file(tmpfile, filename);
 }
 
-int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
+static int write_sha1_file_paranoid(void *buf, unsigned long len, const char *type, unsigned char *returnsha1, int paranoid)
 {
 	unsigned char sha1[20];
 	char hdr[32];
@@ -2358,7 +2395,12 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha
 		hashcpy(returnsha1, sha1);
 	if (has_sha1_file(sha1))
 		return 0;
-	return write_loose_object(sha1, hdr, hdrlen, buf, len, 0);
+	return write_loose_object(sha1, hdr, hdrlen, buf, len, 0, paranoid);
+}
+
+int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1)
+{
+	return write_sha1_file_paranoid(buf, len, type, returnsha1, 0);
 }
 
 int force_object_loose(const unsigned char *sha1, time_t mtime)
@@ -2376,7 +2418,7 @@ int force_object_loose(const unsigned char *sha1, time_t mtime)
 	if (!buf)
 		return error("cannot read sha1_file for %s", sha1_to_hex(sha1));
 	hdrlen = sprintf(hdr, "%s %lu", typename(type), len) + 1;
-	ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime);
+	ret = write_loose_object(sha1, hdr, hdrlen, buf, len, mtime, 0);
 	free(buf);
 
 	return ret;
@@ -2405,10 +2447,15 @@ int has_sha1_file(const unsigned char *sha1)
 	return has_loose_object(sha1);
 }
 
+#define INDEX_MEM_WRITE_OBJECT  01
+#define INDEX_MEM_PARANOID      02
+
 static int index_mem(unsigned char *sha1, void *buf, size_t size,
-		     int write_object, enum object_type type, const char *path)
+		     enum object_type type, const char *path, int flag)
 {
 	int ret, re_allocated = 0;
+	int write_object = flag & INDEX_MEM_WRITE_OBJECT;
+	int paranoid = flag & INDEX_MEM_PARANOID;
 
 	if (!type)
 		type = OBJ_BLOB;
@@ -2426,9 +2473,11 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	}
 
 	if (write_object)
-		ret = write_sha1_file(buf, size, typename(type), sha1);
+		ret = write_sha1_file_paranoid(buf, size, typename(type),
+					       sha1, paranoid);
 	else
 		ret = hash_sha1_file(buf, size, typename(type), sha1);
+
 	if (re_allocated)
 		free(buf);
 	return ret;
@@ -2437,23 +2486,25 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 	     enum object_type type, const char *path)
 {
-	int ret;
+	int ret, flag;
 	size_t size = xsize_t(st->st_size);
 
+	flag = write_object ? INDEX_MEM_WRITE_OBJECT : 0;
 	if (!S_ISREG(st->st_mode)) {
 		struct strbuf sbuf = STRBUF_INIT;
 		if (strbuf_read(&sbuf, fd, 4096) >= 0)
-			ret = index_mem(sha1, sbuf.buf, sbuf.len, write_object,
-					type, path);
+			ret = index_mem(sha1, sbuf.buf, sbuf.len,
+					type, path, flag);
 		else
 			ret = -1;
 		strbuf_release(&sbuf);
 	} else if (size) {
 		void *buf = xmmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
-		ret = index_mem(sha1, buf, size, write_object, type, path);
+		flag |= INDEX_MEM_PARANOID;
+		ret = index_mem(sha1, buf, size, type, path, flag);
 		munmap(buf, size);
 	} else
-		ret = index_mem(sha1, NULL, size, write_object, type, path);
+		ret = index_mem(sha1, NULL, size, type, path, flag);
 	close(fd);
 	return ret;
 }
-- 
1.7.0.81.g58679

  parent reply	other threads:[~2010-02-18  1:17 UTC|newest]

Thread overview: 84+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20100211234753.22574.48799.reportbug@gibbs.hungrycats.org>
2010-02-12  0:27 ` Bug#569505: git-core: 'git add' corrupts repository if the working directory is modified as it runs Jonathan Nieder
2010-02-12  1:23   ` Zygo Blaxell
2010-02-13 12:12     ` Jonathan Nieder
2010-02-13 13:39       ` Ilari Liusvaara
2010-02-13 14:39         ` Thomas Rast
2010-02-13 16:29           ` Ilari Liusvaara
2010-02-13 22:09             ` Dmitry Potapov
2010-02-13 22:37               ` Zygo Blaxell
2010-02-14  1:18                 ` [PATCH] don't use mmap() to hash files Dmitry Potapov
2010-02-14  1:37                   ` Junio C Hamano
2010-02-14  2:18                     ` Dmitry Potapov
2010-02-14  3:14                       ` Junio C Hamano
2010-02-14 11:14                         ` Thomas Rast
2010-02-14 11:46                           ` Junio C Hamano
2010-02-14  1:53                   ` Johannes Schindelin
2010-02-14  2:00                     ` Junio C Hamano
2010-02-14  2:42                     ` Dmitry Potapov
2010-02-14 11:07                       ` Jakub Narebski
2010-02-14 11:55                       ` Paolo Bonzini
2010-02-14 18:10                       ` Johannes Schindelin
2010-02-14 19:06                         ` Dmitry Potapov
2010-02-14 19:22                           ` Johannes Schindelin
2010-02-14 19:28                             ` Johannes Schindelin
2010-02-14 19:56                               ` Dmitry Potapov
2010-02-14 23:52                                 ` Zygo Blaxell
2010-02-15  5:05                                 ` Nicolas Pitre
2010-02-15 12:23                                   ` Dmitry Potapov
2010-02-15  7:48                                 ` Paolo Bonzini
2010-02-15 12:25                                   ` Dmitry Potapov
2010-02-14 19:55                             ` Dmitry Potapov
2010-02-14 23:13                           ` Avery Pennarun
2010-02-15  4:16                             ` Nicolas Pitre
2010-02-15  5:01                               ` Avery Pennarun
2010-02-15  5:48                                 ` Nicolas Pitre
2010-02-15 19:19                                   ` Avery Pennarun
2010-02-15 19:29                                     ` Nicolas Pitre
2010-02-14  3:05                   ` [PATCH v2] " Dmitry Potapov
2010-02-18  1:16                   ` Junio C Hamano [this message]
2010-02-18  1:20                     ` [PATCH] Teach "git add" and friends to be paranoid Junio C Hamano
2010-02-18 15:32                       ` Zygo Blaxell
2010-02-19 17:51                         ` Junio C Hamano
2010-02-18  1:38                     ` Jeff King
2010-02-18  4:55                       ` Nicolas Pitre
2010-02-18  5:36                         ` Junio C Hamano
2010-02-18  7:27                           ` Wincent Colaiuta
2010-02-18 16:18                             ` Zygo Blaxell
2010-02-18 18:12                               ` Jonathan Nieder
2010-02-18 18:35                                 ` Junio C Hamano
2010-02-22 12:59                           ` Paolo Bonzini
2010-02-22 13:33                             ` Dmitry Potapov
2010-02-18 10:14                     ` Thomas Rast
2010-02-18 18:16                       ` Junio C Hamano
2010-02-18 19:58                         ` Nicolas Pitre
2010-02-18 20:11                           ` 16 gig, 350,000 file repository Bill Lear
2010-02-18 20:58                             ` Nicolas Pitre
2010-02-19  9:27                               ` Erik Faye-Lund
2010-02-22 22:20                               ` Bill Lear
2010-02-22 22:31                                 ` Nicolas Pitre
2010-02-18 20:14                           ` [PATCH] Teach "git add" and friends to be paranoid Peter Harris
2010-02-18 20:17                           ` Junio C Hamano
2010-02-18 21:30                             ` Nicolas Pitre
2010-02-19  1:04                               ` Jonathan Nieder
2010-02-19 15:26                                 ` Zygo Blaxell
2010-02-19 17:52                                   ` Junio C Hamano
2010-02-19 19:08                                     ` Zygo Blaxell
2010-02-19  8:28                     ` Dmitry Potapov
2010-02-19 17:52                       ` Junio C Hamano
2010-02-20 19:23                         ` Junio C Hamano
2010-02-21  7:21                           ` Dmitry Potapov
2010-02-21 19:32                             ` Junio C Hamano
2010-02-22  3:35                               ` Dmitry Potapov
2010-02-22  6:59                                 ` Junio C Hamano
2010-02-22 12:25                                   ` Dmitry Potapov
2010-02-22 15:40                                   ` Nicolas Pitre
2010-02-22 16:01                                     ` Dmitry Potapov
2010-02-22 17:31                                     ` Zygo Blaxell
2010-02-22 18:01                                       ` Nicolas Pitre
2010-02-22 19:56                                         ` Junio C Hamano
2010-02-22 20:52                                           ` Nicolas Pitre
2010-02-22 18:05                                       ` Dmitry Potapov
2010-02-22 18:14                                         ` Nicolas Pitre
2010-02-14  1:36   ` mmap with MAP_PRIVATE is useless (was Re: Bug#569505: git-core: 'git add' corrupts repository if the working directory is modified as it runs) Paolo Bonzini
2010-02-14  1:53     ` mmap with MAP_PRIVATE is useless Junio C Hamano
2010-02-14  2:11       ` Paolo Bonzini

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vljer1gyg.fsf_-_@alter.siamese.dyndns.org \
    --to=gitster@pobox.com \
    --cc=dpotapov@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=ilari.liusvaara@elisanet.fi \
    --cc=jrnieder@gmail.com \
    --cc=trast@student.ethz.ch \
    --cc=zblaxell@esightcorp.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).