git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH WIP 4/4] index_fd: support indexing large files
Date: Thu, 28 May 2009 15:29:10 +1000	[thread overview]
Message-ID: <1243488550-15357-5-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1243488550-15357-4-git-send-email-pclouds@gmail.com>

This patch is less impressive than the previous one, as memory usage is
usually lower anyway. But systems without a proper mmap() would still love it.

TODO: again, file limit

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 sha1_file.c |  120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 120 insertions(+), 0 deletions(-)

diff --git a/sha1_file.c b/sha1_file.c
index 2ed06a2..f4f90ab 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2609,12 +2609,132 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
 	return ret;
 }
 
+static int index_and_write_fd(unsigned char *sha1, int fd, struct stat *st,
+			      enum object_type type, const char *path) /* path is not referenced in the body */
+{	/* Stream fd through SHA-1 and deflate into a temporary file, then move that file to the name derived from the hash. */
+	int fdo, ret;
+	z_stream stream;
+	char *filename;
+	static char tmpfile[PATH_MAX]; /* static storage: one buffer shared by every call */
+	int hdrlen;
+	int bufin_length = 8192, bufout_length = 8192; /* must match the array sizes on the next line */
+	char bufin[8192], bufout[8192];
+	int written_out = 0; /* how much of stream.total_out has already been written to fdo */
+	git_SHA_CTX c;
+
+	fdo = create_tmpfile(tmpfile, sizeof(tmpfile), "foo"); /* NOTE(review): "foo" looks like a WIP placeholder -- confirm what create_tmpfile expects here */
+	if (fdo < 0) {
+		if (errno == EACCES)
+			return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
+		else
+			return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
+	}
+
+	hdrlen = sprintf(bufin, "%s %lu", typename(type), (unsigned long)st->st_size)+1; /* +1 keeps the terminating NUL as part of the header */
+	git_SHA1_Init(&c); /* NOTE(review): the header size comes from st, but the loops below hash whatever fd yields until a zero-length read */
+	git_SHA1_Update(&c, bufin, hdrlen);
+
+	/* Set up the deflate stream with bufout as its output window. NOTE(review): deflateInit's return value is not checked */
+	memset(&stream, 0, sizeof(stream));
+	deflateInit(&stream, zlib_compression_level);
+	stream.next_out = (unsigned char *)bufout;
+	stream.avail_out = bufout_length;
+
+	/* Compress the header first; bufin still holds it at this point */
+	stream.next_in = (unsigned char *)bufin;
+	stream.avail_in = hdrlen;
+	while (deflate(&stream, 0) == Z_OK)
+		/* nothing: keep calling until deflate stops returning Z_OK */;
+
+	written_out = stream.total_out; /* NOTE(review): an int receives zlib's running output total -- may not fit for very large outputs; confirm the types */
+	write_or_die(fdo, bufout, written_out);
+	stream.next_out = (unsigned char *)bufout;
+	stream.avail_out = bufout_length;
+
+	/* Then the data itself: from here on bufin is reused as the read buffer */
+	stream.next_in = (unsigned char *)bufin;
+	stream.avail_in = xread(fd, bufin, bufin_length); /* NOTE(review): a failed read is not detected here -- confirm what xread returns on error */
+	git_SHA1_Update(&c, stream.next_in, stream.avail_in);
+
+	while ((ret = deflate(&stream, Z_NO_FLUSH)) == Z_OK || ret == Z_BUF_ERROR) { /* Z_BUF_ERROR is tolerated so the body can drain output and refill input */
+		if (stream.total_out > written_out) { /* new output since the last write: write it and rewind bufout */
+			write_or_die(fdo, bufout, stream.total_out - written_out);
+			written_out = stream.total_out;
+			stream.next_out = (unsigned char *)bufout;
+			stream.avail_out = bufout_length;
+		}
+		if (stream.avail_in == 0) { /* input fully consumed: refill bufin from fd */
+			stream.next_in = (unsigned char *)bufin;
+			stream.avail_in = xread(fd, bufin, bufin_length); /* NOTE(review): same unchecked read as above */
+			if (!stream.avail_in)
+				break; /* zero-length read: stop feeding data */
+			git_SHA1_Update(&c, stream.next_in, stream.avail_in);
+		}
+	}
+
+	/* Every byte read has been hashed; finish the SHA-1 before the final deflate flush */
+	git_SHA1_Final(sha1, &c);
+
+	/* Flush what deflate still holds; each Z_OK pass writes out bufout and rewinds it */
+	while ((ret = deflate(&stream, Z_FINISH)) == Z_OK) {
+		write_or_die(fdo, bufout, stream.total_out - written_out);
+		written_out = stream.total_out;
+		stream.next_out = (unsigned char *)bufout;
+		stream.avail_out = bufout_length;
+	}
+
+	if (ret != Z_STREAM_END) /* anything other than a completed stream is fatal */
+		die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
+
+	/* Last piece: output from the final deflate() call that has not been written yet */
+	if (written_out < stream.total_out)
+		write_or_die(fdo, bufout, stream.total_out - written_out);
+	close_sha1_file(fdo);
+
+	ret = deflateEnd(&stream);
+	if (ret != Z_OK)
+		die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
+
+	/* Build the destination path from the SHA-1 and move the temporary file onto it */
+	filename = sha1_file_name(sha1);
+	safe_create_leading_directories_const(filename);
+	return move_temp_to_file(tmpfile, filename); /* the move's result is this function's return value */
+}
+
+static int hash_fd(unsigned char *sha1, int fd, struct stat *st,
+		   enum object_type type, const char *path) /* path is not referenced in the body */
+{	/* Compute the object SHA-1 of fd's contents without writing an object; returns 0 on success, -1 if a read fails. */
+	git_SHA_CTX c;
+	char buf[8192];
+	int buflen;
+
+	/* Generate the "<type> <size>" header; the +1 keeps its terminating NUL */
+	buflen = sprintf(buf, "%s %lu", typename(type), (unsigned long)st->st_size)+1;
+
+	/* Hash the header, then the file data in buf-sized chunks until a read returns 0 or fails */
+	git_SHA1_Init(&c);
+	git_SHA1_Update(&c, buf, buflen);
+	while ((buflen = xread(fd, buf, sizeof(buf))) > 0)
+		git_SHA1_Update(&c, buf, buflen);
+	git_SHA1_Final(sha1, &c);
+	return buflen < 0 ? -1 : 0; /* a negative read result means sha1 covers truncated data: report failure */
+}
+
 int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
 	     enum object_type type, const char *path)
 {
 	int ret;
 	size_t size = xsize_t(st->st_size);
 
+	if (S_ISREG(st->st_mode) && path && !convert_to_git_needed(path, size)) {
+		if (write_object)
+			ret = index_and_write_fd(sha1, fd, st, type, path);
+		else
+			ret = hash_fd(sha1, fd, st, type, path);
+		close(fd);
+		return ret;
+	}
+
 	if (!S_ISREG(st->st_mode)) {
 		struct strbuf sbuf = STRBUF_INIT;
 		if (strbuf_read(&sbuf, fd, 4096) >= 0)
-- 
1.6.3.1.257.gbd13

  reply	other threads:[~2009-05-28  5:30 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-05-28  5:29 [PATCH WIP 0/4] Special code path for large blobs Nguyễn Thái Ngọc Duy
2009-05-28  5:29 ` [PATCH WIP 1/4] convert.c: refactor in order to skip conversion early without looking into file content Nguyễn Thái Ngọc Duy
2009-05-28  5:29   ` [PATCH WIP 2/4] sha1_file.c: add streaming interface for reading blobs Nguyễn Thái Ngọc Duy
2009-05-28  5:29     ` [PATCH WIP 3/4] write_entry: use streaming interface for checkout large files Nguyễn Thái Ngọc Duy
2009-05-28  5:29       ` Nguyễn Thái Ngọc Duy [this message]
2009-05-28 18:03 ` [PATCH WIP 0/4] Special code path for large blobs Nicolas Pitre
2009-06-02  4:46   ` Nguyen Thai Ngoc Duy
2009-06-02 14:45     ` Shawn O. Pearce
2009-06-02 17:22       ` Nicolas Pitre

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1243488550-15357-5-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).