From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH WIP 4/4] index_fd: support indexing large files
Date: Thu, 28 May 2009 15:29:10 +1000 [thread overview]
Message-ID: <1243488550-15357-5-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1243488550-15357-4-git-send-email-pclouds@gmail.com>
This patch is less impressed than the previous one as memory usage is
usually lower. But then systems without proper mmap() would still love it.
TODO: again, file limit
Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
sha1_file.c | 120 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 120 insertions(+), 0 deletions(-)
diff --git a/sha1_file.c b/sha1_file.c
index 2ed06a2..f4f90ab 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -2609,12 +2609,132 @@ static int index_mem(unsigned char *sha1, void *buf, size_t size,
return ret;
}
+static int index_and_write_fd(unsigned char *sha1, int fd, struct stat *st,
+ enum object_type type, const char *path)
+{
+ int fdo, ret;
+ z_stream stream;
+ char *filename;
+ static char tmpfile[PATH_MAX];
+ int hdrlen;
+ int bufin_length = 8192, bufout_length = 8192;
+ char bufin[8192], bufout[8192];
+ int written_out = 0;
+ git_SHA_CTX c;
+
+ fdo = create_tmpfile(tmpfile, sizeof(tmpfile), "foo");
+ if (fdo < 0) {
+ if (errno == EACCES)
+ return error("insufficient permission for adding an object to repository database %s\n", get_object_directory());
+ else
+ return error("unable to create temporary sha1 filename %s: %s\n", tmpfile, strerror(errno));
+ }
+
+ hdrlen = sprintf(bufin, "%s %lu", typename(type), (unsigned long)st->st_size)+1;
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, bufin, hdrlen);
+
+ /* Set it up */
+ memset(&stream, 0, sizeof(stream));
+ deflateInit(&stream, zlib_compression_level);
+ stream.next_out = (unsigned char *)bufout;
+ stream.avail_out = bufout_length;
+
+ /* First header.. */
+ stream.next_in = (unsigned char *)bufin;
+ stream.avail_in = hdrlen;
+ while (deflate(&stream, 0) == Z_OK)
+ /* nothing */;
+
+ written_out = stream.total_out;
+ write_or_die(fdo, bufout, written_out);
+ stream.next_out = (unsigned char *)bufout;
+ stream.avail_out = bufout_length;
+
+ /* Then the data itself.. */
+ stream.next_in = (unsigned char *)bufin;
+ stream.avail_in = xread(fd, bufin, bufin_length);
+ git_SHA1_Update(&c, stream.next_in, stream.avail_in);
+
+ while ((ret = deflate(&stream, Z_NO_FLUSH)) == Z_OK || ret == Z_BUF_ERROR) {
+ if (stream.total_out > written_out) {
+ write_or_die(fdo, bufout, stream.total_out - written_out);
+ written_out = stream.total_out;
+ stream.next_out = (unsigned char *)bufout;
+ stream.avail_out = bufout_length;
+ }
+ if (stream.avail_in == 0) {
+ stream.next_in = (unsigned char *)bufin;
+ stream.avail_in = xread(fd, bufin, bufin_length);
+ if (!stream.avail_in)
+ break;
+ git_SHA1_Update(&c, stream.next_in, stream.avail_in);
+ }
+ }
+
+ /* Done computing SHA-1 */
+ git_SHA1_Final(sha1, &c);
+
+ /* Make sure everything is flushed out */
+ while ((ret = deflate(&stream, Z_FINISH)) == Z_OK) {
+ write_or_die(fdo, bufout, stream.total_out - written_out);
+ written_out = stream.total_out;
+ stream.next_out = (unsigned char *)bufout;
+ stream.avail_out = bufout_length;
+ }
+
+ if (ret != Z_STREAM_END)
+ die("unable to deflate new object %s (%d)", sha1_to_hex(sha1), ret);
+
+ /* Last piece */
+ if (written_out < stream.total_out)
+ write_or_die(fdo, bufout, stream.total_out - written_out);
+ close_sha1_file(fdo);
+
+ ret = deflateEnd(&stream);
+ if (ret != Z_OK)
+ die("deflateEnd on object %s failed (%d)", sha1_to_hex(sha1), ret);
+
+ /* Now generate proper path from SHA-1 */
+ filename = sha1_file_name(sha1);
+ safe_create_leading_directories_const(filename);
+ return move_temp_to_file(tmpfile, filename);
+}
+
+static int hash_fd(unsigned char *sha1, int fd, struct stat *st,
+ enum object_type type, const char *path)
+{
+ git_SHA_CTX c;
+ char buf[8192];
+ int buflen;
+
+ /* Generate the header */
+ buflen = sprintf(buf, "%s %lu", typename(type), (unsigned long)st->st_size)+1;
+
+ /* Sha1.. */
+ git_SHA1_Init(&c);
+ git_SHA1_Update(&c, buf, buflen);
+ while ((buflen = xread(fd, buf, 8192)) > 0)
+ git_SHA1_Update(&c, buf, buflen);
+ git_SHA1_Final(sha1, &c);
+ return 0;
+}
+
int index_fd(unsigned char *sha1, int fd, struct stat *st, int write_object,
enum object_type type, const char *path)
{
int ret;
size_t size = xsize_t(st->st_size);
+ if (S_ISREG(st->st_mode) && path && !convert_to_git_needed(path, size)) {
+ if (write_object)
+ ret = index_and_write_fd(sha1, fd, st, type, path);
+ else
+ ret = hash_fd(sha1, fd, st, type, path);
+ close(fd);
+ return ret;
+ }
+
if (!S_ISREG(st->st_mode)) {
struct strbuf sbuf = STRBUF_INIT;
if (strbuf_read(&sbuf, fd, 4096) >= 0)
--
1.6.3.1.257.gbd13
next prev parent reply other threads:[~2009-05-28 5:30 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-05-28 5:29 [PATCH WIP 0/4] Special code path for large blobs Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` [PATCH WIP 1/4] convert.c: refactor in order to skip conversion early without looking into file content Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` [PATCH WIP 2/4] sha1_file.c: add streaming interface for reading blobs Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` [PATCH WIP 3/4] write_entry: use streaming interface for checkout large files Nguyễn Thái Ngọc Duy
2009-05-28 5:29 ` Nguyễn Thái Ngọc Duy [this message]
2009-05-28 18:03 ` [PATCH WIP 0/4] Special code path for large blobs Nicolas Pitre
2009-06-02 4:46 ` Nguyen Thai Ngoc Duy
2009-06-02 14:45 ` Shawn O. Pearce
2009-06-02 17:22 ` Nicolas Pitre
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1243488550-15357-5-git-send-email-pclouds@gmail.com \
--to=pclouds@gmail.com \
--cc=git@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).