From: Han Xin <chiyutianyi@gmail.com>
To: Junio C Hamano <gitster@pobox.com>,
Git List <git@vger.kernel.org>, Jeff King <peff@peff.net>,
Jiang Xin <zhiyou.jx@alibaba-inc.com>,
Philip Oakley <philipoakley@iee.email>
Cc: Han Xin <hanxin.hx@alibaba-inc.com>
Subject: [PATCH v3 0/5] unpack large objects in stream
Date: Mon, 22 Nov 2021 11:32:15 +0800 [thread overview]
Message-ID: <20211122033220.32883-1-chiyutianyi@gmail.com> (raw)
In-Reply-To: <20211009082058.41138-1-chiyutianyi@gmail.com>
From: Han Xin <hanxin.hx@alibaba-inc.com>
Although we do not recommend that users push large binary files to Git repositories,
it's difficult to prevent them from doing so. We once encountered a surge
in memory usage on the server. The cause was that a user had submitted
a single object with a size of 15GB. As soon as someone initiates a git push, the git
process immediately allocates 15GB of memory, resulting in an OOM risk.
Through further analysis, we found that when we execute git unpack-objects, in
unpack_non_delta_entry(), "void *buf = get_data(size);" will directly allocate
memory equal to the size of the object. This is quite a scary thing, because the
pre-receive hook has not been executed at this time, and we cannot avoid this by hooks.
I got inspiration from the deflate process of zlib; it may be a good idea
to change unpack-objects to stream the data through deflate instead.
Changes since v2:
* Rewrite commit messages and make changes suggested by Jiang Xin.
* Remove the commit "object-file.c: add dry_run mode for write_loose_object()" and
use a new commit "unpack-objects.c: add dry_run mode for get_data()" instead.
Han Xin (5):
object-file: refactor write_loose_object() to read buffer from stream
object-file.c: handle undetermined oid in write_loose_object()
object-file.c: read stream in a loop in write_loose_object()
unpack-objects.c: add dry_run mode for get_data()
unpack-objects: unpack_non_delta_entry() read data in a stream
builtin/unpack-objects.c | 92 +++++++++++++++++++++++++--
object-file.c | 98 +++++++++++++++++++++++++----
object-store.h | 9 +++
t/t5590-unpack-non-delta-objects.sh | 76 ++++++++++++++++++++++
4 files changed, 257 insertions(+), 18 deletions(-)
create mode 100755 t/t5590-unpack-non-delta-objects.sh
Range-diff against v2:
1: 01672f50a0 ! 1: 8640b04f6d object-file: refactor write_loose_object() to support inputstream
@@ Metadata
Author: Han Xin <hanxin.hx@alibaba-inc.com>
## Commit message ##
- object-file: refactor write_loose_object() to support inputstream
+ object-file: refactor write_loose_object() to read buffer from stream
- Refactor write_loose_object() to support inputstream, in the same way
- that zlib reading is chunked.
+ We used to call "get_data()" in "unpack_non_delta_entry()" to read the
+ entire contents of a blob object, no matter how big it is. This
+ implementation may consume all the memory and cause OOM.
- Using "in_stream" instead of "void *buf", we needn't to allocate enough
- memory in advance, and only part of the contents will be read when
- called "in_stream.read()".
+ This can be improved by feeding data to "write_loose_object()" in a
+ stream. The input stream is implemented as an interface. In the first
+ step, we make a simple implementation, feeding the entire buffer in the
+ "stream" to "write_loose_object()" as a refactor.
Helped-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
@@ object-file.c: static int create_tmpfile(struct strbuf *tmp, const char *filenam
return fd;
}
-+struct input_data_from_buffer {
-+ const char *buf;
++struct simple_input_stream_data {
++ const void *buf;
+ unsigned long len;
+};
+
-+static const char *read_input_stream_from_buffer(void *data, unsigned long *len)
++static const void *feed_simple_input_stream(struct input_stream *in_stream, unsigned long *len)
+{
-+ struct input_data_from_buffer *input = (struct input_data_from_buffer *)data;
++ struct simple_input_stream_data *data = in_stream->data;
+
-+ if (input->len == 0) {
++ if (data->len == 0) {
+ *len = 0;
+ return NULL;
+ }
-+ *len = input->len;
-+ input->len = 0;
-+ return input->buf;
++ *len = data->len;
++ data->len = 0;
++ return data->buf;
+}
+
static int write_loose_object(const struct object_id *oid, char *hdr,
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
struct object_id parano_oid;
static struct strbuf tmp_file = STRBUF_INIT;
static struct strbuf filename = STRBUF_INIT;
-+ const char *buf;
++ const void *buf;
+ unsigned long len;
loose_object_path(the_repository, &filename, oid);
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
the_hash_algo->update_fn(&c, hdr, hdrlen);
/* Then the data itself.. */
-+ buf = in_stream->read(in_stream->data, &len);
++ buf = in_stream->read(in_stream, &len);
stream.next_in = (void *)buf;
stream.avail_in = len;
do {
@@ object-file.c: int write_object_file_flags(const void *buf, unsigned long len,
char hdr[MAX_HEADER_LEN];
int hdrlen = sizeof(hdr);
+ struct input_stream in_stream = {
-+ .read = read_input_stream_from_buffer,
-+ .data = (void *)&(struct input_data_from_buffer) {
++ .read = feed_simple_input_stream,
++ .data = (void *)&(struct simple_input_stream_data) {
+ .buf = buf,
+ .len = len,
+ },
@@ object-file.c: int hash_object_file_literally(const void *buf, unsigned long len
char *header;
int hdrlen, status = 0;
+ struct input_stream in_stream = {
-+ .read = read_input_stream_from_buffer,
-+ .data = (void *)&(struct input_data_from_buffer) {
++ .read = feed_simple_input_stream,
++ .data = (void *)&(struct simple_input_stream_data) {
+ .buf = buf,
+ .len = len,
+ },
@@ object-file.c: int force_object_loose(const struct object_id *oid, time_t mtime)
char hdr[MAX_HEADER_LEN];
int hdrlen;
int ret;
-+ struct input_data_from_buffer data;
++ struct simple_input_stream_data data;
+ struct input_stream in_stream = {
-+ .read = read_input_stream_from_buffer,
++ .read = feed_simple_input_stream,
+ .data = &data,
+ };
@@ object-store.h: struct object_directory {
};
+struct input_stream {
-+ const char *(*read)(void* data, unsigned long *len);
++ const void *(*read)(struct input_stream *, unsigned long *len);
+ void *data;
+};
+
2: a309b7e391 < -: ---------- object-file.c: add dry_run mode for write_loose_object()
3: b0a5b53710 ! 2: d4a2caf2bd object-file.c: handle nil oid in write_loose_object()
@@ Metadata
Author: Han Xin <hanxin.hx@alibaba-inc.com>
## Commit message ##
- object-file.c: handle nil oid in write_loose_object()
+ object-file.c: handle undetermined oid in write_loose_object()
- When read input stream, oid can't get before reading all, and it will be
- filled after reading.
+ When streaming a large blob object to "write_loose_object()", we have no
+ chance to run "write_object_file_prepare()" to calculate the oid in
+ advance. So we need to handle undetermined oid in function
+ "write_loose_object()".
+
+ In the original implementation, we know the oid and we can write the
+ temporary file in the same directory as the final object, but for an
+ object with an undetermined oid, we don't know the exact directory for
+ the object, so we have to save the temporary file in ".git/objects/"
+ directory instead.
Helped-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
## object-file.c ##
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *hdr,
- const char *buf;
+ const void *buf;
unsigned long len;
- loose_object_path(the_repository, &filename, oid);
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
+ strbuf_reset(&filename);
+ strbuf_addstr(&filename, the_repository->objects->odb->path);
+ strbuf_addch(&filename, '/');
-+ } else
++ } else {
+ loose_object_path(the_repository, &filename, oid);
++ }
- if (!dry_run) {
- fd = create_tmpfile(&tmp_file, filename.buf);
+ fd = create_tmpfile(&tmp_file, filename.buf);
+ if (fd < 0) {
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *hdr,
die(_("deflateEnd on object %s failed (%d)"), oid_to_hex(oid),
ret);
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
die(_("confused by unstable object source data for %s"),
oid_to_hex(oid));
-@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *hdr,
-
close_loose_object(fd);
+ if (is_null_oid(oid)) {
+ int dirlen;
+
-+ /* copy oid */
+ oidcpy((struct object_id *)oid, &parano_oid);
-+ /* We get the oid now */
+ loose_object_path(the_repository, &filename, oid);
+
++ /* We finally know the object path, and create the missing dir. */
+ dirlen = directory_size(filename.buf);
+ if (dirlen) {
+ struct strbuf dir = STRBUF_INIT;
-+ /*
-+ * Make sure the directory exists; note that the
-+ * contents of the buffer are undefined after mkstemp
-+ * returns an error, so we have to rewrite the whole
-+ * buffer from scratch.
-+ */
-+ strbuf_reset(&dir);
+ strbuf_add(&dir, filename.buf, dirlen - 1);
+ if (mkdir(dir.buf, 0777) && errno != EEXIST)
+ return -1;
++ if (adjust_shared_perm(dir.buf))
++ return -1;
++ strbuf_release(&dir);
+ }
+ }
+
4: 09d438b692 ! 3: 2575900449 object-file.c: read input stream repeatedly in write_loose_object()
@@ Metadata
Author: Han Xin <hanxin.hx@alibaba-inc.com>
## Commit message ##
- object-file.c: read input stream repeatedly in write_loose_object()
+ object-file.c: read stream in a loop in write_loose_object()
- Read input stream repeatedly in write_loose_object() unless reach the
- end, so that we can divide the large blob write into many small blocks.
+ In order to prepare the stream version of "write_loose_object()", read
+ the input stream in a loop in "write_loose_object()", so that we can
+ feed the contents of large blob object to "write_loose_object()" using
+ a small fixed buffer.
+ Helped-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
## object-file.c ##
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *hdr,
static struct strbuf tmp_file = STRBUF_INIT;
static struct strbuf filename = STRBUF_INIT;
- const char *buf;
+ const void *buf;
- unsigned long len;
+ int flush = 0;
@@ object-file.c: static int write_loose_object(const struct object_id *oid, char *
the_hash_algo->update_fn(&c, hdr, hdrlen);
/* Then the data itself.. */
-- buf = in_stream->read(in_stream->data, &len);
+- buf = in_stream->read(in_stream, &len);
- stream.next_in = (void *)buf;
- stream.avail_in = len;
do {
unsigned char *in0 = stream.next_in;
- ret = git_deflate(&stream, Z_FINISH);
+ if (!stream.avail_in) {
-+ if ((buf = in_stream->read(in_stream->data, &stream.avail_in))) {
++ buf = in_stream->read(in_stream, &stream.avail_in);
++ if (buf) {
+ stream.next_in = (void *)buf;
+ in0 = (unsigned char *)buf;
-+ } else
++ } else {
+ flush = Z_FINISH;
++ }
+ }
+ ret = git_deflate(&stream, flush);
the_hash_algo->update_fn(&c, in0, stream.next_in - in0);
- if (!dry_run && write_buffer(fd, compressed, stream.next_out - compressed) < 0)
+ if (write_buffer(fd, compressed, stream.next_out - compressed) < 0)
die(_("unable to write loose object file"));
5: 9fb188d437 < -: ---------- object-store.h: add write_loose_object()
-: ---------- > 4: ca93ecc780 unpack-objects.c: add dry_run mode for get_data()
6: 80468a6fbc ! 5: 39a072ee2a unpack-objects: unpack large object in stream
@@ Metadata
Author: Han Xin <hanxin.hx@alibaba-inc.com>
## Commit message ##
- unpack-objects: unpack large object in stream
+ unpack-objects: unpack_non_delta_entry() read data in a stream
- When calling "unpack_non_delta_entry()", will allocate full memory for
- the whole size of the unpacked object and write the buffer to loose file
- on disk. This may lead to OOM for the git-unpack-objects process when
- unpacking a very large object.
+ We used to call "get_data()" in "unpack_non_delta_entry()" to read the
+ entire contents of a blob object, no matter how big it is. This
+ implementation may consume all the memory and cause OOM.
- In function "unpack_delta_entry()", will also allocate full memory to
- buffer the whole delta, but since there will be no delta for an object
- larger than "core.bigFileThreshold", this issue is moderate.
+ By implementing a zstream version of input_stream interface, we can use
+ a small fixed buffer for "unpack_non_delta_entry()".
- To resolve the OOM issue in "git-unpack-objects", we can unpack large
- object to file in stream, and use "core.bigFileThreshold" to avoid OOM
- limits when called "get_data()".
+ However, unpacking non-delta objects from a stream instead of from an entire
+ buffer incurs a 10% performance penalty. Therefore, only objects
+ larger than the "big_file_threshold" are unpacked via the zstream. See the following
+ benchmarks:
+ $ hyperfine \
+ --prepare 'rm -rf dest.git && git init --bare dest.git' \
+ 'git -C dest.git unpack-objects <binary_320M.pack'
+ Benchmark 1: git -C dest.git unpack-objects <binary_320M.pack
+ Time (mean ± σ): 10.029 s ± 0.270 s [User: 8.265 s, System: 1.522 s]
+ Range (min … max): 9.786 s … 10.603 s 10 runs
+
+ $ hyperfine \
+ --prepare 'rm -rf dest.git && git init --bare dest.git' \
+ 'git -c core.bigFileThreshold=2m -C dest.git unpack-objects <binary_320M.pack'
+ Benchmark 1: git -c core.bigFileThreshold=2m -C dest.git unpack-objects <binary_320M.pack
+ Time (mean ± σ): 10.859 s ± 0.774 s [User: 8.813 s, System: 1.898 s]
+ Range (min … max): 9.884 s … 12.192 s 10 runs
+
+ $ hyperfine \
+ --prepare 'rm -rf dest.git && git init --bare dest.git' \
+ 'git -C dest.git unpack-objects <binary_96M.pack'
+ Benchmark 1: git -C dest.git unpack-objects <binary_96M.pack
+ Time (mean ± σ): 2.678 s ± 0.037 s [User: 2.205 s, System: 0.450 s]
+ Range (min … max): 2.639 s … 2.743 s 10 runs
+
+ $ hyperfine \
+ --prepare 'rm -rf dest.git && git init --bare dest.git' \
+ 'git -c core.bigFileThreshold=2m -C dest.git unpack-objects <binary_96M.pack'
+ Benchmark 1: git -c core.bigFileThreshold=2m -C dest.git unpack-objects <binary_96M.pack
+ Time (mean ± σ): 2.819 s ± 0.124 s [User: 2.216 s, System: 0.564 s]
+ Range (min … max): 2.679 s … 3.125 s 10 runs
+
+ Helped-by: Jiang Xin <zhiyou.jx@alibaba-inc.com>
Signed-off-by: Han Xin <hanxin.hx@alibaba-inc.com>
## builtin/unpack-objects.c ##
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
}
}
-+struct input_data_from_zstream {
++struct input_zstream_data {
+ git_zstream *zstream;
+ unsigned char buf[4096];
+ int status;
+};
+
-+static const char *read_inflate_in_stream(void *data, unsigned long *readlen)
++static const void *feed_input_zstream(struct input_stream *in_stream, unsigned long *readlen)
+{
-+ struct input_data_from_zstream *input = data;
-+ git_zstream *zstream = input->zstream;
++ struct input_zstream_data *data = in_stream->data;
++ git_zstream *zstream = data->zstream;
+ void *in = fill(1);
+
-+ if (!len || input->status == Z_STREAM_END) {
++ if (!len || data->status == Z_STREAM_END) {
+ *readlen = 0;
+ return NULL;
+ }
+
-+ zstream->next_out = input->buf;
-+ zstream->avail_out = sizeof(input->buf);
++ zstream->next_out = data->buf;
++ zstream->avail_out = sizeof(data->buf);
+ zstream->next_in = in;
+ zstream->avail_in = len;
+
-+ input->status = git_inflate(zstream, 0);
++ data->status = git_inflate(zstream, 0);
+ use(len - zstream->avail_in);
-+ *readlen = sizeof(input->buf) - zstream->avail_out;
++ *readlen = sizeof(data->buf) - zstream->avail_out;
+
-+ return (const char *)input->buf;
++ return data->buf;
+}
+
+static void write_stream_blob(unsigned nr, unsigned long size)
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
+ char hdr[32];
+ int hdrlen;
+ git_zstream zstream;
-+ struct input_data_from_zstream data;
++ struct input_zstream_data data;
+ struct input_stream in_stream = {
-+ .read = read_inflate_in_stream,
++ .read = feed_input_zstream,
+ .data = &data,
+ };
+ struct object_id *oid = &obj_list[nr].oid;
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
+ /* Generate the header */
+ hdrlen = xsnprintf(hdr, sizeof(hdr), "%s %"PRIuMAX, type_name(OBJ_BLOB), (uintmax_t)size) + 1;
+
-+ if ((ret = write_loose_object(oid, hdr, hdrlen, &in_stream, dry_run, 0, 0)))
++ if ((ret = write_loose_object(oid, hdr, hdrlen, &in_stream, 0, 0)))
+ die(_("failed to write object in stream %d"), ret);
+
+ if (zstream.total_out != size || data.status != Z_STREAM_END)
@@ builtin/unpack-objects.c: static void added_object(unsigned nr, enum object_type
static void unpack_non_delta_entry(enum object_type type, unsigned long size,
unsigned nr)
{
-- void *buf = get_data(size);
+- void *buf = get_data(size, dry_run);
+ void *buf;
+
+ /* Write large blob in stream without allocating full buffer. */
-+ if (type == OBJ_BLOB && size > big_file_threshold) {
++ if (!dry_run && type == OBJ_BLOB && size > big_file_threshold) {
+ write_stream_blob(nr, size);
+ return;
+ }
-+ buf = get_data(size);
++ buf = get_data(size, dry_run);
if (!dry_run && buf)
write_object(nr, type, buf, size);
else
- ## t/t5590-receive-unpack-objects.sh (new) ##
+ ## object-file.c ##
+@@ object-file.c: static const void *feed_simple_input_stream(struct input_stream *in_stream, unsi
+ return data->buf;
+ }
+
+-static int write_loose_object(const struct object_id *oid, char *hdr,
+- int hdrlen, struct input_stream *in_stream,
+- time_t mtime, unsigned flags)
++int write_loose_object(const struct object_id *oid, char *hdr,
++ int hdrlen, struct input_stream *in_stream,
++ time_t mtime, unsigned flags)
+ {
+ int fd, ret;
+ unsigned char compressed[4096];
+
+ ## object-store.h ##
+@@ object-store.h: int hash_object_file(const struct git_hash_algo *algo, const void *buf,
+ unsigned long len, const char *type,
+ struct object_id *oid);
+
++int write_loose_object(const struct object_id *oid, char *hdr,
++ int hdrlen, struct input_stream *in_stream,
++ time_t mtime, unsigned flags);
++
+ int write_object_file_flags(const void *buf, unsigned long len,
+ const char *type, struct object_id *oid,
+ unsigned flags);
+
+ ## t/t5590-unpack-non-delta-objects.sh (new) ##
@@
+#!/bin/sh
+#
@@ t/t5590-receive-unpack-objects.sh (new)
+ cd .git &&
+ find objects/?? -type f | sort
+ ) >expect &&
-+ git repack -ad
++ PACK=$(echo main | git pack-objects --progress --revs test)
+'
+
+test_expect_success 'setup GIT_ALLOC_LIMIT to 1MB' '
@@ t/t5590-receive-unpack-objects.sh (new)
+ git -C dest.git config receive.unpacklimit 100
+'
+
-+test_expect_success 'fail to push: cannot allocate' '
-+ test_must_fail git push dest.git HEAD 2>err &&
-+ test_i18ngrep "remote: fatal: attempting to allocate" err &&
++test_expect_success 'fail to unpack-objects: cannot allocate' '
++ test_must_fail git -C dest.git unpack-objects <test-$PACK.pack 2>err &&
++ test_i18ngrep "fatal: attempting to allocate" err &&
+ (
+ cd dest.git &&
+ find objects/?? -type f | sort
@@ t/t5590-receive-unpack-objects.sh (new)
+'
+
+test_expect_success 'unpack big object in stream' '
-+ git push dest.git HEAD &&
++ git -C dest.git unpack-objects <test-$PACK.pack &&
+ git -C dest.git fsck &&
+ (
+ cd dest.git &&
@@ t/t5590-receive-unpack-objects.sh (new)
+'
+
+test_expect_success 'setup for unpack-objects dry-run test' '
-+ PACK=$(echo main | git pack-objects --progress --revs test) &&
-+ unset GIT_ALLOC_LIMIT &&
+ git init --bare unpack-test.git
+'
+
-+test_expect_success 'unpack-objects dry-run with large threshold' '
-+ (
-+ cd unpack-test.git &&
-+ git config core.bigFileThreshold 2m &&
-+ git unpack-objects -n <../test-$PACK.pack
-+ ) &&
-+ (
-+ cd unpack-test.git &&
-+ find objects/ -type f
-+ ) >actual &&
-+ test_must_be_empty actual
-+'
-+
-+test_expect_success 'unpack-objects dry-run with small threshold' '
++test_expect_success 'unpack-objects dry-run' '
+ (
+ cd unpack-test.git &&
-+ git config core.bigFileThreshold 1m &&
+ git unpack-objects -n <../test-$PACK.pack
+ ) &&
+ (
--
2.34.0.6.g676eedc724
next prev parent reply other threads:[~2021-11-22 3:35 UTC|newest]
Thread overview: 211+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-10-09 8:20 [PATCH] unpack-objects: unpack large object in stream Han Xin
2021-10-19 7:37 ` Han Xin
2021-10-20 14:42 ` Philip Oakley
2021-10-21 3:42 ` Han Xin
2021-10-21 22:47 ` Philip Oakley
2021-11-03 1:48 ` Han Xin
2021-11-03 10:07 ` Philip Oakley
2021-11-12 9:40 ` [PATCH v2 1/6] object-file: refactor write_loose_object() to support inputstream Han Xin
2021-11-18 4:59 ` Jiang Xin
2021-11-18 6:45 ` Junio C Hamano
2021-11-12 9:40 ` [PATCH v2 2/6] object-file.c: add dry_run mode for write_loose_object() Han Xin
2021-11-18 5:42 ` Jiang Xin
2021-11-12 9:40 ` [PATCH v2 3/6] object-file.c: handle nil oid in write_loose_object() Han Xin
2021-11-18 5:49 ` Jiang Xin
2021-11-12 9:40 ` [PATCH v2 4/6] object-file.c: read input stream repeatedly " Han Xin
2021-11-18 5:56 ` Jiang Xin
2021-11-12 9:40 ` [PATCH v2 5/6] object-store.h: add write_loose_object() Han Xin
2021-11-12 9:40 ` [PATCH v2 6/6] unpack-objects: unpack large object in stream Han Xin
2021-11-18 7:14 ` Jiang Xin
2021-11-22 3:32 ` Han Xin [this message]
2021-11-29 7:01 ` [PATCH v3 0/5] unpack large objects " Han Xin
2021-11-29 19:12 ` Jeff King
2021-11-30 2:57 ` Han Xin
2021-12-03 9:35 ` [PATCH v4 " Han Xin
2021-12-07 16:18 ` Derrick Stolee
2021-12-10 10:34 ` [PATCH v5 0/6] unpack large blobs " Han Xin
2021-12-17 11:26 ` Han Xin
2021-12-21 11:51 ` [PATCH v7 0/5] " Han Xin
2021-12-21 11:51 ` [PATCH v7 1/5] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-12-21 14:09 ` Ævar Arnfjörð Bjarmason
2021-12-21 14:43 ` René Scharfe
2021-12-21 15:04 ` Ævar Arnfjörð Bjarmason
2021-12-22 11:15 ` Jiang Xin
2021-12-22 11:29 ` Jiang Xin
2021-12-31 3:06 ` Jiang Xin
2021-12-21 11:51 ` [PATCH v7 2/5] object-file API: add a format_object_header() function Han Xin
2021-12-21 14:30 ` René Scharfe
2022-02-01 14:28 ` C99 %z (was: [PATCH v7 2/5] object-file API: add a format_object_header() function) Ævar Arnfjörð Bjarmason
2021-12-31 3:12 ` [PATCH v7 2/5] object-file API: add a format_object_header() function Jiang Xin
2021-12-21 11:51 ` [PATCH v7 3/5] object-file.c: refactor write_loose_object() to reuse in stream version Han Xin
2021-12-21 14:16 ` Ævar Arnfjörð Bjarmason
2021-12-22 12:02 ` Jiang Xin
2021-12-21 11:52 ` [PATCH v7 4/5] object-file.c: add "write_stream_object_file()" to support read in stream Han Xin
2021-12-21 14:20 ` Ævar Arnfjörð Bjarmason
2021-12-21 15:05 ` Ævar Arnfjörð Bjarmason
2021-12-21 11:52 ` [PATCH v7 5/5] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2021-12-21 15:06 ` Ævar Arnfjörð Bjarmason
2021-12-31 3:19 ` Jiang Xin
2022-01-08 8:54 ` [PATCH v8 0/6] unpack large blobs in stream Han Xin
2022-01-20 11:21 ` [PATCH v9 0/5] " Han Xin
2022-02-01 21:24 ` Ævar Arnfjörð Bjarmason
2022-02-02 8:32 ` Han Xin
2022-02-02 10:59 ` Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 0/6] unpack-objects: support streaming large objects to disk Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 1/6] unpack-objects: low memory footprint for get_data() in dry_run mode Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 2/6] object-file.c: do fsync() and close() before post-write die() Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 3/6] object-file.c: refactor write_loose_object() to several steps Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 4/6] object-file.c: add "stream_loose_object()" to handle large object Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 5/6] core doc: modernize core.bigFileThreshold documentation Ævar Arnfjörð Bjarmason
2022-02-04 14:07 ` [PATCH v10 6/6] unpack-objects: use stream_loose_object() to unpack large objects Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 0/8] unpack-objects: support streaming blobs to disk Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 1/8] unpack-objects: low memory footprint for get_data() in dry_run mode Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 2/8] object-file.c: do fsync() and close() before post-write die() Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 3/8] object-file.c: refactor write_loose_object() to several steps Ævar Arnfjörð Bjarmason
2022-03-19 10:11 ` René Scharfe
2022-03-19 0:23 ` [PATCH v11 4/8] object-file.c: factor out deflate part of write_loose_object() Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 5/8] object-file.c: add "stream_loose_object()" to handle large object Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 6/8] core doc: modernize core.bigFileThreshold documentation Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 7/8] unpack-objects: refactor away unpack_non_delta_entry() Ævar Arnfjörð Bjarmason
2022-03-19 0:23 ` [PATCH v11 8/8] unpack-objects: use stream_loose_object() to unpack large objects Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 0/8] unpack-objects: support streaming blobs to disk Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 1/8] unpack-objects: low memory footprint for get_data() in dry_run mode Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 2/8] object-file.c: do fsync() and close() before post-write die() Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 3/8] object-file.c: refactor write_loose_object() to several steps Ævar Arnfjörð Bjarmason
2022-03-30 7:13 ` Han Xin
2022-03-30 17:34 ` Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 4/8] object-file.c: factor out deflate part of write_loose_object() Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 5/8] object-file.c: add "stream_loose_object()" to handle large object Ævar Arnfjörð Bjarmason
2022-03-31 19:54 ` Neeraj Singh
2022-03-29 13:56 ` [PATCH v12 6/8] core doc: modernize core.bigFileThreshold documentation Ævar Arnfjörð Bjarmason
2022-03-29 13:56 ` [PATCH v12 7/8] unpack-objects: refactor away unpack_non_delta_entry() Ævar Arnfjörð Bjarmason
2022-03-30 19:40 ` René Scharfe
2022-03-31 12:42 ` Ævar Arnfjörð Bjarmason
2022-03-31 16:38 ` René Scharfe
2022-03-29 13:56 ` [PATCH v12 8/8] unpack-objects: use stream_loose_object() to unpack large objects Ævar Arnfjörð Bjarmason
2022-06-04 10:10 ` [PATCH v13 0/7] unpack-objects: support streaming blobs to disk Ævar Arnfjörð Bjarmason
2022-06-04 10:10 ` [PATCH v13 1/7] unpack-objects: low memory footprint for get_data() in dry_run mode Ævar Arnfjörð Bjarmason
2022-06-06 18:35 ` Junio C Hamano
2022-06-09 4:10 ` Han Xin
2022-06-09 18:27 ` Junio C Hamano
2022-06-10 1:50 ` Han Xin
2022-06-10 2:05 ` Ævar Arnfjörð Bjarmason
2022-06-10 12:04 ` Han Xin
2022-06-04 10:10 ` [PATCH v13 2/7] object-file.c: do fsync() and close() before post-write die() Ævar Arnfjörð Bjarmason
2022-06-06 18:45 ` Junio C Hamano
2022-06-04 10:10 ` [PATCH v13 3/7] object-file.c: refactor write_loose_object() to several steps Ævar Arnfjörð Bjarmason
2022-06-04 10:10 ` [PATCH v13 4/7] object-file.c: factor out deflate part of write_loose_object() Ævar Arnfjörð Bjarmason
2022-06-04 10:10 ` [PATCH v13 5/7] object-file.c: add "stream_loose_object()" to handle large object Ævar Arnfjörð Bjarmason
2022-06-06 19:44 ` Junio C Hamano
2022-06-06 20:02 ` Junio C Hamano
2022-06-09 6:04 ` Han Xin
2022-06-09 6:14 ` Han Xin
2022-06-07 19:53 ` Neeraj Singh
2022-06-08 15:34 ` Junio C Hamano
2022-06-09 3:05 ` [RFC PATCH] object-file.c: batched disk flushes for stream_loose_object() Han Xin
2022-06-09 7:35 ` Neeraj Singh
2022-06-09 9:30 ` Johannes Schindelin
2022-06-10 12:55 ` Han Xin
2022-06-04 10:10 ` [PATCH v13 6/7] core doc: modernize core.bigFileThreshold documentation Ævar Arnfjörð Bjarmason
2022-06-06 19:50 ` Junio C Hamano
2022-06-04 10:10 ` [PATCH v13 7/7] unpack-objects: use stream_loose_object() to unpack large objects Ævar Arnfjörð Bjarmason
2022-06-10 14:46 ` [PATCH v14 0/7] unpack-objects: support streaming blobs to disk Han Xin
2022-06-10 14:46 ` [PATCH v14 1/7] unpack-objects: low memory footprint for get_data() in dry_run mode Han Xin
2022-06-10 14:46 ` [PATCH v14 2/7] object-file.c: do fsync() and close() before post-write die() Han Xin
2022-06-10 21:10 ` René Scharfe
2022-06-10 21:33 ` Junio C Hamano
2022-06-11 1:50 ` Han Xin
2022-06-10 14:46 ` [PATCH v14 3/7] object-file.c: refactor write_loose_object() to several steps Han Xin
2022-06-10 14:46 ` [PATCH v14 4/7] object-file.c: factor out deflate part of write_loose_object() Han Xin
2022-06-10 14:46 ` [PATCH v14 5/7] object-file.c: add "stream_loose_object()" to handle large object Han Xin
2022-06-10 14:46 ` [PATCH v14 6/7] core doc: modernize core.bigFileThreshold documentation Han Xin
2022-06-10 21:01 ` Junio C Hamano
2022-06-10 14:46 ` [PATCH v14 7/7] unpack-objects: use stream_loose_object() to unpack large objects Han Xin
2022-06-11 2:44 ` [PATCH v15 0/6] unpack-objects: support streaming blobs to disk Han Xin
2022-06-11 2:44 ` [PATCH v15 1/6] unpack-objects: low memory footprint for get_data() in dry_run mode Han Xin
2022-06-11 2:44 ` [PATCH v15 2/6] object-file.c: refactor write_loose_object() to several steps Han Xin
2022-06-11 2:44 ` [PATCH v15 3/6] object-file.c: factor out deflate part of write_loose_object() Han Xin
2022-06-11 2:44 ` [PATCH v15 4/6] object-file.c: add "stream_loose_object()" to handle large object Han Xin
2022-06-11 2:44 ` [PATCH v15 5/6] core doc: modernize core.bigFileThreshold documentation Han Xin
2022-06-11 2:44 ` [PATCH v15 6/6] unpack-objects: use stream_loose_object() to unpack large objects Han Xin
2022-07-01 2:01 ` Junio C Hamano
2022-05-20 3:05 ` [PATCH 0/1] unpack-objects: low memory footprint for get_data() in dry_run mode Han Xin
2022-05-20 3:05 ` [PATCH 1/1] " Han Xin
2022-01-20 11:21 ` [PATCH v9 1/5] " Han Xin
2022-01-20 11:21 ` [PATCH v9 2/5] object-file.c: refactor write_loose_object() to several steps Han Xin
2022-01-20 11:21 ` [PATCH v9 3/5] object-file.c: add "stream_loose_object()" to handle large object Han Xin
2022-01-20 11:21 ` [PATCH v9 4/5] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2022-01-20 11:21 ` [PATCH v9 5/5] object-file API: add a format_object_header() function Han Xin
2022-01-08 8:54 ` [PATCH v8 1/6] unpack-objects: low memory footprint for get_data() in dry_run mode Han Xin
2022-01-08 12:28 ` René Scharfe
2022-01-11 10:41 ` Han Xin
2022-01-08 8:54 ` [PATCH v8 2/6] object-file.c: refactor write_loose_object() to several steps Han Xin
2022-01-08 12:28 ` René Scharfe
2022-01-11 10:33 ` Han Xin
2022-01-08 8:54 ` [PATCH v8 3/6] object-file.c: remove the slash for directory_size() Han Xin
2022-01-08 17:24 ` René Scharfe
2022-01-11 10:14 ` Han Xin
2022-01-08 8:54 ` [PATCH v8 4/6] object-file.c: add "stream_loose_object()" to handle large object Han Xin
2022-01-08 8:54 ` [PATCH v8 5/6] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2022-01-08 8:54 ` [PATCH v8 6/6] object-file API: add a format_object_header() function Han Xin
2021-12-17 11:26 ` [PATCH v6 1/6] object-file.c: release strbuf in write_loose_object() Han Xin
2021-12-17 19:28 ` René Scharfe
2021-12-18 0:09 ` Junio C Hamano
2021-12-17 11:26 ` [PATCH v6 2/6] object-file.c: refactor object header generation into a function Han Xin
2021-12-20 12:10 ` [RFC PATCH] object-file API: add a format_loose_header() function Ævar Arnfjörð Bjarmason
2021-12-20 12:48 ` Philip Oakley
2021-12-20 22:25 ` Junio C Hamano
2021-12-21 1:42 ` Ævar Arnfjörð Bjarmason
2021-12-21 2:11 ` Junio C Hamano
2021-12-21 2:27 ` Ævar Arnfjörð Bjarmason
2021-12-21 11:43 ` Han Xin
2021-12-17 11:26 ` [PATCH v6 3/6] object-file.c: refactor write_loose_object() to reuse in stream version Han Xin
2021-12-17 11:26 ` [PATCH v6 4/6] object-file.c: make "write_object_file_flags()" to support read in stream Han Xin
2021-12-17 22:52 ` René Scharfe
2021-12-17 11:26 ` [PATCH v6 5/6] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-12-17 21:22 ` René Scharfe
2021-12-17 11:26 ` [PATCH v6 6/6] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2021-12-10 10:34 ` [PATCH v5 1/6] object-file: refactor write_loose_object() to support read from stream Han Xin
2021-12-10 10:34 ` [PATCH v5 2/6] object-file.c: handle undetermined oid in write_loose_object() Han Xin
2021-12-13 7:32 ` Ævar Arnfjörð Bjarmason
2021-12-10 10:34 ` [PATCH v5 3/6] object-file.c: read stream in a loop " Han Xin
2021-12-10 10:34 ` [PATCH v5 4/6] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-12-10 10:34 ` [PATCH v5 5/6] object-file.c: make "write_object_file_flags()" to support "HASH_STREAM" Han Xin
2021-12-10 10:34 ` [PATCH v5 6/6] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2021-12-13 8:05 ` Ævar Arnfjörð Bjarmason
2021-12-03 9:35 ` [PATCH v4 1/5] object-file: refactor write_loose_object() to read buffer from stream Han Xin
2021-12-03 13:28 ` Ævar Arnfjörð Bjarmason
2021-12-06 2:07 ` Han Xin
2021-12-03 9:35 ` [PATCH v4 2/5] object-file.c: handle undetermined oid in write_loose_object() Han Xin
2021-12-03 13:21 ` Ævar Arnfjörð Bjarmason
2021-12-06 2:51 ` Han Xin
2021-12-03 13:41 ` Ævar Arnfjörð Bjarmason
2021-12-06 3:12 ` Han Xin
2021-12-03 9:35 ` [PATCH v4 3/5] object-file.c: read stream in a loop " Han Xin
2021-12-03 9:35 ` [PATCH v4 4/5] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-12-03 13:59 ` Ævar Arnfjörð Bjarmason
2021-12-06 3:20 ` Han Xin
2021-12-03 9:35 ` [PATCH v4 5/5] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2021-12-03 13:07 ` Ævar Arnfjörð Bjarmason
2021-12-07 6:42 ` Han Xin
2021-12-03 13:54 ` Ævar Arnfjörð Bjarmason
2021-12-07 6:17 ` Han Xin
2021-12-03 14:05 ` Ævar Arnfjörð Bjarmason
2021-12-07 6:48 ` Han Xin
2021-11-22 3:32 ` [PATCH v3 1/5] object-file: refactor write_loose_object() to read buffer from stream Han Xin
2021-11-23 23:24 ` Junio C Hamano
2021-11-24 9:00 ` Han Xin
2021-11-22 3:32 ` [PATCH v3 2/5] object-file.c: handle undetermined oid in write_loose_object() Han Xin
2021-11-29 15:10 ` Derrick Stolee
2021-11-29 20:44 ` Junio C Hamano
2021-11-29 22:18 ` Derrick Stolee
2021-11-30 3:23 ` Han Xin
2021-11-22 3:32 ` [PATCH v3 3/5] object-file.c: read stream in a loop " Han Xin
2021-11-22 3:32 ` [PATCH v3 4/5] unpack-objects.c: add dry_run mode for get_data() Han Xin
2021-11-22 3:32 ` [PATCH v3 5/5] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2021-11-29 17:37 ` Derrick Stolee
2021-11-30 13:49 ` Han Xin
2021-11-30 18:38 ` Derrick Stolee
2021-12-01 20:37 ` "git hyperfine" (was: [PATCH v3 5/5] unpack-objects[...]) Ævar Arnfjörð Bjarmason
2021-12-02 7:33 ` [PATCH v3 5/5] unpack-objects: unpack_non_delta_entry() read data in a stream Han Xin
2021-12-02 13:53 ` Derrick Stolee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211122033220.32883-1-chiyutianyi@gmail.com \
--to=chiyutianyi@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=hanxin.hx@alibaba-inc.com \
--cc=peff@peff.net \
--cc=philipoakley@iee.email \
--cc=zhiyou.jx@alibaba-inc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).