From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
gitster@pobox.com, git@jeffhostetler.com, peartben@gmail.com,
christian.couder@gmail.com
Subject: [PATCH 12/18] fetch-pack: support excluding large blobs
Date: Fri, 29 Sep 2017 13:11:48 -0700 [thread overview]
Message-ID: <f211093280b422c32cc1b7034130072f35c5ed51.1506714999.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1506714999.git.jonathantanmy@google.com>
In-Reply-To: <cover.1506714999.git.jonathantanmy@google.com>
Teach fetch-pack and upload-pack to support excluding large blobs
through a blob-max-bytes parameter.
Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
Documentation/technical/pack-protocol.txt | 9 ++++++++
Documentation/technical/protocol-capabilities.txt | 7 ++++++
builtin/fetch-pack.c | 11 +++++++++
fetch-pack.c | 11 +++++++++
fetch-pack.h | 1 +
t/t5500-fetch-pack.sh | 27 +++++++++++++++++++++++
upload-pack.c | 16 +++++++++++++-
7 files changed, 81 insertions(+), 1 deletion(-)
diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt
index ed1eae8b8..db0e1150b 100644
--- a/Documentation/technical/pack-protocol.txt
+++ b/Documentation/technical/pack-protocol.txt
@@ -212,6 +212,7 @@ out of what the server said it could do with the first 'want' line.
upload-request = want-list
*shallow-line
*1depth-request
+ *1blob-max-bytes
flush-pkt
want-list = first-want
@@ -223,10 +224,14 @@ out of what the server said it could do with the first 'want' line.
PKT-LINE("deepen-since" SP timestamp) /
PKT-LINE("deepen-not" SP ref)
+ blob-max-bytes = PKT-LINE("blob-max-bytes" SP magnitude)
+
first-want = PKT-LINE("want" SP obj-id SP capability-list)
additional-want = PKT-LINE("want" SP obj-id)
depth = 1*DIGIT
+
+ magnitude = 1*DIGIT
----
Clients MUST send all the obj-ids it wants from the reference
@@ -249,6 +254,10 @@ complete those commits. Commits whose parents are not received as a
result are defined as shallow and marked as such in the server. This
information is sent back to the client in the next step.
+The client can optionally request a partial packfile that omits blobs
+above a certain size threshold using "blob-max-bytes". Files whose names
+start with ".git" are always included in the packfile, however.
+
Once all the 'want's and 'shallow's (and optional 'deepen') are
transferred, clients MUST send a flush-pkt, to tell the server side
that it is done sending the list.
diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt
index 26dcc6f50..7e878fce5 100644
--- a/Documentation/technical/protocol-capabilities.txt
+++ b/Documentation/technical/protocol-capabilities.txt
@@ -309,3 +309,10 @@ to accept a signed push certificate, and asks the <nonce> to be
included in the push certificate. A send-pack client MUST NOT
send a push-cert packet unless the receive-pack server advertises
this capability.
+
+blob-max-bytes
+--------------
+
+If the upload-pack server advertises this capability, fetch-pack
+may send "blob-max-bytes" to request the server to omit blobs above a
+certain size from the packfile.
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index 9a7ebf6e9..116be9bf5 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -4,6 +4,7 @@
#include "remote.h"
#include "connect.h"
#include "sha1-array.h"
+#include "config.h"
static const char fetch_pack_usage[] =
"git fetch-pack [--all] [--stdin] [--quiet | -q] [--keep | -k] [--thin] "
@@ -153,6 +154,16 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
args.no_haves = 1;
continue;
}
+ if (skip_prefix(arg, "--blob-max-bytes=", &arg)) {
+ unsigned long *ptr = xmalloc(sizeof(*ptr));
+ if (!git_parse_ulong(arg, ptr)) {
+ error("Invalid --blob-max-bytes value: %s",
+ arg);
+ usage(fetch_pack_usage);
+ }
+ args.blob_max_bytes = ptr;
+ continue;
+ }
usage(fetch_pack_usage);
}
if (deepen_not.nr)
diff --git a/fetch-pack.c b/fetch-pack.c
index d376c4ef1..19b8e9322 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -26,6 +26,7 @@ static int prefer_ofs_delta = 1;
static int no_done;
static int deepen_since_ok;
static int deepen_not_ok;
+static int blob_max_bytes_ok;
static int fetch_fsck_objects = -1;
static int transfer_fsck_objects = -1;
static int agent_supported;
@@ -407,6 +408,13 @@ static int find_common(struct fetch_pack_args *args,
packet_buf_write(&req_buf, "deepen-not %s", s->string);
}
}
+ if (args->blob_max_bytes) {
+ if (blob_max_bytes_ok)
+ packet_buf_write(&req_buf, "blob-max-bytes %ld",
+ *args->blob_max_bytes);
+ else
+ warning("blob-max-bytes not recognized by server, ignoring");
+ }
packet_buf_flush(&req_buf);
state_len = req_buf.len;
@@ -983,6 +991,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
die(_("Server does not support --shallow-exclude"));
if (!server_supports("deepen-relative") && args->deepen_relative)
die(_("Server does not support --deepen"));
+ if (server_supports("blob-max-bytes"))
+ blob_max_bytes_ok = 1;
if (everything_local(args, &ref, sought, nr_sought)) {
packet_flush(fd[1]);
@@ -1169,6 +1179,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
packet_flush(fd[1]);
die(_("no matching remote head"));
}
+
prepare_shallow_info(&si, shallow);
ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought,
&si, pack_lockfile);
diff --git a/fetch-pack.h b/fetch-pack.h
index 84904c348..3743a0ab2 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -12,6 +12,7 @@ struct fetch_pack_args {
int depth;
const char *deepen_since;
const struct string_list *deepen_not;
+ unsigned long *blob_max_bytes;
unsigned deepen_relative:1;
unsigned quiet:1;
unsigned keep_pack:1;
diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh
index 80a1a3239..62e384230 100755
--- a/t/t5500-fetch-pack.sh
+++ b/t/t5500-fetch-pack.sh
@@ -755,4 +755,31 @@ test_expect_success 'fetching deepen' '
)
'
+test_expect_success '--blob-max-bytes' '
+ rm -rf server client &&
+ test_create_repo server &&
+ test_commit -C server one &&
+ test_config -C server uploadpack.advertiseblobmaxbytes 1 &&
+
+ test_create_repo client &&
+ git -C client fetch-pack --blob-max-bytes=0 ../server HEAD &&
+
+ # Ensure that object is not inadvertently fetched
+ test_must_fail git -C client cat-file -e $(git hash-object server/one.t)
+'
+
+test_expect_success '--blob-max-bytes has no effect if support for it is not advertised' '
+ rm -rf server client &&
+ test_create_repo server &&
+ test_commit -C server one &&
+
+ test_create_repo client &&
+ git -C client fetch-pack --blob-max-bytes=0 ../server HEAD 2> err &&
+
+ # Ensure that object is fetched
+ git -C client cat-file -e $(git hash-object server/one.t) &&
+
+ test_i18ngrep "blob-max-bytes not recognized by server" err
+'
+
test_done
diff --git a/upload-pack.c b/upload-pack.c
index 7efff2fbf..484704eb6 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -63,6 +63,8 @@ static int use_sideband;
static int advertise_refs;
static int stateless_rpc;
static const char *pack_objects_hook;
+static int advertise_blob_max_bytes;
+static char *blob_max_bytes;
static void reset_timeout(void)
{
@@ -131,6 +133,8 @@ static void create_pack_file(void)
argv_array_push(&pack_objects.args, "--delta-base-offset");
if (use_include_tag)
argv_array_push(&pack_objects.args, "--include-tag");
+ if (blob_max_bytes)
+ argv_array_push(&pack_objects.args, blob_max_bytes);
pack_objects.in = -1;
pack_objects.out = -1;
@@ -794,6 +798,13 @@ static void receive_needs(void)
deepen_rev_list = 1;
continue;
}
+ if (skip_prefix(line, "blob-max-bytes ", &arg)) {
+ unsigned long s;
+ if (!git_parse_ulong(arg, &s))
+ die("git upload-pack: invalid blob-max-bytes value: %s", line);
+ blob_max_bytes = xstrfmt("--blob-max-bytes=%lu", s);
+ continue;
+ }
if (!skip_prefix(line, "want ", &arg) ||
get_oid_hex(arg, &oid_buf))
die("git upload-pack: protocol error, "
@@ -940,7 +951,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
struct strbuf symref_info = STRBUF_INIT;
format_symref_info(&symref_info, cb_data);
- packet_write_fmt(1, "%s %s%c%s%s%s%s%s agent=%s\n",
+ packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n",
oid_to_hex(oid), refname_nons,
0, capabilities,
(allow_unadvertised_object_request & ALLOW_TIP_SHA1) ?
@@ -949,6 +960,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
" allow-reachable-sha1-in-want" : "",
stateless_rpc ? " no-done" : "",
symref_info.buf,
+ advertise_blob_max_bytes ? " blob-max-bytes" : "",
git_user_agent_sanitized());
strbuf_release(&symref_info);
} else {
@@ -1028,6 +1040,8 @@ static int upload_pack_config(const char *var, const char *value, void *unused)
} else if (current_config_scope() != CONFIG_SCOPE_REPO) {
if (!strcmp("uploadpack.packobjectshook", var))
return git_config_string(&pack_objects_hook, var, value);
+ } else if (!strcmp("uploadpack.advertiseblobmaxbytes", var)) {
+ advertise_blob_max_bytes = git_config_bool(var, value);
}
return parse_hide_refs_config(var, value, "uploadpack");
}
--
2.14.2.822.g60be5d43e6-goog
next prev parent reply other threads:[~2017-09-29 20:12 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-09-29 20:11 [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches) Jonathan Tan
2017-09-29 20:11 ` [PATCH 01/18] fsck: introduce partialclone extension Jonathan Tan
2017-09-29 20:11 ` [PATCH 02/18] fsck: support refs pointing to promisor objects Jonathan Tan
2017-09-29 20:11 ` [PATCH 03/18] fsck: support referenced " Jonathan Tan
2017-09-29 20:11 ` [PATCH 04/18] fsck: support promisor objects as CLI argument Jonathan Tan
2017-09-29 20:11 ` [PATCH 05/18] index-pack: refactor writing of .keep files Jonathan Tan
2017-09-29 20:11 ` [PATCH 06/18] introduce fetch-object: fetch one promisor object Jonathan Tan
2017-09-29 20:11 ` [PATCH 07/18] sha1_file: support lazily fetching missing objects Jonathan Tan
2017-10-12 14:42 ` Christian Couder
2017-10-12 15:45 ` Christian Couder
2017-09-29 20:11 ` [PATCH 08/18] rev-list: support termination at promisor objects Jonathan Tan
2017-09-29 20:11 ` [PATCH 09/18] gc: do not repack promisor packfiles Jonathan Tan
2017-09-29 20:11 ` [PATCH 10/18] pack-objects: rename want_.* to ignore_.* Jonathan Tan
2017-09-29 20:11 ` [PATCH 11/18] pack-objects: support --blob-max-bytes Jonathan Tan
2017-09-29 20:11 ` Jonathan Tan [this message]
2017-09-29 20:11 ` [PATCH 13/18] fetch: refactor calculation of remote list Jonathan Tan
2017-09-29 20:11 ` [PATCH 14/18] fetch: support excluding large blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 15/18] clone: " Jonathan Tan
2017-09-29 20:11 ` [PATCH 16/18] clone: configure blobmaxbytes in created repos Jonathan Tan
2017-09-29 20:11 ` [PATCH 17/18] unpack-trees: batch fetching of missing blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 18/18] fetch-pack: restore save_commit_buffer after use Jonathan Tan
2017-09-29 21:08 ` [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches) Johannes Schindelin
2017-10-02 4:23 ` Junio C Hamano
2017-10-03 6:15 ` Christian Couder
2017-10-03 8:50 ` Junio C Hamano
2017-10-03 14:39 ` Jeff Hostetler
2017-10-03 23:42 ` Jonathan Tan
2017-10-04 13:30 ` Jeff Hostetler
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f211093280b422c32cc1b7034130072f35c5ed51.1506714999.git.jonathantanmy@google.com \
--to=jonathantanmy@google.com \
--cc=christian.couder@gmail.com \
--cc=git@jeffhostetler.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=peartben@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).