git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
	gitster@pobox.com, git@jeffhostetler.com, peartben@gmail.com,
	christian.couder@gmail.com
Subject: [PATCH 12/18] fetch-pack: support excluding large blobs
Date: Fri, 29 Sep 2017 13:11:48 -0700
Message-ID: <f211093280b422c32cc1b7034130072f35c5ed51.1506714999.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1506714999.git.jonathantanmy@google.com>

Teach fetch-pack and upload-pack to support excluding large blobs
through a blob-max-bytes parameter.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/technical/pack-protocol.txt         |  9 ++++++++
 Documentation/technical/protocol-capabilities.txt |  7 ++++++
 builtin/fetch-pack.c                              | 11 +++++++++
 fetch-pack.c                                      | 11 +++++++++
 fetch-pack.h                                      |  1 +
 t/t5500-fetch-pack.sh                             | 27 +++++++++++++++++++++++
 upload-pack.c                                     | 16 +++++++++++++-
 7 files changed, 81 insertions(+), 1 deletion(-)

diff --git a/Documentation/technical/pack-protocol.txt b/Documentation/technical/pack-protocol.txt
index ed1eae8b8..db0e1150b 100644
--- a/Documentation/technical/pack-protocol.txt
+++ b/Documentation/technical/pack-protocol.txt
@@ -212,6 +212,7 @@ out of what the server said it could do with the first 'want' line.
   upload-request    =  want-list
 		       *shallow-line
 		       *1depth-request
+		       *1blob-max-bytes
 		       flush-pkt
 
   want-list         =  first-want
@@ -223,10 +224,14 @@ out of what the server said it could do with the first 'want' line.
 		       PKT-LINE("deepen-since" SP timestamp) /
 		       PKT-LINE("deepen-not" SP ref)
 
+  blob-max-bytes    =  PKT-LINE("blob-max-bytes" SP magnitude)
+
   first-want        =  PKT-LINE("want" SP obj-id SP capability-list)
   additional-want   =  PKT-LINE("want" SP obj-id)
 
   depth             =  1*DIGIT
+
+  magnitude         =  1*DIGIT
 ----
 
 Clients MUST send all the obj-ids it wants from the reference
@@ -249,6 +254,10 @@ complete those commits. Commits whose parents are not received as a
 result are defined as shallow and marked as such in the server. This
 information is sent back to the client in the next step.
 
+The client can optionally request a partial packfile that omits blobs
+above a certain size threshold using "blob-max-bytes". Files whose names
+start with ".git" are always included in the packfile, however.
+
 Once all the 'want's and 'shallow's (and optional 'deepen') are
 transferred, clients MUST send a flush-pkt, to tell the server side
 that it is done sending the list.
diff --git a/Documentation/technical/protocol-capabilities.txt b/Documentation/technical/protocol-capabilities.txt
index 26dcc6f50..7e878fce5 100644
--- a/Documentation/technical/protocol-capabilities.txt
+++ b/Documentation/technical/protocol-capabilities.txt
@@ -309,3 +309,10 @@ to accept a signed push certificate, and asks the <nonce> to be
 included in the push certificate.  A send-pack client MUST NOT
 send a push-cert packet unless the receive-pack server advertises
 this capability.
+
+blob-max-bytes
+--------------
+
+If the upload-pack server advertises this capability, fetch-pack
+may send "blob-max-bytes" to request the server to omit blobs above a
+certain size from the packfile.
diff --git a/builtin/fetch-pack.c b/builtin/fetch-pack.c
index 9a7ebf6e9..116be9bf5 100644
--- a/builtin/fetch-pack.c
+++ b/builtin/fetch-pack.c
@@ -4,6 +4,7 @@
 #include "remote.h"
 #include "connect.h"
 #include "sha1-array.h"
+#include "config.h"
 
 static const char fetch_pack_usage[] =
 "git fetch-pack [--all] [--stdin] [--quiet | -q] [--keep | -k] [--thin] "
@@ -153,6 +154,16 @@ int cmd_fetch_pack(int argc, const char **argv, const char *prefix)
 			args.no_haves = 1;
 			continue;
 		}
+		if (skip_prefix(arg, "--blob-max-bytes=", &arg)) {
+			unsigned long *ptr = xmalloc(sizeof(*ptr));
+			if (!git_parse_ulong(arg, ptr)) {
+				error("Invalid --blob-max-bytes value: %s",
+				      arg);
+				usage(fetch_pack_usage);
+			}
+			args.blob_max_bytes = ptr;
+			continue;
+		}
 		usage(fetch_pack_usage);
 	}
 	if (deepen_not.nr)
diff --git a/fetch-pack.c b/fetch-pack.c
index d376c4ef1..19b8e9322 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -26,6 +26,7 @@ static int prefer_ofs_delta = 1;
 static int no_done;
 static int deepen_since_ok;
 static int deepen_not_ok;
+static int blob_max_bytes_ok;
 static int fetch_fsck_objects = -1;
 static int transfer_fsck_objects = -1;
 static int agent_supported;
@@ -407,6 +408,13 @@ static int find_common(struct fetch_pack_args *args,
 			packet_buf_write(&req_buf, "deepen-not %s", s->string);
 		}
 	}
+	if (args->blob_max_bytes) {
+		if (blob_max_bytes_ok)
+			packet_buf_write(&req_buf, "blob-max-bytes %ld",
+					 *args->blob_max_bytes);
+		else
+			warning("blob-max-bytes not recognized by server, ignoring");
+	}
 	packet_buf_flush(&req_buf);
 	state_len = req_buf.len;
 
@@ -983,6 +991,8 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args,
 		die(_("Server does not support --shallow-exclude"));
 	if (!server_supports("deepen-relative") && args->deepen_relative)
 		die(_("Server does not support --deepen"));
+	if (server_supports("blob-max-bytes"))
+		blob_max_bytes_ok = 1;
 
 	if (everything_local(args, &ref, sought, nr_sought)) {
 		packet_flush(fd[1]);
@@ -1169,6 +1179,7 @@ struct ref *fetch_pack(struct fetch_pack_args *args,
 		packet_flush(fd[1]);
 		die(_("no matching remote head"));
 	}
+
 	prepare_shallow_info(&si, shallow);
 	ref_cpy = do_fetch_pack(args, fd, ref, sought, nr_sought,
 				&si, pack_lockfile);
diff --git a/fetch-pack.h b/fetch-pack.h
index 84904c348..3743a0ab2 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -12,6 +12,7 @@ struct fetch_pack_args {
 	int depth;
 	const char *deepen_since;
 	const struct string_list *deepen_not;
+	unsigned long *blob_max_bytes;
 	unsigned deepen_relative:1;
 	unsigned quiet:1;
 	unsigned keep_pack:1;
diff --git a/t/t5500-fetch-pack.sh b/t/t5500-fetch-pack.sh
index 80a1a3239..62e384230 100755
--- a/t/t5500-fetch-pack.sh
+++ b/t/t5500-fetch-pack.sh
@@ -755,4 +755,31 @@ test_expect_success 'fetching deepen' '
 	)
 '
 
+test_expect_success '--blob-max-bytes' '
+	rm -rf server client &&
+	test_create_repo server &&
+	test_commit -C server one &&
+	test_config -C server uploadpack.advertiseblobmaxbytes 1 &&
+
+	test_create_repo client &&
+	git -C client fetch-pack --blob-max-bytes=0 ../server HEAD &&
+
+	# Ensure that object is not inadvertently fetched
+	test_must_fail git -C client cat-file -e $(git hash-object server/one.t)
+'
+
+test_expect_success '--blob-max-bytes has no effect if support for it is not advertised' '
+	rm -rf server client &&
+	test_create_repo server &&
+	test_commit -C server one &&
+
+	test_create_repo client &&
+	git -C client fetch-pack --blob-max-bytes=0 ../server HEAD 2> err &&
+
+	# Ensure that object is fetched
+	git -C client cat-file -e $(git hash-object server/one.t) &&
+
+	test_i18ngrep "blob-max-bytes not recognized by server" err
+'
+
 test_done
diff --git a/upload-pack.c b/upload-pack.c
index 7efff2fbf..484704eb6 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -63,6 +63,8 @@ static int use_sideband;
 static int advertise_refs;
 static int stateless_rpc;
 static const char *pack_objects_hook;
+static int advertise_blob_max_bytes;
+static char *blob_max_bytes;
 
 static void reset_timeout(void)
 {
@@ -131,6 +133,8 @@ static void create_pack_file(void)
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
+	if (blob_max_bytes)
+		argv_array_push(&pack_objects.args, blob_max_bytes);
 
 	pack_objects.in = -1;
 	pack_objects.out = -1;
@@ -794,6 +798,13 @@ static void receive_needs(void)
 			deepen_rev_list = 1;
 			continue;
 		}
+		if (skip_prefix(line, "blob-max-bytes ", &arg)) {
+			unsigned long s;
+			if (!git_parse_ulong(arg, &s))
+				die("git upload-pack: invalid blob-max-bytes value: %s", line);
+			blob_max_bytes = xstrfmt("--blob-max-bytes=%lu", s);
+			continue;
+		}
 		if (!skip_prefix(line, "want ", &arg) ||
 		    get_oid_hex(arg, &oid_buf))
 			die("git upload-pack: protocol error, "
@@ -940,7 +951,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 		struct strbuf symref_info = STRBUF_INIT;
 
 		format_symref_info(&symref_info, cb_data);
-		packet_write_fmt(1, "%s %s%c%s%s%s%s%s agent=%s\n",
+		packet_write_fmt(1, "%s %s%c%s%s%s%s%s%s agent=%s\n",
 			     oid_to_hex(oid), refname_nons,
 			     0, capabilities,
 			     (allow_unadvertised_object_request & ALLOW_TIP_SHA1) ?
@@ -949,6 +960,7 @@ static int send_ref(const char *refname, const struct object_id *oid,
 				     " allow-reachable-sha1-in-want" : "",
 			     stateless_rpc ? " no-done" : "",
 			     symref_info.buf,
+			     advertise_blob_max_bytes ? " blob-max-bytes" : "",
 			     git_user_agent_sanitized());
 		strbuf_release(&symref_info);
 	} else {
@@ -1028,6 +1040,8 @@ static int upload_pack_config(const char *var, const char *value, void *unused)
 	} else if (current_config_scope() != CONFIG_SCOPE_REPO) {
 		if (!strcmp("uploadpack.packobjectshook", var))
 			return git_config_string(&pack_objects_hook, var, value);
+	} else if (!strcmp("uploadpack.advertiseblobmaxbytes", var)) {
+		advertise_blob_max_bytes = git_config_bool(var, value);
 	}
 	return parse_hide_refs_config(var, value, "uploadpack");
 }
-- 
2.14.2.822.g60be5d43e6-goog


  parent reply index

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-29 20:11 [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches) Jonathan Tan
2017-09-29 20:11 ` [PATCH 01/18] fsck: introduce partialclone extension Jonathan Tan
2017-09-29 20:11 ` [PATCH 02/18] fsck: support refs pointing to promisor objects Jonathan Tan
2017-09-29 20:11 ` [PATCH 03/18] fsck: support referenced " Jonathan Tan
2017-09-29 20:11 ` [PATCH 04/18] fsck: support promisor objects as CLI argument Jonathan Tan
2017-09-29 20:11 ` [PATCH 05/18] index-pack: refactor writing of .keep files Jonathan Tan
2017-09-29 20:11 ` [PATCH 06/18] introduce fetch-object: fetch one promisor object Jonathan Tan
2017-09-29 20:11 ` [PATCH 07/18] sha1_file: support lazily fetching missing objects Jonathan Tan
2017-10-12 14:42   ` Christian Couder
2017-10-12 15:45     ` Christian Couder
2017-09-29 20:11 ` [PATCH 08/18] rev-list: support termination at promisor objects Jonathan Tan
2017-09-29 20:11 ` [PATCH 09/18] gc: do not repack promisor packfiles Jonathan Tan
2017-09-29 20:11 ` [PATCH 10/18] pack-objects: rename want_.* to ignore_.* Jonathan Tan
2017-09-29 20:11 ` [PATCH 11/18] pack-objects: support --blob-max-bytes Jonathan Tan
2017-09-29 20:11 ` Jonathan Tan [this message]
2017-09-29 20:11 ` [PATCH 13/18] fetch: refactor calculation of remote list Jonathan Tan
2017-09-29 20:11 ` [PATCH 14/18] fetch: support excluding large blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 15/18] clone: " Jonathan Tan
2017-09-29 20:11 ` [PATCH 16/18] clone: configure blobmaxbytes in created repos Jonathan Tan
2017-09-29 20:11 ` [PATCH 17/18] unpack-trees: batch fetching of missing blobs Jonathan Tan
2017-09-29 20:11 ` [PATCH 18/18] fetch-pack: restore save_commit_buffer after use Jonathan Tan
2017-09-29 21:08 ` [PATCH 00/18] Partial clone (from clone to lazy fetch in 18 patches) Johannes Schindelin
2017-10-02  4:23 ` Junio C Hamano
2017-10-03  6:15 ` Christian Couder
2017-10-03  8:50   ` Junio C Hamano
2017-10-03 14:39     ` Jeff Hostetler
2017-10-03 23:42       ` Jonathan Tan
2017-10-04 13:30         ` Jeff Hostetler

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f211093280b422c32cc1b7034130072f35c5ed51.1506714999.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=christian.couder@gmail.com \
    --cc=git@jeffhostetler.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peartben@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox