git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] fetch-pack: support negotiation tip whitelist
@ 2018-06-25 19:37 Jonathan Tan
  2018-06-26 17:53 ` Jonathan Nieder
                   ` (4 more replies)
  0 siblings, 5 replies; 14+ messages in thread
From: Jonathan Tan @ 2018-06-25 19:37 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan

During negotiation, fetch-pack eventually reports as "have" lines all
commits reachable from all refs. Allow the user to restrict the commits
sent in this way by providing a whitelist of tips; only the tips
themselves and their ancestors will be sent.

This feature is only supported for protocols that support connect or
stateless-connect (such as HTTP with protocol v2).

This will speed up negotiation when the repository has multiple
relatively independent branches (for example, when a repository
interacts with multiple repositories, such as with linux-next [1] and
torvalds/linux [2]), and the user knows which local branch is likely to
have commits in common with the upstream branch they are fetching.

[1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
[2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
This is based on jt/fetch-pack-negotiator, but if that branch is
undesirable for whatever reason, I can port this to master.
---
 builtin/fetch.c    | 21 ++++++++++++++++++
 fetch-pack.c       | 19 ++++++++++++++--
 fetch-pack.h       |  7 ++++++
 t/t5510-fetch.sh   | 55 ++++++++++++++++++++++++++++++++++++++++++++++
 transport-helper.c |  3 +++
 transport.c        |  1 +
 transport.h        | 10 +++++++++
 7 files changed, 114 insertions(+), 2 deletions(-)

diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669a..12daec0f3 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -63,6 +63,7 @@ static int shown_url = 0;
 static struct refspec refmap = REFSPEC_INIT_FETCH;
 static struct list_objects_filter_options filter_options;
 static struct string_list server_options = STRING_LIST_INIT_DUP;
+static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -174,6 +175,8 @@ static struct option builtin_fetch_options[] = {
 			TRANSPORT_FAMILY_IPV4),
 	OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
 			TRANSPORT_FAMILY_IPV6),
+	OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
+			N_("report that we have only objects reachable from this object")),
 	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
 	OPT_END()
 };
@@ -1075,6 +1078,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 			   filter_options.filter_spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
 	}
+	if (negotiation_tip.nr) {
+		struct oid_array *oids;
+		if (transport->smart_options) {
+			int i;
+			oids = xcalloc(1, sizeof(*oids));
+			for (i = 0; i < negotiation_tip.nr; i++) {
+				struct object_id oid;
+				if (get_oid(negotiation_tip.items[i].string,
+					    &oid))
+					die("%s is not a valid object",
+					    negotiation_tip.items[i].string);
+				oid_array_append(oids, &oid);
+			}
+			transport->smart_options->negotiation_tips = oids;
+		} else {
+			warning("Ignoring --negotiation-tip because the protocol does not support it.");
+		}
+	}
 	return transport;
 }
 
diff --git a/fetch-pack.c b/fetch-pack.c
index ba12085c4..c66bd49bd 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -213,6 +213,21 @@ static int next_flush(int stateless_rpc, int count)
 	return count;
 }
 
+static void mark_tips(struct fetch_negotiator *negotiator,
+		      const struct oid_array *negotiation_tips)
+{
+	int i;
+	if (!negotiation_tips) {
+		for_each_ref(rev_list_insert_ref_oid, negotiator);
+		return;
+	}
+
+	for (i = 0; i < negotiation_tips->nr; i++)
+		rev_list_insert_ref(negotiator, NULL,
+				    &negotiation_tips->oid[i]);
+	return;
+}
+
 static int find_common(struct fetch_negotiator *negotiator,
 		       struct fetch_pack_args *args,
 		       int fd[2], struct object_id *result_oid,
@@ -230,7 +245,7 @@ static int find_common(struct fetch_negotiator *negotiator,
 	if (args->stateless_rpc && multi_ack == 1)
 		die(_("--stateless-rpc requires multi_ack_detailed"));
 
-	for_each_ref(rev_list_insert_ref_oid, negotiator);
+	mark_tips(negotiator, args->negotiation_tips);
 	for_each_cached_alternate(negotiator, insert_one_alternate_object);
 
 	fetching = 0;
@@ -1295,7 +1310,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			else
 				state = FETCH_SEND_REQUEST;
 
-			for_each_ref(rev_list_insert_ref_oid, &negotiator);
+			mark_tips(&negotiator, args->negotiation_tips);
 			for_each_cached_alternate(&negotiator,
 						  insert_one_alternate_object);
 			break;
diff --git a/fetch-pack.h b/fetch-pack.h
index bb45a366a..1859ee927 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -16,6 +16,13 @@ struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+
+	/*
+	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
+	 * lines only with these tips and their ancestors.
+	 */
+	const struct oid_array *negotiation_tips;
+
 	unsigned deepen_relative:1;
 	unsigned quiet:1;
 	unsigned keep_pack:1;
diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
index e402aee6a..ea1b5e53c 100755
--- a/t/t5510-fetch.sh
+++ b/t/t5510-fetch.sh
@@ -865,4 +865,59 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
 	test_cmp expect actual
 '
 
+negotiator_tip () {
+	SERVER="$1"
+	URL="$2"
+	USE_PROTOCOL_V2="$3"
+
+	rm -rf "$SERVER" client &&
+	git init "$SERVER" &&
+	test_commit -C "$SERVER" alpha_1 &&
+	test_commit -C "$SERVER" alpha_2 &&
+	git -C "$SERVER" checkout --orphan beta &&
+	test_commit -C "$SERVER" beta_1 &&
+	test_commit -C "$SERVER" beta_2 &&
+
+	git clone "$URL" client &&
+
+	if [ "$USE_PROTOCOL_V2" -eq 1 ]
+	then
+		git -C "$SERVER" config protocol.version 2
+		git -C client config protocol.version 2
+	fi &&
+
+	test_commit -C "$SERVER" beta_s &&
+	git -C "$SERVER" checkout master &&
+	test_commit -C "$SERVER" alpha_s &&
+	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2 &&
+
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
+		origin alpha_s beta_s &&
+
+	# Ensure that {alpha,beta}_1 are sent as "have", but not {alpha_beta}_2
+	ALPHA_1=$(git -C client rev-parse alpha_1) &&
+	grep "fetch> have $ALPHA_1" trace &&
+	BETA_1=$(git -C client rev-parse beta_1) &&
+	grep "fetch> have $BETA_1" trace &&
+	ALPHA_2=$(git -C client rev-parse alpha_2) &&
+	! grep "fetch> have $ALPHA_2" trace &&
+	BETA_2=$(git -C client rev-parse beta_2) &&
+	! grep "fetch> have $BETA_2" trace
+}
+
+test_expect_success '--negotiator-tip limits "have" lines sent' '
+	negotiator_tip server server 0
+'
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' '
+	negotiator_tip "$HTTPD_DOCUMENT_ROOT_PATH/server" \
+		"$HTTPD_URL/smart/server" 1
+'
+
+stop_httpd
+
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index 1f8ff7e94..ad8f7c772 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -684,6 +684,9 @@ static int fetch(struct transport *transport,
 			transport, "filter",
 			data->transport_options.filter_options.filter_spec);
 
+	if (data->transport_options.negotiation_tips)
+		warning("Ignoring --negotiation-tip because the protocol does not support it.");
+
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
diff --git a/transport.c b/transport.c
index a32da30de..9f10f8ad9 100644
--- a/transport.c
+++ b/transport.c
@@ -318,6 +318,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.filter_options = data->options.filter_options;
 	args.stateless_rpc = transport->stateless_rpc;
 	args.server_options = transport->server_options;
+	args.negotiation_tips = data->options.negotiation_tips;
 
 	if (!data->got_remote_heads)
 		refs_tmp = get_refs_via_connect(transport, 0, NULL);
diff --git a/transport.h b/transport.h
index 7792b0858..d31be5be6 100644
--- a/transport.h
+++ b/transport.h
@@ -25,6 +25,16 @@ struct git_transport_options {
 	const char *receivepack;
 	struct push_cas_option *cas;
 	struct list_objects_filter_options filter_options;
+
+	/*
+	 * This is only used during fetch. See the documentation of
+	 * negotiation_tips in struct fetch_pack_args.
+	 *
+	 * This field is only supported by transports that support connect or
+	 * stateless_connect. Set this field directly instead of using
+	 * transport_set_option().
+	 */
+	struct oid_array *negotiation_tips;
 };
 
 enum transport_family {
-- 
2.18.0.rc2.347.g0da03f3a46.dirty


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH] fetch-pack: support negotiation tip whitelist
  2018-06-25 19:37 [PATCH] fetch-pack: support negotiation tip whitelist Jonathan Tan
@ 2018-06-26 17:53 ` Jonathan Nieder
  2018-06-26 18:59 ` Junio C Hamano
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Jonathan Nieder @ 2018-06-26 17:53 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, Greg Thelen, Michel Lespinasse

Hi,

Jonathan Tan wrote:

> During negotiation, fetch-pack eventually reports as "have" lines all
> commits reachable from all refs. Allow the user to restrict the commits
> sent in this way by providing a whitelist of tips; only the tips
> themselves and their ancestors will be sent.
>
> This feature is only supported for protocols that support connect or
> stateless-connect (such as HTTP with protocol v2).
>
> This will speed up negotiation when the repository has multiple
> relatively independent branches (for example, when a repository
> interacts with multiple repositories, such as with linux-next [1] and
> torvalds/linux [2]), and the user knows which local branch is likely to
> have commits in common with the upstream branch they are fetching.
>
> [1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
> [2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---

Very neat.  Thanks to Greg Thelen and Michel Lespinasse (cc-ed) for
this suggestion.

> This is based on jt/fetch-pack-negotiator, but if that branch is
> undesirable for whatever reason, I can port this to master.
>
>  builtin/fetch.c    | 21 ++++++++++++++++++
>  fetch-pack.c       | 19 ++++++++++++++--
>  fetch-pack.h       |  7 ++++++
>  t/t5510-fetch.sh   | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>  transport-helper.c |  3 +++
>  transport.c        |  1 +
>  transport.h        | 10 +++++++++
>  7 files changed, 114 insertions(+), 2 deletions(-)

Small nit: could this be documented in Documentation/fetch.txt as well?

Thanks,
Jonathan

Patch left unsnipped for reference.

> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index ea5b9669a..12daec0f3 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -63,6 +63,7 @@ static int shown_url = 0;
>  static struct refspec refmap = REFSPEC_INIT_FETCH;
>  static struct list_objects_filter_options filter_options;
>  static struct string_list server_options = STRING_LIST_INIT_DUP;
> +static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -174,6 +175,8 @@ static struct option builtin_fetch_options[] = {
>  			TRANSPORT_FAMILY_IPV4),
>  	OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
>  			TRANSPORT_FAMILY_IPV6),
> +	OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
> +			N_("report that we have only objects reachable from this object")),
>  	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
>  	OPT_END()
>  };
> @@ -1075,6 +1078,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
>  			   filter_options.filter_spec);
>  		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
>  	}
> +	if (negotiation_tip.nr) {
> +		struct oid_array *oids;
> +		if (transport->smart_options) {
> +			int i;
> +			oids = xcalloc(1, sizeof(*oids));
> +			for (i = 0; i < negotiation_tip.nr; i++) {
> +				struct object_id oid;
> +				if (get_oid(negotiation_tip.items[i].string,
> +					    &oid))
> +					die("%s is not a valid object",
> +					    negotiation_tip.items[i].string);
> +				oid_array_append(oids, &oid);
> +			}
> +			transport->smart_options->negotiation_tips = oids;
> +		} else {
> +			warning("Ignoring --negotiation-tip because the protocol does not support it.");
> +		}
> +	}
>  	return transport;
>  }
>  
> diff --git a/fetch-pack.c b/fetch-pack.c
> index ba12085c4..c66bd49bd 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -213,6 +213,21 @@ static int next_flush(int stateless_rpc, int count)
>  	return count;
>  }
>  
> +static void mark_tips(struct fetch_negotiator *negotiator,
> +		      const struct oid_array *negotiation_tips)
> +{
> +	int i;
> +	if (!negotiation_tips) {
> +		for_each_ref(rev_list_insert_ref_oid, negotiator);
> +		return;
> +	}
> +
> +	for (i = 0; i < negotiation_tips->nr; i++)
> +		rev_list_insert_ref(negotiator, NULL,
> +				    &negotiation_tips->oid[i]);
> +	return;
> +}
> +
>  static int find_common(struct fetch_negotiator *negotiator,
>  		       struct fetch_pack_args *args,
>  		       int fd[2], struct object_id *result_oid,
> @@ -230,7 +245,7 @@ static int find_common(struct fetch_negotiator *negotiator,
>  	if (args->stateless_rpc && multi_ack == 1)
>  		die(_("--stateless-rpc requires multi_ack_detailed"));
>  
> -	for_each_ref(rev_list_insert_ref_oid, negotiator);
> +	mark_tips(negotiator, args->negotiation_tips);
>  	for_each_cached_alternate(negotiator, insert_one_alternate_object);
>  
>  	fetching = 0;
> @@ -1295,7 +1310,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			else
>  				state = FETCH_SEND_REQUEST;
>  
> -			for_each_ref(rev_list_insert_ref_oid, &negotiator);
> +			mark_tips(&negotiator, args->negotiation_tips);
>  			for_each_cached_alternate(&negotiator,
>  						  insert_one_alternate_object);
>  			break;
> diff --git a/fetch-pack.h b/fetch-pack.h
> index bb45a366a..1859ee927 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -16,6 +16,13 @@ struct fetch_pack_args {
>  	const struct string_list *deepen_not;
>  	struct list_objects_filter_options filter_options;
>  	const struct string_list *server_options;
> +
> +	/*
> +	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
> +	 * lines only with these tips and their ancestors.
> +	 */
> +	const struct oid_array *negotiation_tips;
> +
>  	unsigned deepen_relative:1;
>  	unsigned quiet:1;
>  	unsigned keep_pack:1;
> diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
> index e402aee6a..ea1b5e53c 100755
> --- a/t/t5510-fetch.sh
> +++ b/t/t5510-fetch.sh
> @@ -865,4 +865,59 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
>  	test_cmp expect actual
>  '
>  
> +negotiator_tip () {
> +	SERVER="$1"
> +	URL="$2"
> +	USE_PROTOCOL_V2="$3"
> +
> +	rm -rf "$SERVER" client &&
> +	git init "$SERVER" &&
> +	test_commit -C "$SERVER" alpha_1 &&
> +	test_commit -C "$SERVER" alpha_2 &&
> +	git -C "$SERVER" checkout --orphan beta &&
> +	test_commit -C "$SERVER" beta_1 &&
> +	test_commit -C "$SERVER" beta_2 &&
> +
> +	git clone "$URL" client &&
> +
> +	if [ "$USE_PROTOCOL_V2" -eq 1 ]
> +	then
> +		git -C "$SERVER" config protocol.version 2
> +		git -C client config protocol.version 2
> +	fi &&
> +
> +	test_commit -C "$SERVER" beta_s &&
> +	git -C "$SERVER" checkout master &&
> +	test_commit -C "$SERVER" alpha_s &&
> +	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2 &&
> +
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
> +		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
> +		origin alpha_s beta_s &&
> +
> +	# Ensure that {alpha,beta}_1 are sent as "have", but not {alpha_beta}_2
> +	ALPHA_1=$(git -C client rev-parse alpha_1) &&
> +	grep "fetch> have $ALPHA_1" trace &&
> +	BETA_1=$(git -C client rev-parse beta_1) &&
> +	grep "fetch> have $BETA_1" trace &&
> +	ALPHA_2=$(git -C client rev-parse alpha_2) &&
> +	! grep "fetch> have $ALPHA_2" trace &&
> +	BETA_2=$(git -C client rev-parse beta_2) &&
> +	! grep "fetch> have $BETA_2" trace
> +}
> +
> +test_expect_success '--negotiator-tip limits "have" lines sent' '
> +	negotiator_tip server server 0
> +'
> +
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' '
> +	negotiator_tip "$HTTPD_DOCUMENT_ROOT_PATH/server" \
> +		"$HTTPD_URL/smart/server" 1
> +'
> +
> +stop_httpd
> +
>  test_done
> diff --git a/transport-helper.c b/transport-helper.c
> index 1f8ff7e94..ad8f7c772 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -684,6 +684,9 @@ static int fetch(struct transport *transport,
>  			transport, "filter",
>  			data->transport_options.filter_options.filter_spec);
>  
> +	if (data->transport_options.negotiation_tips)
> +		warning("Ignoring --negotiation-tip because the protocol does not support it.");
> +
>  	if (data->fetch)
>  		return fetch_with_fetch(transport, nr_heads, to_fetch);
>  
> diff --git a/transport.c b/transport.c
> index a32da30de..9f10f8ad9 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -318,6 +318,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	args.filter_options = data->options.filter_options;
>  	args.stateless_rpc = transport->stateless_rpc;
>  	args.server_options = transport->server_options;
> +	args.negotiation_tips = data->options.negotiation_tips;
>  
>  	if (!data->got_remote_heads)
>  		refs_tmp = get_refs_via_connect(transport, 0, NULL);
> diff --git a/transport.h b/transport.h
> index 7792b0858..d31be5be6 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -25,6 +25,16 @@ struct git_transport_options {
>  	const char *receivepack;
>  	struct push_cas_option *cas;
>  	struct list_objects_filter_options filter_options;
> +
> +	/*
> +	 * This is only used during fetch. See the documentation of
> +	 * negotiation_tips in struct fetch_pack_args.
> +	 *
> +	 * This field is only supported by transports that support connect or
> +	 * stateless_connect. Set this field directly instead of using
> +	 * transport_set_option().
> +	 */
> +	struct oid_array *negotiation_tips;
>  };
>  
>  enum transport_family {
> -- 
> 2.18.0.rc2.347.g0da03f3a46.dirty
> 

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH] fetch-pack: support negotiation tip whitelist
  2018-06-25 19:37 [PATCH] fetch-pack: support negotiation tip whitelist Jonathan Tan
  2018-06-26 17:53 ` Jonathan Nieder
@ 2018-06-26 18:59 ` Junio C Hamano
  2018-06-27 18:28 ` [PATCH v2] " Jonathan Tan
                   ` (2 subsequent siblings)
  4 siblings, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2018-06-26 18:59 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

Jonathan Tan <jonathantanmy@google.com> writes:

> During negotiation, fetch-pack eventually reports as "have" lines all
> commits reachable from all refs. Allow the user to restrict the commits
> sent in this way by providing a whitelist of tips; only the tips
> themselves and their ancestors will be sent.
>
> This feature is only supported for protocols that support connect or
> stateless-connect (such as HTTP with protocol v2).
>
> This will speed up negotiation when the repository has multiple
> relatively independent branches (for example, when a repository
> interacts with multiple repositories, such as with linux-next [1] and
> torvalds/linux [2]), and the user knows which local branch is likely to
> have commits in common with the upstream branch they are fetching.
>
> [1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
> [2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> This is based on jt/fetch-pack-negotiator, but if that branch is
> undesirable for whatever reason, I can port this to master.
> ---
>  builtin/fetch.c    | 21 ++++++++++++++++++
>  fetch-pack.c       | 19 ++++++++++++++--
>  fetch-pack.h       |  7 ++++++
>  t/t5510-fetch.sh   | 55 ++++++++++++++++++++++++++++++++++++++++++++++
>  transport-helper.c |  3 +++
>  transport.c        |  1 +
>  transport.h        | 10 +++++++++
>  7 files changed, 114 insertions(+), 2 deletions(-)

What's the plan to expose this "feature" to end-users?  There is no
end-user facing documentation added by this patch, and in-code
comments only talk about what (mechanical) effect the option has,
but not when a user may want to use the feature, or how the user
would best decide the set of commits to pass to this new option.

Would something like this

    git fetch $(git for-each-ref \
	--format=--nego-tip="%(objectname)" \
	refs/remotes/linux-next/) \
	linux-next

be an expected typical way to pull from one remote, exposing only
the tips of refs we got from that remote and not the ones we
obtained from other places?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v2] fetch-pack: support negotiation tip whitelist
  2018-06-25 19:37 [PATCH] fetch-pack: support negotiation tip whitelist Jonathan Tan
  2018-06-26 17:53 ` Jonathan Nieder
  2018-06-26 18:59 ` Junio C Hamano
@ 2018-06-27 18:28 ` Jonathan Tan
  2018-06-28 15:56   ` Brandon Williams
  2018-06-28 22:15 ` [PATCH v3] " Jonathan Tan
  2018-07-02 22:39 ` [PATCH v4] " Jonathan Tan
  4 siblings, 1 reply; 14+ messages in thread
From: Jonathan Tan @ 2018-06-27 18:28 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, jrnieder, gitster

During negotiation, fetch-pack eventually reports as "have" lines all
commits reachable from all refs. Allow the user to restrict the commits
sent in this way by providing a whitelist of tips; only the tips
themselves and their ancestors will be sent.

This feature is only supported for protocols that support connect or
stateless-connect (such as HTTP with protocol v2).

This will speed up negotiation when the repository has multiple
relatively independent branches (for example, when a repository
interacts with multiple repositories, such as with linux-next [1] and
torvalds/linux [2]), and the user knows which local branch is likely to
have commits in common with the upstream branch they are fetching.

[1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
[2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
v2 is exactly the same as the original, except with user-facing
documentation in Documentation/fetch-options.txt.

> What's the plan to expose this "feature" to end-users?  There is no
> end-user facing documentation added by this patch, and in-code
> comments only talk about what (mechanical) effect the option has,
> but not when a user may want to use the feature, or how the user
> would best decide the set of commits to pass to this new option.

Jonathan Nieder also mentioned this. Lack of documentation was an
oversight, sorry. I've added it in this version.

> Would something like this
>
>     git fetch $(git for-each-ref \
> 	--format=--nego-tip="%(objectname)" \
> 	refs/remotes/linux-next/) \
> 	linux-next
>
> be an expected typical way to pull from one remote, exposing only
> the tips of refs we got from that remote and not the ones we
> obtained from other places?

Yes, that is one way. Alternatively, if the user is only fetching one
branch, they may also want to specify a single branch.
---
 Documentation/fetch-options.txt | 12 +++++++
 builtin/fetch.c                 | 21 +++++++++++++
 fetch-pack.c                    | 19 ++++++++++--
 fetch-pack.h                    |  7 +++++
 t/t5510-fetch.sh                | 55 +++++++++++++++++++++++++++++++++
 transport-helper.c              |  3 ++
 transport.c                     |  1 +
 transport.h                     | 10 ++++++
 8 files changed, 126 insertions(+), 2 deletions(-)

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 97d3217df9..80c4c94595 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -42,6 +42,18 @@ the current repository has the same history as the source repository.
 	.git/shallow. This option updates .git/shallow and accept such
 	refs.
 
+--negotiation-tip::
+	By default, Git will report, to the server, commits reachable
+	from all local refs to find common commits in an attempt to
+	reduce the size of the to-be-received packfile. If specified,
+	Git will only report commits reachable from the given commit.
+	This is useful to speed up fetches when the user knows which
+	local ref is likely to have commits in common with the
+	upstream ref being fetched.
++
+This option may be specified more than once; if so, Git will report
+commits reachable from any of the given commits.
+
 ifndef::git-pull[]
 --dry-run::
 	Show what would be done, without making any changes.
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669ad..12daec0f3b 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -63,6 +63,7 @@ static int shown_url = 0;
 static struct refspec refmap = REFSPEC_INIT_FETCH;
 static struct list_objects_filter_options filter_options;
 static struct string_list server_options = STRING_LIST_INIT_DUP;
+static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -174,6 +175,8 @@ static struct option builtin_fetch_options[] = {
 			TRANSPORT_FAMILY_IPV4),
 	OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
 			TRANSPORT_FAMILY_IPV6),
+	OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
+			N_("report that we have only objects reachable from this object")),
 	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
 	OPT_END()
 };
@@ -1075,6 +1078,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 			   filter_options.filter_spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
 	}
+	if (negotiation_tip.nr) {
+		struct oid_array *oids;
+		if (transport->smart_options) {
+			int i;
+			oids = xcalloc(1, sizeof(*oids));
+			for (i = 0; i < negotiation_tip.nr; i++) {
+				struct object_id oid;
+				if (get_oid(negotiation_tip.items[i].string,
+					    &oid))
+					die("%s is not a valid object",
+					    negotiation_tip.items[i].string);
+				oid_array_append(oids, &oid);
+			}
+			transport->smart_options->negotiation_tips = oids;
+		} else {
+			warning("Ignoring --negotiation-tip because the protocol does not support it.");
+		}
+	}
 	return transport;
 }
 
diff --git a/fetch-pack.c b/fetch-pack.c
index ba12085c4a..c66bd49bd1 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -213,6 +213,21 @@ static int next_flush(int stateless_rpc, int count)
 	return count;
 }
 
+static void mark_tips(struct fetch_negotiator *negotiator,
+		      const struct oid_array *negotiation_tips)
+{
+	int i;
+	if (!negotiation_tips) {
+		for_each_ref(rev_list_insert_ref_oid, negotiator);
+		return;
+	}
+
+	for (i = 0; i < negotiation_tips->nr; i++)
+		rev_list_insert_ref(negotiator, NULL,
+				    &negotiation_tips->oid[i]);
+	return;
+}
+
 static int find_common(struct fetch_negotiator *negotiator,
 		       struct fetch_pack_args *args,
 		       int fd[2], struct object_id *result_oid,
@@ -230,7 +245,7 @@ static int find_common(struct fetch_negotiator *negotiator,
 	if (args->stateless_rpc && multi_ack == 1)
 		die(_("--stateless-rpc requires multi_ack_detailed"));
 
-	for_each_ref(rev_list_insert_ref_oid, negotiator);
+	mark_tips(negotiator, args->negotiation_tips);
 	for_each_cached_alternate(negotiator, insert_one_alternate_object);
 
 	fetching = 0;
@@ -1295,7 +1310,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			else
 				state = FETCH_SEND_REQUEST;
 
-			for_each_ref(rev_list_insert_ref_oid, &negotiator);
+			mark_tips(&negotiator, args->negotiation_tips);
 			for_each_cached_alternate(&negotiator,
 						  insert_one_alternate_object);
 			break;
diff --git a/fetch-pack.h b/fetch-pack.h
index bb45a366a8..1859ee9275 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -16,6 +16,13 @@ struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+
+	/*
+	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
+	 * lines only with these tips and their ancestors.
+	 */
+	const struct oid_array *negotiation_tips;
+
 	unsigned deepen_relative:1;
 	unsigned quiet:1;
 	unsigned keep_pack:1;
diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
index e402aee6a2..ea1b5e53c1 100755
--- a/t/t5510-fetch.sh
+++ b/t/t5510-fetch.sh
@@ -865,4 +865,59 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
 	test_cmp expect actual
 '
 
+negotiator_tip () {
+	SERVER="$1"
+	URL="$2"
+	USE_PROTOCOL_V2="$3"
+
+	rm -rf "$SERVER" client &&
+	git init "$SERVER" &&
+	test_commit -C "$SERVER" alpha_1 &&
+	test_commit -C "$SERVER" alpha_2 &&
+	git -C "$SERVER" checkout --orphan beta &&
+	test_commit -C "$SERVER" beta_1 &&
+	test_commit -C "$SERVER" beta_2 &&
+
+	git clone "$URL" client &&
+
+	if [ "$USE_PROTOCOL_V2" -eq 1 ]
+	then
+		git -C "$SERVER" config protocol.version 2
+		git -C client config protocol.version 2
+	fi &&
+
+	test_commit -C "$SERVER" beta_s &&
+	git -C "$SERVER" checkout master &&
+	test_commit -C "$SERVER" alpha_s &&
+	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2 &&
+
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
+		origin alpha_s beta_s &&
+
+	# Ensure that {alpha,beta}_1 are sent as "have", but not {alpha_beta}_2
+	ALPHA_1=$(git -C client rev-parse alpha_1) &&
+	grep "fetch> have $ALPHA_1" trace &&
+	BETA_1=$(git -C client rev-parse beta_1) &&
+	grep "fetch> have $BETA_1" trace &&
+	ALPHA_2=$(git -C client rev-parse alpha_2) &&
+	! grep "fetch> have $ALPHA_2" trace &&
+	BETA_2=$(git -C client rev-parse beta_2) &&
+	! grep "fetch> have $BETA_2" trace
+}
+
+test_expect_success '--negotiator-tip limits "have" lines sent' '
+	negotiator_tip server server 0
+'
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' '
+	negotiator_tip "$HTTPD_DOCUMENT_ROOT_PATH/server" \
+		"$HTTPD_URL/smart/server" 1
+'
+
+stop_httpd
+
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index 1f8ff7e942..ad8f7c7726 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -684,6 +684,9 @@ static int fetch(struct transport *transport,
 			transport, "filter",
 			data->transport_options.filter_options.filter_spec);
 
+	if (data->transport_options.negotiation_tips)
+		warning("Ignoring --negotiation-tip because the protocol does not support it.");
+
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
diff --git a/transport.c b/transport.c
index a32da30dee..9f10f8ad9f 100644
--- a/transport.c
+++ b/transport.c
@@ -318,6 +318,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.filter_options = data->options.filter_options;
 	args.stateless_rpc = transport->stateless_rpc;
 	args.server_options = transport->server_options;
+	args.negotiation_tips = data->options.negotiation_tips;
 
 	if (!data->got_remote_heads)
 		refs_tmp = get_refs_via_connect(transport, 0, NULL);
diff --git a/transport.h b/transport.h
index 7792b08582..d31be5be63 100644
--- a/transport.h
+++ b/transport.h
@@ -25,6 +25,16 @@ struct git_transport_options {
 	const char *receivepack;
 	struct push_cas_option *cas;
 	struct list_objects_filter_options filter_options;
+
+	/*
+	 * This is only used during fetch. See the documentation of
+	 * negotiation_tips in struct fetch_pack_args.
+	 *
+	 * This field is only supported by transports that support connect or
+	 * stateless_connect. Set this field directly instead of using
+	 * transport_set_option().
+	 */
+	struct oid_array *negotiation_tips;
 };
 
 enum transport_family {
-- 
2.18.0.rc2.346.g013aa6912e-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] fetch-pack: support negotiation tip whitelist
  2018-06-27 18:28 ` [PATCH v2] " Jonathan Tan
@ 2018-06-28 15:56   ` Brandon Williams
  2018-06-28 16:12     ` Jonathan Tan
  0 siblings, 1 reply; 14+ messages in thread
From: Brandon Williams @ 2018-06-28 15:56 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, jrnieder, gitster

On 06/27, Jonathan Tan wrote:
> During negotiation, fetch-pack eventually reports as "have" lines all
> commits reachable from all refs. Allow the user to restrict the commits
> sent in this way by providing a whitelist of tips; only the tips
> themselves and their ancestors will be sent.
> 
> This feature is only supported for protocols that support connect or
> stateless-connect (such as HTTP with protocol v2).
> 
> This will speed up negotiation when the repository has multiple
> relatively independent branches (for example, when a repository
> interacts with multiple repositories, such as with linux-next [1] and
> torvalds/linux [2]), and the user knows which local branch is likely to
> have commits in common with the upstream branch they are fetching.
> 
> [1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
> [2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> v2 is exactly the same as the original, except with user-facing
> documentation in Documentation/fetch-options.txt.
> 
> > What's the plan to expose this "feature" to end-users?  There is no
> > end-user facing documentation added by this patch, and in-code
> > comments only talk about what (mechanical) effect the option has,
> > but not when a user may want to use the feature, or how the user
> > would best decide the set of commits to pass to this new option.
> 
> Jonathan Nieder also mentioned this. Lack of documentation was an
> oversight, sorry. I've added it in this version.
> 
> > Would something like this
> >
> >     git fetch $(git for-each-ref \
> > 	--format=--nego-tip="%(objectname)" \
> > 	refs/remotes/linux-next/) \
> > 	linux-next
> >
> > be an expected typical way to pull from one remote, exposing only
> > the tips of refs we got from that remote and not the ones we
> > obtained from other places?
> 
> Yes, that is one way. Alternatively, if the user is only fetching one
> branch, they may also want to specify a single branch.
> ---
>  Documentation/fetch-options.txt | 12 +++++++
>  builtin/fetch.c                 | 21 +++++++++++++
>  fetch-pack.c                    | 19 ++++++++++--
>  fetch-pack.h                    |  7 +++++
>  t/t5510-fetch.sh                | 55 +++++++++++++++++++++++++++++++++
>  transport-helper.c              |  3 ++
>  transport.c                     |  1 +
>  transport.h                     | 10 ++++++
>  8 files changed, 126 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index 97d3217df9..80c4c94595 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -42,6 +42,18 @@ the current repository has the same history as the source repository.
>  	.git/shallow. This option updates .git/shallow and accept such
>  	refs.
>  
> +--negotiation-tip::
> +	By default, Git will report, to the server, commits reachable
> +	from all local refs to find common commits in an attempt to
> +	reduce the size of the to-be-received packfile. If specified,
> +	Git will only report commits reachable from the given commit.
> +	This is useful to speed up fetches when the user knows which
> +	local ref is likely to have commits in common with the
> +	upstream ref being fetched.

This seems like a pretty difficult to use feature, requiring that I
provide the actual OIDs.  I think a much better UI would probably be to
accept a number of different things ranging from exact OIDs to actual
ref names or even better, allowing for ref-patterns which include globs.
That way I can do the following:
  
  git fetch --negotiation-tip=refs/remotes/my-remote/* my-remote

in order to easily limit the tips to all the refs I have from that
particular remote.

> ++
> +This option may be specified more than once; if so, Git will report
> +commits reachable from any of the given commits.
> +
>  ifndef::git-pull[]
>  --dry-run::
>  	Show what would be done, without making any changes.
> diff --git a/builtin/fetch.c b/builtin/fetch.c
> index ea5b9669ad..12daec0f3b 100644
> --- a/builtin/fetch.c
> +++ b/builtin/fetch.c
> @@ -63,6 +63,7 @@ static int shown_url = 0;
>  static struct refspec refmap = REFSPEC_INIT_FETCH;
>  static struct list_objects_filter_options filter_options;
>  static struct string_list server_options = STRING_LIST_INIT_DUP;
> +static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
>  
>  static int git_fetch_config(const char *k, const char *v, void *cb)
>  {
> @@ -174,6 +175,8 @@ static struct option builtin_fetch_options[] = {
>  			TRANSPORT_FAMILY_IPV4),
>  	OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
>  			TRANSPORT_FAMILY_IPV6),
> +	OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
> +			N_("report that we have only objects reachable from this object")),
>  	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
>  	OPT_END()
>  };
> @@ -1075,6 +1078,24 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
>  			   filter_options.filter_spec);
>  		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
>  	}
> +	if (negotiation_tip.nr) {
> +		struct oid_array *oids;
> +		if (transport->smart_options) {
> +			int i;
> +			oids = xcalloc(1, sizeof(*oids));
> +			for (i = 0; i < negotiation_tip.nr; i++) {
> +				struct object_id oid;
> +				if (get_oid(negotiation_tip.items[i].string,
> +					    &oid))
> +					die("%s is not a valid object",
> +					    negotiation_tip.items[i].string);
> +				oid_array_append(oids, &oid);
> +			}
> +			transport->smart_options->negotiation_tips = oids;
> +		} else {
> +			warning("Ignoring --negotiation-tip because the protocol does not support it.");
> +		}
> +	}
>  	return transport;
>  }
>  
> diff --git a/fetch-pack.c b/fetch-pack.c
> index ba12085c4a..c66bd49bd1 100644
> --- a/fetch-pack.c
> +++ b/fetch-pack.c
> @@ -213,6 +213,21 @@ static int next_flush(int stateless_rpc, int count)
>  	return count;
>  }
>  
> +static void mark_tips(struct fetch_negotiator *negotiator,
> +		      const struct oid_array *negotiation_tips)
> +{
> +	int i;
> +	if (!negotiation_tips) {
> +		for_each_ref(rev_list_insert_ref_oid, negotiator);
> +		return;
> +	}
> +
> +	for (i = 0; i < negotiation_tips->nr; i++)
> +		rev_list_insert_ref(negotiator, NULL,
> +				    &negotiation_tips->oid[i]);
> +	return;
> +}
> +
>  static int find_common(struct fetch_negotiator *negotiator,
>  		       struct fetch_pack_args *args,
>  		       int fd[2], struct object_id *result_oid,
> @@ -230,7 +245,7 @@ static int find_common(struct fetch_negotiator *negotiator,
>  	if (args->stateless_rpc && multi_ack == 1)
>  		die(_("--stateless-rpc requires multi_ack_detailed"));
>  
> -	for_each_ref(rev_list_insert_ref_oid, negotiator);
> +	mark_tips(negotiator, args->negotiation_tips);
>  	for_each_cached_alternate(negotiator, insert_one_alternate_object);
>  
>  	fetching = 0;
> @@ -1295,7 +1310,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
>  			else
>  				state = FETCH_SEND_REQUEST;
>  
> -			for_each_ref(rev_list_insert_ref_oid, &negotiator);
> +			mark_tips(&negotiator, args->negotiation_tips);
>  			for_each_cached_alternate(&negotiator,
>  						  insert_one_alternate_object);
>  			break;
> diff --git a/fetch-pack.h b/fetch-pack.h
> index bb45a366a8..1859ee9275 100644
> --- a/fetch-pack.h
> +++ b/fetch-pack.h
> @@ -16,6 +16,13 @@ struct fetch_pack_args {
>  	const struct string_list *deepen_not;
>  	struct list_objects_filter_options filter_options;
>  	const struct string_list *server_options;
> +
> +	/*
> +	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
> +	 * lines only with these tips and their ancestors.
> +	 */
> +	const struct oid_array *negotiation_tips;
> +
>  	unsigned deepen_relative:1;
>  	unsigned quiet:1;
>  	unsigned keep_pack:1;
> diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
> index e402aee6a2..ea1b5e53c1 100755
> --- a/t/t5510-fetch.sh
> +++ b/t/t5510-fetch.sh
> @@ -865,4 +865,59 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
>  	test_cmp expect actual
>  '
>  
> +negotiator_tip () {
> +	SERVER="$1"
> +	URL="$2"
> +	USE_PROTOCOL_V2="$3"
> +
> +	rm -rf "$SERVER" client &&
> +	git init "$SERVER" &&
> +	test_commit -C "$SERVER" alpha_1 &&
> +	test_commit -C "$SERVER" alpha_2 &&
> +	git -C "$SERVER" checkout --orphan beta &&
> +	test_commit -C "$SERVER" beta_1 &&
> +	test_commit -C "$SERVER" beta_2 &&
> +
> +	git clone "$URL" client &&
> +
> +	if [ "$USE_PROTOCOL_V2" -eq 1 ]
> +	then
> +		git -C "$SERVER" config protocol.version 2
> +		git -C client config protocol.version 2
> +	fi &&
> +
> +	test_commit -C "$SERVER" beta_s &&
> +	git -C "$SERVER" checkout master &&
> +	test_commit -C "$SERVER" alpha_s &&
> +	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2 &&
> +
> +	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
> +		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
> +		origin alpha_s beta_s &&
> +
> +	# Ensure that {alpha,beta}_1 are sent as "have", but not {alpha_beta}_2
> +	ALPHA_1=$(git -C client rev-parse alpha_1) &&
> +	grep "fetch> have $ALPHA_1" trace &&
> +	BETA_1=$(git -C client rev-parse beta_1) &&
> +	grep "fetch> have $BETA_1" trace &&
> +	ALPHA_2=$(git -C client rev-parse alpha_2) &&
> +	! grep "fetch> have $ALPHA_2" trace &&
> +	BETA_2=$(git -C client rev-parse beta_2) &&
> +	! grep "fetch> have $BETA_2" trace
> +}
> +
> +test_expect_success '--negotiator-tip limits "have" lines sent' '
> +	negotiator_tip server server 0
> +'
> +
> +. "$TEST_DIRECTORY"/lib-httpd.sh
> +start_httpd
> +
> +test_expect_success 'fetch with --filter=blob:limit=0 and HTTP' '
> +	negotiator_tip "$HTTPD_DOCUMENT_ROOT_PATH/server" \
> +		"$HTTPD_URL/smart/server" 1
> +'
> +
> +stop_httpd
> +
>  test_done
> diff --git a/transport-helper.c b/transport-helper.c
> index 1f8ff7e942..ad8f7c7726 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -684,6 +684,9 @@ static int fetch(struct transport *transport,
>  			transport, "filter",
>  			data->transport_options.filter_options.filter_spec);
>  
> +	if (data->transport_options.negotiation_tips)
> +		warning("Ignoring --negotiation-tip because the protocol does not support it.");
> +
>  	if (data->fetch)
>  		return fetch_with_fetch(transport, nr_heads, to_fetch);
>  
> diff --git a/transport.c b/transport.c
> index a32da30dee..9f10f8ad9f 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -318,6 +318,7 @@ static int fetch_refs_via_pack(struct transport *transport,
>  	args.filter_options = data->options.filter_options;
>  	args.stateless_rpc = transport->stateless_rpc;
>  	args.server_options = transport->server_options;
> +	args.negotiation_tips = data->options.negotiation_tips;
>  
>  	if (!data->got_remote_heads)
>  		refs_tmp = get_refs_via_connect(transport, 0, NULL);
> diff --git a/transport.h b/transport.h
> index 7792b08582..d31be5be63 100644
> --- a/transport.h
> +++ b/transport.h
> @@ -25,6 +25,16 @@ struct git_transport_options {
>  	const char *receivepack;
>  	struct push_cas_option *cas;
>  	struct list_objects_filter_options filter_options;
> +
> +	/*
> +	 * This is only used during fetch. See the documentation of
> +	 * negotiation_tips in struct fetch_pack_args.
> +	 *
> +	 * This field is only supported by transports that support connect or
> +	 * stateless_connect. Set this field directly instead of using
> +	 * transport_set_option().
> +	 */
> +	struct oid_array *negotiation_tips;
>  };
>  
>  enum transport_family {
> -- 
> 2.18.0.rc2.346.g013aa6912e-goog
> 

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] fetch-pack: support negotiation tip whitelist
  2018-06-28 15:56   ` Brandon Williams
@ 2018-06-28 16:12     ` Jonathan Tan
  2018-06-28 16:16       ` Brandon Williams
  0 siblings, 1 reply; 14+ messages in thread
From: Jonathan Tan @ 2018-06-28 16:12 UTC (permalink / raw)
  To: bmwill; +Cc: jonathantanmy, git, jrnieder, gitster

> This seems like a pretty difficult to use feature, requiring that I
> provide the actual OIDs.  I think a much better UI would probably be to
> accept a number of different things ranging from exact OIDs to actual
> ref names or even better, allowing for ref-patterns which include globs.
> That way I can do the following:
>   
>   git fetch --negotiation-tip=refs/remotes/my-remote/* my-remote
> 
> in order to easily limit the tips to all the refs I have from that
> particular remote.

Actual ref names are already supported - it uses the same DWIM as
others. As for globs, I thought of supporting both DWIM objects and the
LHS of refspecs, but (1) it might be strange to support master and
refs/heads/* but not heads/*, and (2) I can't think of anywhere in Git
where you can provide either one - it's either SHA-1 and DWIM name, or
SHA-1 and refspec, but not all three.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v2] fetch-pack: support negotiation tip whitelist
  2018-06-28 16:12     ` Jonathan Tan
@ 2018-06-28 16:16       ` Brandon Williams
  0 siblings, 0 replies; 14+ messages in thread
From: Brandon Williams @ 2018-06-28 16:16 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, jrnieder, gitster

On 06/28, Jonathan Tan wrote:
> > This seems like a pretty difficult to use feature, requiring that I
> > provide the actual OIDs.  I think a much better UI would probably be to
> > accept a number of different things ranging from exact OIDs to actual
> > ref names or even better, allowing for ref-patterns which include globs.
> > That way I can do the following:
> >   
> >   git fetch --negotiation-tip=refs/remotes/my-remote/* my-remote
> > 
> > in order to easily limit the tips to all the refs I have from that
> > particular remote.
> 
> Actual ref names are already supported - it uses the same DWIM as
> others. As for globs, I thought of supporting both DWIM objects and the
> LHS of refspecs, but (1) it might be strange to support master and
> refs/heads/* but not heads/*,

I don't think that would be strange at all, and no where in git do we
handle heads/* but we do already handle refs/heads/* as well as DWIM
master.


> and (2) I can't think of anywhere in Git
> where you can provide either one - it's either SHA-1 and DWIM name, or
> SHA-1 and refspec, but not all three.

fetch is a perfect example of supporting all three.  I can do

  git fetch origin SHA1
  git fetch origin master
  git fetch origin refs/heads/*:refs/heads/*

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v3] fetch-pack: support negotiation tip whitelist
  2018-06-25 19:37 [PATCH] fetch-pack: support negotiation tip whitelist Jonathan Tan
                   ` (2 preceding siblings ...)
  2018-06-27 18:28 ` [PATCH v2] " Jonathan Tan
@ 2018-06-28 22:15 ` Jonathan Tan
  2018-06-28 22:20   ` Brandon Williams
  2018-06-29 16:28   ` Junio C Hamano
  2018-07-02 22:39 ` [PATCH v4] " Jonathan Tan
  4 siblings, 2 replies; 14+ messages in thread
From: Jonathan Tan @ 2018-06-28 22:15 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, bmwill

During negotiation, fetch-pack eventually reports as "have" lines all
commits reachable from all refs. Allow the user to restrict the commits
sent in this way by providing a whitelist of tips; only the tips
themselves and their ancestors will be sent.

Both globs and single objects are supported.

This feature is only supported for protocols that support connect or
stateless-connect (such as HTTP with protocol v2).

This will speed up negotiation when the repository has multiple
relatively independent branches (for example, when a repository
interacts with multiple repositories, such as with linux-next [1] and
torvalds/linux [2]), and the user knows which local branch is likely to
have commits in common with the upstream branch they are fetching.

[1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
[2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
This is on jt/fetch-pack-negotiator.

> I don't think that would be strange at all, and no where in git do we
> handle heads/* but we do already handle refs/heads/* as well as DWIM
> master.
>
> > and (2) I can't think of anywhere in Git
> > where you can provide either one - it's either SHA-1 and DWIM name, or
> > SHA-1 and refspec, but not all three.
>
> fetch is a perfect example of supporting all three.  I can do
>
>   git fetch origin SHA1
>   git fetch origin master
>   git fetch origin refs/heads/*:refs/heads/*

OK, Brandon managed to convince me that this is fine. I've included glob
support, supporting the same globs that git notes supports.
---
 Documentation/fetch-options.txt | 16 +++++++
 builtin/fetch.c                 | 41 +++++++++++++++++
 fetch-pack.c                    | 19 +++++++-
 fetch-pack.h                    |  7 +++
 t/t5510-fetch.sh                | 78 +++++++++++++++++++++++++++++++++
 transport-helper.c              |  3 ++
 transport.c                     |  1 +
 transport.h                     | 10 +++++
 8 files changed, 173 insertions(+), 2 deletions(-)

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 97d3217df..6e4db1738 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -42,6 +42,22 @@ the current repository has the same history as the source repository.
 	.git/shallow. This option updates .git/shallow and accept such
 	refs.
 
+--negotiation-tip=<commit|glob>::
+	By default, Git will report, to the server, commits reachable
+	from all local refs to find common commits in an attempt to
+	reduce the size of the to-be-received packfile. If specified,
+	Git will only report commits reachable from the given tips.
+	This is useful to speed up fetches when the user knows which
+	local ref is likely to have commits in common with the
+	upstream ref being fetched.
++
+This option may be specified more than once; if so, Git will report
+commits reachable from any of the given commits.
++
+The argument to this option may be a glob on ref names, a ref, or the (possibly
+abbreviated SHA-1 of a commit. Specifying a glob is equivalent to specifying
+this option multiple times, one for each matching ref name.
+
 ifndef::git-pull[]
 --dry-run::
 	Show what would be done, without making any changes.
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669a..1a7ef305d 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -63,6 +63,7 @@ static int shown_url = 0;
 static struct refspec refmap = REFSPEC_INIT_FETCH;
 static struct list_objects_filter_options filter_options;
 static struct string_list server_options = STRING_LIST_INIT_DUP;
+static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -174,6 +175,8 @@ static struct option builtin_fetch_options[] = {
 			TRANSPORT_FAMILY_IPV4),
 	OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
 			TRANSPORT_FAMILY_IPV6),
+	OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
+			N_("report that we have only objects reachable from this object")),
 	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
 	OPT_END()
 };
@@ -1049,6 +1052,38 @@ static void set_option(struct transport *transport, const char *name, const char
 			name, transport->url);
 }
 
+
+static int add_oid(const char *refname, const struct object_id *oid, int flags,
+		   void *cb_data)
+{
+	struct oid_array *oids = cb_data;
+	oid_array_append(oids, oid);
+	return 0;
+}
+
+static void add_negotiation_tips(struct git_transport_options *smart_options)
+{
+	struct oid_array *oids = xcalloc(1, sizeof(*oids));
+	int i;
+	for (i = 0; i < negotiation_tip.nr; i++) {
+		const char *s = negotiation_tip.items[i].string;
+		int old_nr;
+		if (!has_glob_specials(s)) {
+			struct object_id oid;
+			if (get_oid(s, &oid))
+				die("%s is not a valid object", s);
+			oid_array_append(oids, &oid);
+			continue;
+		}
+		old_nr = oids->nr;
+		for_each_glob_ref(add_oid, s, oids);
+		if (old_nr == oids->nr)
+			warning("Ignoring --negotiation-tip=%s because it does not match any refs",
+				s);
+	}
+	smart_options->negotiation_tips = oids;
+}
+
 static struct transport *prepare_transport(struct remote *remote, int deepen)
 {
 	struct transport *transport;
@@ -1075,6 +1110,12 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 			   filter_options.filter_spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
 	}
+	if (negotiation_tip.nr) {
+		if (transport->smart_options)
+			add_negotiation_tips(transport->smart_options);
+		else
+			warning("Ignoring --negotiation-tip because the protocol does not support it.");
+	}
 	return transport;
 }
 
diff --git a/fetch-pack.c b/fetch-pack.c
index ba12085c4..c66bd49bd 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -213,6 +213,21 @@ static int next_flush(int stateless_rpc, int count)
 	return count;
 }
 
+static void mark_tips(struct fetch_negotiator *negotiator,
+		      const struct oid_array *negotiation_tips)
+{
+	int i;
+	if (!negotiation_tips) {
+		for_each_ref(rev_list_insert_ref_oid, negotiator);
+		return;
+	}
+
+	for (i = 0; i < negotiation_tips->nr; i++)
+		rev_list_insert_ref(negotiator, NULL,
+				    &negotiation_tips->oid[i]);
+	return;
+}
+
 static int find_common(struct fetch_negotiator *negotiator,
 		       struct fetch_pack_args *args,
 		       int fd[2], struct object_id *result_oid,
@@ -230,7 +245,7 @@ static int find_common(struct fetch_negotiator *negotiator,
 	if (args->stateless_rpc && multi_ack == 1)
 		die(_("--stateless-rpc requires multi_ack_detailed"));
 
-	for_each_ref(rev_list_insert_ref_oid, negotiator);
+	mark_tips(negotiator, args->negotiation_tips);
 	for_each_cached_alternate(negotiator, insert_one_alternate_object);
 
 	fetching = 0;
@@ -1295,7 +1310,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			else
 				state = FETCH_SEND_REQUEST;
 
-			for_each_ref(rev_list_insert_ref_oid, &negotiator);
+			mark_tips(&negotiator, args->negotiation_tips);
 			for_each_cached_alternate(&negotiator,
 						  insert_one_alternate_object);
 			break;
diff --git a/fetch-pack.h b/fetch-pack.h
index bb45a366a..1859ee927 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -16,6 +16,13 @@ struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+
+	/*
+	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
+	 * lines only with these tips and their ancestors.
+	 */
+	const struct oid_array *negotiation_tips;
+
 	unsigned deepen_relative:1;
 	unsigned quiet:1;
 	unsigned keep_pack:1;
diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
index e402aee6a..8532a6faf 100755
--- a/t/t5510-fetch.sh
+++ b/t/t5510-fetch.sh
@@ -865,4 +865,82 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
 	test_cmp expect actual
 '
 
+setup_negotiation_tip () {
+	SERVER="$1"
+	URL="$2"
+	USE_PROTOCOL_V2="$3"
+
+	rm -rf "$SERVER" client trace &&
+	git init "$SERVER" &&
+	test_commit -C "$SERVER" alpha_1 &&
+	test_commit -C "$SERVER" alpha_2 &&
+	git -C "$SERVER" checkout --orphan beta &&
+	test_commit -C "$SERVER" beta_1 &&
+	test_commit -C "$SERVER" beta_2 &&
+
+	git clone "$URL" client &&
+
+	if [ "$USE_PROTOCOL_V2" -eq 1 ]
+	then
+		git -C "$SERVER" config protocol.version 2
+		git -C client config protocol.version 2
+	fi &&
+
+	test_commit -C "$SERVER" beta_s &&
+	git -C "$SERVER" checkout master &&
+	test_commit -C "$SERVER" alpha_s &&
+	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2
+}
+
+check_negotiation_tip () {
+	# Ensure that {alpha,beta}_1 are sent as "have", but not {alpha_beta}_2
+	ALPHA_1=$(git -C client rev-parse alpha_1) &&
+	grep "fetch> have $ALPHA_1" trace &&
+	BETA_1=$(git -C client rev-parse beta_1) &&
+	grep "fetch> have $BETA_1" trace &&
+	ALPHA_2=$(git -C client rev-parse alpha_2) &&
+	! grep "fetch> have $ALPHA_2" trace &&
+	BETA_2=$(git -C client rev-parse beta_2) &&
+	! grep "fetch> have $BETA_2" trace
+}
+
+test_expect_success '--negotiation-tip limits "have" lines sent' '
+	setup_negotiation_tip server server 0 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+test_expect_success '--negotiation-tip understands globs' '
+	setup_negotiation_tip server server 0 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=*_1 \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+test_expect_success '--negotiation-tip understands abbreviated SHA-1' '
+	setup_negotiation_tip server server 0 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=$(git -C client rev-parse --short alpha_1) \
+		--negotiation-tip=$(git -C client rev-parse --short beta_1) \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success '--negotiation-tip limits "have" lines sent with HTTP protocol v2' '
+	setup_negotiation_tip "$HTTPD_DOCUMENT_ROOT_PATH/server" \
+		"$HTTPD_URL/smart/server" 1 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+stop_httpd
+
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index 1f8ff7e94..ad8f7c772 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -684,6 +684,9 @@ static int fetch(struct transport *transport,
 			transport, "filter",
 			data->transport_options.filter_options.filter_spec);
 
+	if (data->transport_options.negotiation_tips)
+		warning("Ignoring --negotiation-tip because the protocol does not support it.");
+
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
diff --git a/transport.c b/transport.c
index a32da30de..9f10f8ad9 100644
--- a/transport.c
+++ b/transport.c
@@ -318,6 +318,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.filter_options = data->options.filter_options;
 	args.stateless_rpc = transport->stateless_rpc;
 	args.server_options = transport->server_options;
+	args.negotiation_tips = data->options.negotiation_tips;
 
 	if (!data->got_remote_heads)
 		refs_tmp = get_refs_via_connect(transport, 0, NULL);
diff --git a/transport.h b/transport.h
index 7792b0858..d31be5be6 100644
--- a/transport.h
+++ b/transport.h
@@ -25,6 +25,16 @@ struct git_transport_options {
 	const char *receivepack;
 	struct push_cas_option *cas;
 	struct list_objects_filter_options filter_options;
+
+	/*
+	 * This is only used during fetch. See the documentation of
+	 * negotiation_tips in struct fetch_pack_args.
+	 *
+	 * This field is only supported by transports that support connect or
+	 * stateless_connect. Set this field directly instead of using
+	 * transport_set_option().
+	 */
+	struct oid_array *negotiation_tips;
 };
 
 enum transport_family {
-- 
2.18.0.rc2.346.g013aa6912e-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] fetch-pack: support negotiation tip whitelist
  2018-06-28 22:15 ` [PATCH v3] " Jonathan Tan
@ 2018-06-28 22:20   ` Brandon Williams
  2018-06-29 16:28   ` Junio C Hamano
  1 sibling, 0 replies; 14+ messages in thread
From: Brandon Williams @ 2018-06-28 22:20 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git

On 06/28, Jonathan Tan wrote:
> During negotiation, fetch-pack eventually reports as "have" lines all
> commits reachable from all refs. Allow the user to restrict the commits
> sent in this way by providing a whitelist of tips; only the tips
> themselves and their ancestors will be sent.
> 
> Both globs and single objects are supported.
> 
> This feature is only supported for protocols that support connect or
> stateless-connect (such as HTTP with protocol v2).
> 
> This will speed up negotiation when the repository has multiple
> relatively independent branches (for example, when a repository
> interacts with multiple repositories, such as with linux-next [1] and
> torvalds/linux [2]), and the user knows which local branch is likely to
> have commits in common with the upstream branch they are fetching.
> 
> [1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
> [2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> This is on jt/fetch-pack-negotiator.
> 
> > I don't think that would be strange at all, and no where in git do we
> > handle heads/* but we do already handle refs/heads/* as well as DWIM
> > master.
> >
> > > and (2) I can't think of anywhere in Git
> > > where you can provide either one - it's either SHA-1 and DWIM name, or
> > > SHA-1 and refspec, but not all three.
> >
> > fetch is a perfect example of supporting all three.  I can do
> >
> >   git fetch origin SHA1
> >   git fetch origin master
> >   git fetch origin refs/heads/*:refs/heads/*
> 
> OK, Brandon managed to convince me that this is fine. I've included glob
> support, supporting the same globs that git notes supports.
> ---
>  Documentation/fetch-options.txt | 16 +++++++
>  builtin/fetch.c                 | 41 +++++++++++++++++
>  fetch-pack.c                    | 19 +++++++-
>  fetch-pack.h                    |  7 +++
>  t/t5510-fetch.sh                | 78 +++++++++++++++++++++++++++++++++
>  transport-helper.c              |  3 ++
>  transport.c                     |  1 +
>  transport.h                     | 10 +++++
>  8 files changed, 173 insertions(+), 2 deletions(-)
> 
> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index 97d3217df..6e4db1738 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -42,6 +42,22 @@ the current repository has the same history as the source repository.
>  	.git/shallow. This option updates .git/shallow and accept such
>  	refs.
>  
> +--negotiation-tip=<commit|glob>::
> +	By default, Git will report, to the server, commits reachable
> +	from all local refs to find common commits in an attempt to
> +	reduce the size of the to-be-received packfile. If specified,
> +	Git will only report commits reachable from the given tips.
> +	This is useful to speed up fetches when the user knows which
> +	local ref is likely to have commits in common with the
> +	upstream ref being fetched.
> ++
> +This option may be specified more than once; if so, Git will report
> +commits reachable from any of the given commits.
> ++
> +The argument to this option may be a glob on ref names, a ref, or the (possibly
> +abbreviated SHA-1 of a commit. Specifying a glob is equivalent to specifying
> +this option multiple times, one for each matching ref name.

I think you're missing a closing ')'

Aside from that nit this patch looks good, thanks!


-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v3] fetch-pack: support negotiation tip whitelist
  2018-06-28 22:15 ` [PATCH v3] " Jonathan Tan
  2018-06-28 22:20   ` Brandon Williams
@ 2018-06-29 16:28   ` Junio C Hamano
  1 sibling, 0 replies; 14+ messages in thread
From: Junio C Hamano @ 2018-06-29 16:28 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, bmwill

Jonathan Tan <jonathantanmy@google.com> writes:

>> fetch is a perfect example of supporting all three.  I can do
>>
>>   git fetch origin SHA1
>>   git fetch origin master
>>   git fetch origin refs/heads/*:refs/heads/*
>
> OK, Brandon managed to convince me that this is fine. I've included glob
> support, supporting the same globs that git notes supports.

"git notes"???

As this is to be used in the context of "git fetch", using glob
e.g. "refs/heads/*", is sensible and good enough.  I was actually
wondering if we want the head-match refs/heads/, but as "git fetch
origin refs/heads/" does not work that way, I think we shouldn't.

This is a tangent, but didn't ref-in-want wanted to use head-match
refs/heads/ to match everything under refs/heads/?  If the latest
incarnation wants to do so, we may want to fix that.

> diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
> index 97d3217df..6e4db1738 100644
> --- a/Documentation/fetch-options.txt
> +++ b/Documentation/fetch-options.txt
> @@ -42,6 +42,22 @@ the current repository has the same history as the source repository.
>  	.git/shallow. This option updates .git/shallow and accept such
>  	refs.
>  
> +--negotiation-tip=<commit|glob>::
> +	By default, Git will report, to the server, commits reachable
> +	from all local refs to find common commits in an attempt to
> +	reduce the size of the to-be-received packfile. If specified,
> +	Git will only report commits reachable from the given tips.
> +	This is useful to speed up fetches when the user knows which
> +	local ref is likely to have commits in common with the
> +	upstream ref being fetched.
> ++
> +This option may be specified more than once; if so, Git will report
> +commits reachable from any of the given commits.
> ++
> +The argument to this option may be a glob on ref names, a ref, or the (possibly
> +abbreviated SHA-1 of a commit. Specifying a glob is equivalent to specifying
> +this option multiple times, one for each matching ref name.
> +

> +static int add_oid(const char *refname, const struct object_id *oid, int flags,
> +		   void *cb_data)
> +{
> +	struct oid_array *oids = cb_data;
> +	oid_array_append(oids, oid);
> +	return 0;
> +}

This by itself is not worth a reason to reroll, but please make it a
habit to have a blank line after the run of decls before the first
statement, at least while we still forbid decl-after-stmt.  The
result is easier to read that way.

> +static void add_negotiation_tips(struct git_transport_options *smart_options)
> +{
> +	struct oid_array *oids = xcalloc(1, sizeof(*oids));
> +	int i;
> +	for (i = 0; i < negotiation_tip.nr; i++) {
> +		const char *s = negotiation_tip.items[i].string;
> +		int old_nr;
> +		if (!has_glob_specials(s)) {
> +			struct object_id oid;
> +			if (get_oid(s, &oid))
> +				die("%s is not a valid object", s);
> +			oid_array_append(oids, &oid);
> +			continue;
> +		}
> +		old_nr = oids->nr;
> +		for_each_glob_ref(add_oid, s, oids);
> +		if (old_nr == oids->nr)
> +			warning("Ignoring --negotiation-tip=%s because it does not match any refs",
> +				s);
> +	}
> +	smart_options->negotiation_tips = oids;
> +}

This may insert duplicate object ids if two refs point at the same
object, or nego globs match the same ref twice, but it is OK to have
duplicate object ids in the resulting oids array is OK, because
rev_list_insert_ref() at the end will dedup them anyway.

> diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
> index e402aee6a..8532a6faf 100755
> --- a/t/t5510-fetch.sh
> +++ b/t/t5510-fetch.sh
> @@ -865,4 +865,82 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
>  	test_cmp expect actual
>  '
>  
> +setup_negotiation_tip () {
> +	SERVER="$1"
> +	URL="$2"
> +	USE_PROTOCOL_V2="$3"
> +
> +	rm -rf "$SERVER" client trace &&
> +	git init "$SERVER" &&
> +	test_commit -C "$SERVER" alpha_1 &&
> +	test_commit -C "$SERVER" alpha_2 &&
> +	git -C "$SERVER" checkout --orphan beta &&
> +	test_commit -C "$SERVER" beta_1 &&
> +	test_commit -C "$SERVER" beta_2 &&
> +
> +	git clone "$URL" client &&
> +
> +	if [ "$USE_PROTOCOL_V2" -eq 1 ]

Style: "if test ..."

> +	then
> +		git -C "$SERVER" config protocol.version 2

broken &&-chaining?

> +		git -C client config protocol.version 2
> +	fi &&
> +
> +	test_commit -C "$SERVER" beta_s &&
> +	git -C "$SERVER" checkout master &&
> +	test_commit -C "$SERVER" alpha_s &&
> +	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2
> +}

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [PATCH v4] fetch-pack: support negotiation tip whitelist
  2018-06-25 19:37 [PATCH] fetch-pack: support negotiation tip whitelist Jonathan Tan
                   ` (3 preceding siblings ...)
  2018-06-28 22:15 ` [PATCH v3] " Jonathan Tan
@ 2018-07-02 22:39 ` Jonathan Tan
  2018-07-22  9:09   ` Duy Nguyen
  2019-06-18 13:36   ` Ævar Arnfjörð Bjarmason
  4 siblings, 2 replies; 14+ messages in thread
From: Jonathan Tan @ 2018-07-02 22:39 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, bmwill, gitster

During negotiation, fetch-pack eventually reports as "have" lines all
commits reachable from all refs. Allow the user to restrict the commits
sent in this way by providing a whitelist of tips; only the tips
themselves and their ancestors will be sent.

Both globs and single objects are supported.

This feature is only supported for protocols that support connect or
stateless-connect (such as HTTP with protocol v2).

This will speed up negotiation when the repository has multiple
relatively independent branches (for example, when a repository
interacts with multiple repositories, such as with linux-next [1] and
torvalds/linux [2]), and the user knows which local branch is likely to
have commits in common with the upstream branch they are fetching.

[1] https://kernel.googlesource.com/pub/scm/linux/kernel/git/next/linux-next/
[2] https://kernel.googlesource.com/pub/scm/linux/kernel/git/torvalds/linux/

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
This is on jt/fetch-pack-negotiator.

I noticed that jt/fetch-nego-tip had a version queued with Brandon's
suggestion (of the closing parenthesis) included, but not any of Junio's
suggestions, so here is one with the suggestions from both.

> "git notes"???
>
> As this is to be used in the context of "git fetch", using glob
> e.g. "refs/heads/*", is sensible and good enough.  I was actually
> wondering if we want the head-match refs/heads/, but as "git fetch
> origin refs/heads/" does not work that way, I think we shouldn't.

OK - the above was not in the commit message, so I think we're fine. In
the commit message and in the user-facing documentation, I describe this
as a "glob".

> This is a tangent, but didn't ref-in-want wanted to use head-match
> refs/heads/ to match everything under refs/heads/?  If the latest
> incarnation wants to do so, we may want to fix that.

I don't recall anything like that - the user still specifies a refspec
(like existing users do), and in the protocol, all wanted refs are
matched by exact name.

> This by itself is not worth a reason to reroll, but please make it a
> habit to have a blank line after the run of decls before the first
> statement, at least while we still forbid decl-after-stmt.  The
> result is easier to read that way.

Thanks - I've done it for add_oid() and all the new functions introduced
in this patch.

> Style: "if test ..."

> broken &&-chaining?

Both done.
---
 Documentation/fetch-options.txt | 16 +++++++
 builtin/fetch.c                 | 43 ++++++++++++++++++
 fetch-pack.c                    | 20 ++++++++-
 fetch-pack.h                    |  7 +++
 t/t5510-fetch.sh                | 78 +++++++++++++++++++++++++++++++++
 transport-helper.c              |  3 ++
 transport.c                     |  1 +
 transport.h                     | 10 +++++
 8 files changed, 176 insertions(+), 2 deletions(-)

diff --git a/Documentation/fetch-options.txt b/Documentation/fetch-options.txt
index 97d3217df..2d09f87b4 100644
--- a/Documentation/fetch-options.txt
+++ b/Documentation/fetch-options.txt
@@ -42,6 +42,22 @@ the current repository has the same history as the source repository.
 	.git/shallow. This option updates .git/shallow and accept such
 	refs.
 
+--negotiation-tip=<commit|glob>::
+	By default, Git will report, to the server, commits reachable
+	from all local refs to find common commits in an attempt to
+	reduce the size of the to-be-received packfile. If specified,
+	Git will only report commits reachable from the given tips.
+	This is useful to speed up fetches when the user knows which
+	local ref is likely to have commits in common with the
+	upstream ref being fetched.
++
+This option may be specified more than once; if so, Git will report
+commits reachable from any of the given commits.
++
+The argument to this option may be a glob on ref names, a ref, or the (possibly
+abbreviated) SHA-1 of a commit. Specifying a glob is equivalent to specifying
+this option multiple times, one for each matching ref name.
+
 ifndef::git-pull[]
 --dry-run::
 	Show what would be done, without making any changes.
diff --git a/builtin/fetch.c b/builtin/fetch.c
index ea5b9669a..49ab6ac06 100644
--- a/builtin/fetch.c
+++ b/builtin/fetch.c
@@ -63,6 +63,7 @@ static int shown_url = 0;
 static struct refspec refmap = REFSPEC_INIT_FETCH;
 static struct list_objects_filter_options filter_options;
 static struct string_list server_options = STRING_LIST_INIT_DUP;
+static struct string_list negotiation_tip = STRING_LIST_INIT_NODUP;
 
 static int git_fetch_config(const char *k, const char *v, void *cb)
 {
@@ -174,6 +175,8 @@ static struct option builtin_fetch_options[] = {
 			TRANSPORT_FAMILY_IPV4),
 	OPT_SET_INT('6', "ipv6", &family, N_("use IPv6 addresses only"),
 			TRANSPORT_FAMILY_IPV6),
+	OPT_STRING_LIST(0, "negotiation-tip", &negotiation_tip, N_("revision"),
+			N_("report that we have only objects reachable from this object")),
 	OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options),
 	OPT_END()
 };
@@ -1049,6 +1052,40 @@ static void set_option(struct transport *transport, const char *name, const char
 			name, transport->url);
 }
 
+
+static int add_oid(const char *refname, const struct object_id *oid, int flags,
+		   void *cb_data)
+{
+	struct oid_array *oids = cb_data;
+
+	oid_array_append(oids, oid);
+	return 0;
+}
+
+static void add_negotiation_tips(struct git_transport_options *smart_options)
+{
+	struct oid_array *oids = xcalloc(1, sizeof(*oids));
+	int i;
+
+	for (i = 0; i < negotiation_tip.nr; i++) {
+		const char *s = negotiation_tip.items[i].string;
+		int old_nr;
+		if (!has_glob_specials(s)) {
+			struct object_id oid;
+			if (get_oid(s, &oid))
+				die("%s is not a valid object", s);
+			oid_array_append(oids, &oid);
+			continue;
+		}
+		old_nr = oids->nr;
+		for_each_glob_ref(add_oid, s, oids);
+		if (old_nr == oids->nr)
+			warning("Ignoring --negotiation-tip=%s because it does not match any refs",
+				s);
+	}
+	smart_options->negotiation_tips = oids;
+}
+
 static struct transport *prepare_transport(struct remote *remote, int deepen)
 {
 	struct transport *transport;
@@ -1075,6 +1112,12 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
 			   filter_options.filter_spec);
 		set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
 	}
+	if (negotiation_tip.nr) {
+		if (transport->smart_options)
+			add_negotiation_tips(transport->smart_options);
+		else
+			warning("Ignoring --negotiation-tip because the protocol does not support it.");
+	}
 	return transport;
 }
 
diff --git a/fetch-pack.c b/fetch-pack.c
index ba12085c4..1e50d9008 100644
--- a/fetch-pack.c
+++ b/fetch-pack.c
@@ -213,6 +213,22 @@ static int next_flush(int stateless_rpc, int count)
 	return count;
 }
 
+static void mark_tips(struct fetch_negotiator *negotiator,
+		      const struct oid_array *negotiation_tips)
+{
+	int i;
+
+	if (!negotiation_tips) {
+		for_each_ref(rev_list_insert_ref_oid, negotiator);
+		return;
+	}
+
+	for (i = 0; i < negotiation_tips->nr; i++)
+		rev_list_insert_ref(negotiator, NULL,
+				    &negotiation_tips->oid[i]);
+	return;
+}
+
 static int find_common(struct fetch_negotiator *negotiator,
 		       struct fetch_pack_args *args,
 		       int fd[2], struct object_id *result_oid,
@@ -230,7 +246,7 @@ static int find_common(struct fetch_negotiator *negotiator,
 	if (args->stateless_rpc && multi_ack == 1)
 		die(_("--stateless-rpc requires multi_ack_detailed"));
 
-	for_each_ref(rev_list_insert_ref_oid, negotiator);
+	mark_tips(negotiator, args->negotiation_tips);
 	for_each_cached_alternate(negotiator, insert_one_alternate_object);
 
 	fetching = 0;
@@ -1295,7 +1311,7 @@ static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args,
 			else
 				state = FETCH_SEND_REQUEST;
 
-			for_each_ref(rev_list_insert_ref_oid, &negotiator);
+			mark_tips(&negotiator, args->negotiation_tips);
 			for_each_cached_alternate(&negotiator,
 						  insert_one_alternate_object);
 			break;
diff --git a/fetch-pack.h b/fetch-pack.h
index bb45a366a..1859ee927 100644
--- a/fetch-pack.h
+++ b/fetch-pack.h
@@ -16,6 +16,13 @@ struct fetch_pack_args {
 	const struct string_list *deepen_not;
 	struct list_objects_filter_options filter_options;
 	const struct string_list *server_options;
+
+	/*
+	 * If not NULL, during packfile negotiation, fetch-pack will send "have"
+	 * lines only with these tips and their ancestors.
+	 */
+	const struct oid_array *negotiation_tips;
+
 	unsigned deepen_relative:1;
 	unsigned quiet:1;
 	unsigned keep_pack:1;
diff --git a/t/t5510-fetch.sh b/t/t5510-fetch.sh
index e402aee6a..4e6d049b9 100755
--- a/t/t5510-fetch.sh
+++ b/t/t5510-fetch.sh
@@ -865,4 +865,82 @@ test_expect_success C_LOCALE_OUTPUT 'fetch compact output' '
 	test_cmp expect actual
 '
 
+setup_negotiation_tip () {
+	SERVER="$1"
+	URL="$2"
+	USE_PROTOCOL_V2="$3"
+
+	rm -rf "$SERVER" client trace &&
+	git init "$SERVER" &&
+	test_commit -C "$SERVER" alpha_1 &&
+	test_commit -C "$SERVER" alpha_2 &&
+	git -C "$SERVER" checkout --orphan beta &&
+	test_commit -C "$SERVER" beta_1 &&
+	test_commit -C "$SERVER" beta_2 &&
+
+	git clone "$URL" client &&
+
+	if test "$USE_PROTOCOL_V2" -eq 1
+	then
+		git -C "$SERVER" config protocol.version 2 &&
+		git -C client config protocol.version 2
+	fi &&
+
+	test_commit -C "$SERVER" beta_s &&
+	git -C "$SERVER" checkout master &&
+	test_commit -C "$SERVER" alpha_s &&
+	git -C "$SERVER" tag -d alpha_1 alpha_2 beta_1 beta_2
+}
+
+check_negotiation_tip () {
+	# Ensure that {alpha,beta}_1 are sent as "have", but not {alpha_beta}_2
+	ALPHA_1=$(git -C client rev-parse alpha_1) &&
+	grep "fetch> have $ALPHA_1" trace &&
+	BETA_1=$(git -C client rev-parse beta_1) &&
+	grep "fetch> have $BETA_1" trace &&
+	ALPHA_2=$(git -C client rev-parse alpha_2) &&
+	! grep "fetch> have $ALPHA_2" trace &&
+	BETA_2=$(git -C client rev-parse beta_2) &&
+	! grep "fetch> have $BETA_2" trace
+}
+
+test_expect_success '--negotiation-tip limits "have" lines sent' '
+	setup_negotiation_tip server server 0 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+test_expect_success '--negotiation-tip understands globs' '
+	setup_negotiation_tip server server 0 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=*_1 \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+test_expect_success '--negotiation-tip understands abbreviated SHA-1' '
+	setup_negotiation_tip server server 0 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=$(git -C client rev-parse --short alpha_1) \
+		--negotiation-tip=$(git -C client rev-parse --short beta_1) \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+. "$TEST_DIRECTORY"/lib-httpd.sh
+start_httpd
+
+test_expect_success '--negotiation-tip limits "have" lines sent with HTTP protocol v2' '
+	setup_negotiation_tip "$HTTPD_DOCUMENT_ROOT_PATH/server" \
+		"$HTTPD_URL/smart/server" 1 &&
+	GIT_TRACE_PACKET="$(pwd)/trace" git -C client fetch \
+		--negotiation-tip=alpha_1 --negotiation-tip=beta_1 \
+		origin alpha_s beta_s &&
+	check_negotiation_tip
+'
+
+stop_httpd
+
 test_done
diff --git a/transport-helper.c b/transport-helper.c
index 1f8ff7e94..ad8f7c772 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -684,6 +684,9 @@ static int fetch(struct transport *transport,
 			transport, "filter",
 			data->transport_options.filter_options.filter_spec);
 
+	if (data->transport_options.negotiation_tips)
+		warning("Ignoring --negotiation-tip because the protocol does not support it.");
+
 	if (data->fetch)
 		return fetch_with_fetch(transport, nr_heads, to_fetch);
 
diff --git a/transport.c b/transport.c
index a32da30de..9f10f8ad9 100644
--- a/transport.c
+++ b/transport.c
@@ -318,6 +318,7 @@ static int fetch_refs_via_pack(struct transport *transport,
 	args.filter_options = data->options.filter_options;
 	args.stateless_rpc = transport->stateless_rpc;
 	args.server_options = transport->server_options;
+	args.negotiation_tips = data->options.negotiation_tips;
 
 	if (!data->got_remote_heads)
 		refs_tmp = get_refs_via_connect(transport, 0, NULL);
diff --git a/transport.h b/transport.h
index 7792b0858..d31be5be6 100644
--- a/transport.h
+++ b/transport.h
@@ -25,6 +25,16 @@ struct git_transport_options {
 	const char *receivepack;
 	struct push_cas_option *cas;
 	struct list_objects_filter_options filter_options;
+
+	/*
+	 * This is only used during fetch. See the documentation of
+	 * negotiation_tips in struct fetch_pack_args.
+	 *
+	 * This field is only supported by transports that support connect or
+	 * stateless_connect. Set this field directly instead of using
+	 * transport_set_option().
+	 */
+	struct oid_array *negotiation_tips;
 };
 
 enum transport_family {
-- 
2.18.0.399.gad0ab374a1-goog


^ permalink raw reply related	[flat|nested] 14+ messages in thread

* Re: [PATCH v4] fetch-pack: support negotiation tip whitelist
  2018-07-02 22:39 ` [PATCH v4] " Jonathan Tan
@ 2018-07-22  9:09   ` Duy Nguyen
  2019-06-18 13:36   ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 14+ messages in thread
From: Duy Nguyen @ 2018-07-22  9:09 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: Git Mailing List, Brandon Williams, Junio C Hamano

On Tue, Jul 3, 2018 at 12:41 AM Jonathan Tan <jonathantanmy@google.com> wrote:
> +static void add_negotiation_tips(struct git_transport_options *smart_options)
> +{
> +       struct oid_array *oids = xcalloc(1, sizeof(*oids));
> +       int i;
> +
> +       for (i = 0; i < negotiation_tip.nr; i++) {
> +               const char *s = negotiation_tip.items[i].string;
> +               int old_nr;
> +               if (!has_glob_specials(s)) {
> +                       struct object_id oid;
> +                       if (get_oid(s, &oid))
> +                               die("%s is not a valid object", s);

Please _() this string and the warning() below

> +                       oid_array_append(oids, &oid);
> +                       continue;
> +               }
> +               old_nr = oids->nr;
> +               for_each_glob_ref(add_oid, s, oids);
> +               if (old_nr == oids->nr)
> +                       warning("Ignoring --negotiation-tip=%s because it does not match any refs",
> +                               s);
> +       }
> +       smart_options->negotiation_tips = oids;
> +}
> +
>  static struct transport *prepare_transport(struct remote *remote, int deepen)
>  {
>         struct transport *transport;
> @@ -1075,6 +1112,12 @@ static struct transport *prepare_transport(struct remote *remote, int deepen)
>                            filter_options.filter_spec);
>                 set_option(transport, TRANS_OPT_FROM_PROMISOR, "1");
>         }
> +       if (negotiation_tip.nr) {
> +               if (transport->smart_options)
> +                       add_negotiation_tips(transport->smart_options);
> +               else
> +                       warning("Ignoring --negotiation-tip because the protocol does not support it.");
> +       }
>         return transport;
>  }
-- 
Duy

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4] fetch-pack: support negotiation tip whitelist
  2018-07-02 22:39 ` [PATCH v4] " Jonathan Tan
  2018-07-22  9:09   ` Duy Nguyen
@ 2019-06-18 13:36   ` Ævar Arnfjörð Bjarmason
  2019-06-18 17:30     ` Jonathan Tan
  1 sibling, 1 reply; 14+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2019-06-18 13:36 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, bmwill, gitster


On Tue, Jul 03 2018, Jonathan Tan wrote:

> During negotiation, fetch-pack eventually reports as "have" lines all
> commits reachable from all refs. Allow the user to restrict the commits
> sent in this way by providing a whitelist of tips; only the tips
> themselves and their ancestors will be sent.

I discovered a bug in this...

> @@ -230,7 +246,7 @@ static int find_common(struct fetch_negotiator *negotiator,
>  	if (args->stateless_rpc && multi_ack == 1)
>  		die(_("--stateless-rpc requires multi_ack_detailed"));
>
> -	for_each_ref(rev_list_insert_ref_oid, negotiator);
> +	mark_tips(negotiator, args->negotiation_tips);
>  	for_each_cached_alternate(negotiator, insert_one_alternate_object);
>
>  	fetching = 0;

Here we blindly add objects found in an alternate repo. I found and
debugged this with this:

    diff --git a/fetch-negotiator.h b/fetch-negotiator.h
    index 9e3967ce66..cbe71c9c8d 100644
    --- a/fetch-negotiator.h
    +++ b/fetch-negotiator.h
    @@ -33,2 +33,3 @@ struct fetch_negotiator {
            void (*add_tip)(struct fetch_negotiator *, struct commit *);
    +       int done_adding;

    diff --git a/fetch-pack.c b/fetch-pack.c
    index 3f24d0c8a6..6b43b4f8f1 100644
    --- a/fetch-pack.c
    +++ b/fetch-pack.c
    @@ -238,2 +238,3 @@ static void mark_tips(struct fetch_negotiator *negotiator,
                                        &negotiation_tips->oid[i]);
    +       negotiator->done_adding = 1;
            return;
    diff --git a/negotiator/default.c b/negotiator/default.c
    index 4b78f6bf36..4e45f05f25 100644
    --- a/negotiator/default.c
    +++ b/negotiator/default.c
    @@ -137,2 +137,4 @@ static void add_tip(struct fetch_negotiator *n, struct commit *c)
     {
    +       if (n->done_adding)
    +               return;
            n->known_common = NULL;
    @@ -166,2 +168,3 @@ void default_negotiator_init(struct fetch_negotiator *negotiator)
            negotiator->add_tip = add_tip;
    +       negotiator->done_adding = 0;
            negotiator->next = next;

Perhaps something like that with an assert() is a good idea for the
negotiation backend code in general? It seems rather fragile to depend
on there being no other codepath that calls add_tip() again after some
other code (--negotiation-tip=*) that expects it not to be called again.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [PATCH v4] fetch-pack: support negotiation tip whitelist
  2019-06-18 13:36   ` Ævar Arnfjörð Bjarmason
@ 2019-06-18 17:30     ` Jonathan Tan
  0 siblings, 0 replies; 14+ messages in thread
From: Jonathan Tan @ 2019-06-18 17:30 UTC (permalink / raw)
  To: avarab; +Cc: jonathantanmy, git

> > @@ -230,7 +246,7 @@ static int find_common(struct fetch_negotiator *negotiator,
> >  	if (args->stateless_rpc && multi_ack == 1)
> >  		die(_("--stateless-rpc requires multi_ack_detailed"));
> >
> > -	for_each_ref(rev_list_insert_ref_oid, negotiator);
> > +	mark_tips(negotiator, args->negotiation_tips);
> >  	for_each_cached_alternate(negotiator, insert_one_alternate_object);
> >
> >  	fetching = 0;
> 
> Here we blindly add objects found in an alternate repo. I found and
> debugged this with this:
> 
>     diff --git a/fetch-negotiator.h b/fetch-negotiator.h
>     index 9e3967ce66..cbe71c9c8d 100644
>     --- a/fetch-negotiator.h
>     +++ b/fetch-negotiator.h
>     @@ -33,2 +33,3 @@ struct fetch_negotiator {
>             void (*add_tip)(struct fetch_negotiator *, struct commit *);
>     +       int done_adding;
> 
>     diff --git a/fetch-pack.c b/fetch-pack.c
>     index 3f24d0c8a6..6b43b4f8f1 100644
>     --- a/fetch-pack.c
>     +++ b/fetch-pack.c
>     @@ -238,2 +238,3 @@ static void mark_tips(struct fetch_negotiator *negotiator,
>                                         &negotiation_tips->oid[i]);
>     +       negotiator->done_adding = 1;
>             return;
>     diff --git a/negotiator/default.c b/negotiator/default.c
>     index 4b78f6bf36..4e45f05f25 100644
>     --- a/negotiator/default.c
>     +++ b/negotiator/default.c
>     @@ -137,2 +137,4 @@ static void add_tip(struct fetch_negotiator *n, struct commit *c)
>      {
>     +       if (n->done_adding)
>     +               return;
>             n->known_common = NULL;
>     @@ -166,2 +168,3 @@ void default_negotiator_init(struct fetch_negotiator *negotiator)
>             negotiator->add_tip = add_tip;
>     +       negotiator->done_adding = 0;
>             negotiator->next = next;
> 
> Perhaps something like that with an assert() is a good idea for the
> negotiation backend code in general? It seems rather fragile to depend
> on there being no other codepath that calls add_tip() again after some
> other code (--negotiation-tip=*) that expects it not to be called again.

Thanks for spotting this bug.

There is already some defense from add_tip() not being called
unexpectedly - see negotiator/default.c and negotiator.skipping.c, which
sets add_tip to NULL when next() is called.

I can see that this doesn't help in this case, when we want to declare
done_adding but we haven't called next() yet, but I don't think that
this API layer is the right place to prevent that.

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2019-06-18 17:30 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-06-25 19:37 [PATCH] fetch-pack: support negotiation tip whitelist Jonathan Tan
2018-06-26 17:53 ` Jonathan Nieder
2018-06-26 18:59 ` Junio C Hamano
2018-06-27 18:28 ` [PATCH v2] " Jonathan Tan
2018-06-28 15:56   ` Brandon Williams
2018-06-28 16:12     ` Jonathan Tan
2018-06-28 16:16       ` Brandon Williams
2018-06-28 22:15 ` [PATCH v3] " Jonathan Tan
2018-06-28 22:20   ` Brandon Williams
2018-06-29 16:28   ` Junio C Hamano
2018-07-02 22:39 ` [PATCH v4] " Jonathan Tan
2018-07-22  9:09   ` Duy Nguyen
2019-06-18 13:36   ` Ævar Arnfjörð Bjarmason
2019-06-18 17:30     ` Jonathan Tan

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).