git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [RFC PATCH] upload-pack: fix filter options scope
@ 2020-05-07  9:58 Christian Couder
  2020-05-07 11:36 ` Jeff King
  2020-05-08  8:01 ` [PATCH v2] " Christian Couder
  0 siblings, 2 replies; 12+ messages in thread
From: Christian Couder @ 2020-05-07  9:58 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Derrick Stolee, Jeff King, Taylor Blau,
	Jonathan Tan, Jonathan Nieder, Christian Couder

The upload_pack_v2() function is sometimes called twice in the same
process while 'struct list_objects_filter_options filter_options' was
declared as static at the beginning of 'upload-pack.c'.

This made the check in list_objects_filter_die_if_populated(), which
is called by process_args(), fail the second time upload_pack_v2() is
called, as filter_options had already been populated the first time.

To fix that, filter_options is not static any more, but instead it's
now owned by upload_pack_v2() and upload_pack().

This fixes the first of the 2 bugs documented by d0badf8797
(partial-clone: demonstrate bugs in partial fetch, 2020-02-21).

Helped-by: Derrick Stolee <dstolee@microsoft.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
The issue was originally discussed in:

https://lore.kernel.org/git/dbc1bdcae16f8b9941add514264b0fe04cda48c0.1582129312.git.gitgitgadget@gmail.com/

I am not sure why upload_pack_v2() is called twice. Also it seems
that another similar issue might not be fixed by this. So this patch
is RFC for now. 

 t/t5616-partial-clone.sh |  2 +-
 upload-pack.c            | 40 ++++++++++++++++++++++++++--------------
 2 files changed, 27 insertions(+), 15 deletions(-)

diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 88002b24af..b73c38c29a 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -389,7 +389,7 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' '
 # repository did not have tags during clone, but has tags
 # in the fetch.
 
-test_expect_failure 'verify fetch succeeds when asking for new tags' '
+test_expect_success 'verify fetch succeeds when asking for new tags' '
 	git clone --filter=blob:none "file://$(pwd)/srv.bare" tag-test &&
 	for i in I J K
 	do
diff --git a/upload-pack.c b/upload-pack.c
index 902d0ad5e1..4e22e46294 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -68,7 +68,6 @@ static const char *pack_objects_hook;
 static int filter_capability_requested;
 static int allow_filter;
 static int allow_ref_in_want;
-static struct list_objects_filter_options filter_options;
 
 static int allow_sideband_all;
 
@@ -103,7 +102,8 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
 }
 
 static void create_pack_file(const struct object_array *have_obj,
-			     const struct object_array *want_obj)
+			     const struct object_array *want_obj,
+			     struct list_objects_filter_options *filter_options)
 {
 	struct child_process pack_objects = CHILD_PROCESS_INIT;
 	char data[8193], progress[128];
@@ -140,9 +140,9 @@ static void create_pack_file(const struct object_array *have_obj,
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.choice) {
+	if (filter_options->choice) {
 		const char *spec =
-			expand_list_objects_filter_spec(&filter_options);
+			expand_list_objects_filter_spec(filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
 			sq_quote_buf(&buf, spec);
@@ -848,7 +848,9 @@ static int process_deepen_not(const char *line, struct string_list *deepen_not,
 	return 0;
 }
 
-static void receive_needs(struct packet_reader *reader, struct object_array *want_obj)
+static void receive_needs(struct packet_reader *reader,
+			  struct object_array *want_obj,
+			  struct list_objects_filter_options *filter_options)
 {
 	struct object_array shallows = OBJECT_ARRAY_INIT;
 	struct string_list deepen_not = STRING_LIST_INIT_DUP;
@@ -883,8 +885,8 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
-			list_objects_filter_die_if_populated(&filter_options);
-			parse_list_objects_filter(&filter_options, arg);
+			list_objects_filter_die_if_populated(filter_options);
+			parse_list_objects_filter(filter_options, arg);
 			continue;
 		}
 
@@ -1087,11 +1089,14 @@ void upload_pack(struct upload_pack_options *options)
 	struct string_list symref = STRING_LIST_INIT_DUP;
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct packet_reader reader;
+	struct list_objects_filter_options filter_options;
 
 	stateless_rpc = options->stateless_rpc;
 	timeout = options->timeout;
 	daemon_mode = options->daemon_mode;
 
+	memset(&filter_options, 0, sizeof(filter_options));
+
 	git_config(upload_pack_config, NULL);
 
 	head_ref_namespaced(find_symref, &symref);
@@ -1114,12 +1119,14 @@ void upload_pack(struct upload_pack_options *options)
 			   PACKET_READ_CHOMP_NEWLINE |
 			   PACKET_READ_DIE_ON_ERR_PACKET);
 
-	receive_needs(&reader, &want_obj);
+	receive_needs(&reader, &want_obj, &filter_options);
 	if (want_obj.nr) {
 		struct object_array have_obj = OBJECT_ARRAY_INIT;
 		get_common_commits(&reader, &have_obj, &want_obj);
-		create_pack_file(&have_obj, &want_obj);
+		create_pack_file(&have_obj, &want_obj, &filter_options);
 	}
+
+	list_objects_filter_release(&filter_options);
 }
 
 struct upload_pack_data {
@@ -1250,7 +1257,8 @@ static int parse_have(const char *line, struct oid_array *haves)
 
 static void process_args(struct packet_reader *request,
 			 struct upload_pack_data *data,
-			 struct object_array *want_obj)
+			 struct object_array *want_obj,
+			 struct list_objects_filter_options *filter_options)
 {
 	while (packet_reader_read(request) == PACKET_READ_NORMAL) {
 		const char *arg = request->line;
@@ -1306,8 +1314,8 @@ static void process_args(struct packet_reader *request,
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
-			list_objects_filter_die_if_populated(&filter_options);
-			parse_list_objects_filter(&filter_options, p);
+			list_objects_filter_die_if_populated(filter_options);
+			parse_list_objects_filter(filter_options, p);
 			continue;
 		}
 
@@ -1470,6 +1478,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys,
 	struct upload_pack_data data;
 	struct object_array have_obj = OBJECT_ARRAY_INIT;
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
+	struct list_objects_filter_options filter_options;
 
 	clear_object_flags(ALL_FLAGS);
 
@@ -1478,10 +1487,12 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys,
 	upload_pack_data_init(&data);
 	use_sideband = LARGE_PACKET_MAX;
 
+	memset(&filter_options, 0, sizeof(filter_options));
+
 	while (state != FETCH_DONE) {
 		switch (state) {
 		case FETCH_PROCESS_ARGS:
-			process_args(request, &data, &want_obj);
+			process_args(request, &data, &want_obj, &filter_options);
 
 			if (!want_obj.nr) {
 				/*
@@ -1514,7 +1525,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys,
 			send_shallow_info(&data, &want_obj);
 
 			packet_writer_write(&data.writer, "packfile\n");
-			create_pack_file(&have_obj, &want_obj);
+			create_pack_file(&have_obj, &want_obj, &filter_options);
 			state = FETCH_DONE;
 			break;
 		case FETCH_DONE:
@@ -1525,6 +1536,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys,
 	upload_pack_data_clear(&data);
 	object_array_clear(&have_obj);
 	object_array_clear(&want_obj);
+	list_objects_filter_release(&filter_options);
 	return 0;
 }
 
-- 
2.26.2.562.ga2ab1ba363


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] upload-pack: fix filter options scope
  2020-05-07  9:58 [RFC PATCH] upload-pack: fix filter options scope Christian Couder
@ 2020-05-07 11:36 ` Jeff King
  2020-05-07 12:32   ` Christian Couder
  2020-05-08  8:01 ` [PATCH v2] " Christian Couder
  1 sibling, 1 reply; 12+ messages in thread
From: Jeff King @ 2020-05-07 11:36 UTC (permalink / raw)
  To: Christian Couder
  Cc: git, Junio C Hamano, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

On Thu, May 07, 2020 at 11:58:29AM +0200, Christian Couder wrote:

> I am not sure why upload_pack_v2() is called twice. Also it seems
> that another similar issue might not be fixed by this. So this patch
> is RFC for now.

It's because of the request/response model of v2. The client side of the
conversation looks something like:

  - client connects transport to server

  - client issues command=ls-refs to get the ref advertisement

  - client issues command=fetch and sends wants/have; the server may
    respond with a pack when the negotiation is finished, but may also
    respond with acks, etc, asking for more rounds of negotiation

  - if there's no pack, the client then issues another command=fetch,
    repeating until we get a pack

In stateless git-over-http, each of those request/response pairs happens in
its own server-side process, because the webserver kicks off a separate
CGI for each request. But v2 adapts git://, ssh://, and direct-pipe
connections to use the same request/response model, but all connected to
a single Git process on the other side.

So each upload_pack_v2() call needs to start with a clean slate; it
can't assume anything about previous rounds, and it needs to clean up
any allocated data from those rounds.

So your patch is going in the right direction, but we already have other
similar data handled by upload_pack_data, which has its own init/clear
functions. Probably the filter data should go in there, too.

This is an easy bug to introduce, and I suspect we'll see equivalent
ones in the future (if there aren't already more lurking; there's a lot
of global data in Git, and I really have no idea what subtle
interactions one could see). One thing we could do is to truly run each
v2 command request in its own process, just like git-over-http does. But
there are a lot of complications there around holding the pipe/socket
open, how the parent v2 serving process interacts with the child
requests (e.g., noticing errors), etc. So I'd worry that going that
direction would be just as likely to introduce bugs as fix them.

> diff --git a/upload-pack.c b/upload-pack.c
> index 902d0ad5e1..4e22e46294 100644
> --- a/upload-pack.c
> +++ b/upload-pack.c
> @@ -68,7 +68,6 @@ static const char *pack_objects_hook;
>  static int filter_capability_requested;
>  static int allow_filter;
>  static int allow_ref_in_want;
> -static struct list_objects_filter_options filter_options;

Just looking at nearby bits of global data that might want similar
treatment:

 - allow_filter and allow_ref_in_want are set by the server-side config.
   So they're persisted between upload_pack_v2() calls, but we'd
   overwrite them by calling git_config() in each incarnation

 - filter_capability_requested is set based on client input, but it's
   only used in v1

But boy there are a lot of global variables there to trace through.
Probably a more fruitful way to find similar problems is to look at the
v2 process_args() to see what it touches. It looks like options like
ofs_delta probably ought to be part of upload_pack_data, too. We didn't
notice because it would require a client doing something like:

  command=fetch
  ofs-delta    [claims to support ofs-delta]
  0001
  [wants and haves that don't result in a pack yet...]
  0000
  command=fetch
  [do not claim to support ofs-delta!]
  0001
  [wants and haves that result in a pack]
  0000

The server _shouldn't_ use ofs-delta there (and wouldn't over http,
where the stateless server side that generates the pack knows only about
that second request). But in git-over-ssh, it would.

A client who does this is stupid and wrong (and I didn't check the
protocol spec, but it could very well be violating the spec). So I don't
think it's _that_ big a deal. But it would be nice if all of those
globals got moved into upload_pack_data, and both v1 and v2 just used it
consistently.

-Peff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] upload-pack: fix filter options scope
  2020-05-07 11:36 ` Jeff King
@ 2020-05-07 12:32   ` Christian Couder
  2020-05-07 14:47     ` Jeff King
  0 siblings, 1 reply; 12+ messages in thread
From: Christian Couder @ 2020-05-07 12:32 UTC (permalink / raw)
  To: Jeff King
  Cc: git, Junio C Hamano, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

On Thu, May 7, 2020 at 1:36 PM Jeff King <peff@peff.net> wrote:
>
> On Thu, May 07, 2020 at 11:58:29AM +0200, Christian Couder wrote:
>
> > I am not sure why upload_pack_v2() is called twice. Also it seems
> > that another similar issue might not be fixed by this. So this patch
> > is RFC for now.
>
> It's because of the request/response model of v2. The client side of the
> conversation looks something like:
>
>   - client connects transport to server
>
>   - client issues command=ls-refs to get the ref advertisement
>
>   - client issues command=fetch and sends wants/have; the server may
>     respond with a pack when the negotiation is finished, but may also
>     respond with acks, etc, asking for more rounds of negotiation
>
>   - if there's no pack, the client then issues another command=fetch,
>     repeating until we get a pack
>
> In stateless git-over-http, each of those request/response pairs happens in
> its own server-side process, because the webserver kicks off a separate
> CGI for each request. But v2 adapts git://, ssh://, and direct-pipe
> connections to use the same request/response model, but all connected to
> a single Git process on the other side.
>
> So each upload_pack_v2() call needs to start with a clean slate; it
> can't assume anything about previous rounds, and it needs to clean up
> any allocated data from those rounds.
>
> So your patch is going in the right direction, but we already have other
> similar data handled by upload_pack_data, which has its own init/clear
> functions. Probably the filter data should go in there, too.

Thank you for the detailed explanations!

It looks like 'struct upload_pack_data' is indeed used by
upload_pack_v2(). It's not used by upload_pack() though. So if I move
'struct list_objects_filter_options filter_options' there, I would
need to also make upload_pack() either have its own filter_options or
use 'struct upload_pack_data' too.

> This is an easy bug to introduce, and I suspect we'll see equivalent
> ones in the future (if there aren't already more lurking; there's a lot
> of global data in Git, and I really have no idea what subtle
> interactions one could see). One thing we could do is to truly run each
> v2 command request in its own process, just like git-over-http does. But
> there are a lot of complications there around holding the pipe/socket
> open, how the parent v2 serving process interacts with the child
> requests (e.g., noticing errors), etc. So I'd worry that going that
> direction would be just as likely to introduce bugs as fix them.

I agree that it seems better to just get rid of the global data used
by the upload pack functions.

> > diff --git a/upload-pack.c b/upload-pack.c
> > index 902d0ad5e1..4e22e46294 100644
> > --- a/upload-pack.c
> > +++ b/upload-pack.c
> > @@ -68,7 +68,6 @@ static const char *pack_objects_hook;
> >  static int filter_capability_requested;
> >  static int allow_filter;
> >  static int allow_ref_in_want;
> > -static struct list_objects_filter_options filter_options;
>
> Just looking at nearby bits of global data that might want similar
> treatment:
>
>  - allow_filter and allow_ref_in_want are set by the server-side config.
>    So they're persisted between upload_pack_v2() calls, but we'd
>    overwrite them by calling git_config() in each incarnation
>
>  - filter_capability_requested is set based on client input, but it's
>    only used in v1
>
> But boy there are a lot of global variables there to trace through.

Yeah, unfortunately.

> Probably a more fruitful way to find similar problems is to look at the
> v2 process_args() to see what it touches. It looks like options like
> ofs_delta probably ought to be part of upload_pack_data, too. We didn't
> notice because it would require a client doing something like:
>
>   command=fetch
>   ofs-delta    [claims to support ofs-delta]
>   0001
>   [wants and haves that don't result in a pack yet...]
>   0000
>   command=fetch
>   [do not claim to support ofs-delta!]
>   0001
>   [wants and haves that result in a pack]
>   0000
>
> The server _shouldn't_ use ofs-delta there (and wouldn't over http,
> where the stateless server side that generates the pack knows only about
> that second request). But in git-over-ssh, it would.
>
> A client who does this is stupid and wrong (and I didn't check the
> protocol spec, but it could very well be violating the spec). So I don't
> think it's _that_ big a deal. But it would be nice if all of those
> globals got moved into upload_pack_data, and both v1 and v2 just used it
> consistently.

Unfortunately as I discuss a bit above 'struct upload_pack_data' is
not used by v1, only by v2. I think making v1 use upload_pack_data
might be nice, but it's a separate issue. So I am tempted to just
improve the commit message, adding some information from you and from
this discussion, and then re-sending.

Another intermediate solution would be to add filter_options to
'struct upload_pack_data' for v2 and to use filter_options directly
for v1.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] upload-pack: fix filter options scope
  2020-05-07 12:32   ` Christian Couder
@ 2020-05-07 14:47     ` Jeff King
  2020-05-07 16:42       ` Taylor Blau
  0 siblings, 1 reply; 12+ messages in thread
From: Jeff King @ 2020-05-07 14:47 UTC (permalink / raw)
  To: Christian Couder
  Cc: git, Junio C Hamano, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

On Thu, May 07, 2020 at 02:32:39PM +0200, Christian Couder wrote:

> > A client who does this is stupid and wrong (and I didn't check the
> > protocol spec, but it could very well be violating the spec). So I don't
> > think it's _that_ big a deal. But it would be nice if all of those
> > globals got moved into upload_pack_data, and both v1 and v2 just used it
> > consistently.
> 
> Unfortunately as I discuss a bit above 'struct upload_pack_data' is
> not used by v1, only by v2. I think making v1 use upload_pack_data
> might be nice, but it's a separate issue. So I am tempted to just
> improve the commit message, adding some information from you and from
> this discussion, and then re-sending.
> 
> Another intermediate solution would be to add filter_options to
> 'struct upload_pack_data' for v2 and to use filter_options directly
> for v1.

I think we do want the v1 path to use upload_pack_data in the long run,
just to keep things less confusing.

But if you don't want to go that far now, I'd strongly prefer the second
option, pushing v2 towards how we ultimately want it to look, and
plumbing a local variable through v1 paths as necessary. Or perhaps just
renaming the global to filter_options_v1 or something, so that v2 code
paths aren't tempted to use it.

-Peff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [RFC PATCH] upload-pack: fix filter options scope
  2020-05-07 14:47     ` Jeff King
@ 2020-05-07 16:42       ` Taylor Blau
  0 siblings, 0 replies; 12+ messages in thread
From: Taylor Blau @ 2020-05-07 16:42 UTC (permalink / raw)
  To: Jeff King
  Cc: Christian Couder, git, Junio C Hamano, Derrick Stolee,
	Taylor Blau, Jonathan Tan, Jonathan Nieder, Christian Couder

On Thu, May 07, 2020 at 10:47:10AM -0400, Jeff King wrote:
> On Thu, May 07, 2020 at 02:32:39PM +0200, Christian Couder wrote:
>
> > > A client who does this is stupid and wrong (and I didn't check the
> > > protocol spec, but it could very well be violating the spec). So I don't
> > > think it's _that_ big a deal. But it would be nice if all of those
> > > globals got moved into upload_pack_data, and both v1 and v2 just used it
> > > consistently.
> >
> > Unfortunately as I discuss a bit above 'struct upload_pack_data' is
> > not used by v1, only by v2. I think making v1 use upload_pack_data
> > might be nice, but it's a separate issue. So I am tempted to just
> > improve the commit message, adding some information from you and from
> > this discussion, and then re-sending.
> >
> > Another intermediate solution would be to add filter_options to
> > 'struct upload_pack_data' for v2 and to use filter_options directly
> > for v1.
>
> I think we do want the v1 path to use upload_pack_data in the long run,
> just to keep things less confusing.
>
> But if you don't want to go that far now, I'd strongly prefer the second
> option, pushing v2 towards how we ultimately want it to look, and
> plumbing a local variable through v1 paths as necessary. Or perhaps just
> renaming the global to filter_options_v1 or something, so that v2 code
> paths aren't tempted to use it.

I would be very much in favor of that direction. Thanks for CC-ing me on
this thread, since any changes here will obviously create merge
conflicts on my end when I send the "configure allowed object filters"
patches.

> -Peff

Thanks,
Taylor

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v2] upload-pack: fix filter options scope
  2020-05-07  9:58 [RFC PATCH] upload-pack: fix filter options scope Christian Couder
  2020-05-07 11:36 ` Jeff King
@ 2020-05-08  8:01 ` Christian Couder
  2020-05-08 13:06   ` Jeff King
  2020-05-08 18:09   ` [PATCH v3] upload-pack: clear filter_options for each v2 fetch command Christian Couder
  1 sibling, 2 replies; 12+ messages in thread
From: Christian Couder @ 2020-05-08  8:01 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Derrick Stolee, Jeff King, Taylor Blau,
	Jonathan Tan, Jonathan Nieder, Christian Couder

Because of the request/response model of protocol v2, the
upload_pack_v2() function is sometimes called twice in the same
process, while 'struct list_objects_filter_options filter_options'
was declared as static at the beginning of 'upload-pack.c'.

This made the check in list_objects_filter_die_if_populated(), which
is called by process_args(), fail the second time upload_pack_v2() is
called, as filter_options had already been populated the first time.

To fix that, filter_options is not static any more. It's now owned
directly by upload_pack(). It's now also part of 'struct
upload_pack_data', so that it's owned indirectly by upload_pack_v2().

In the long term, the goal is to also have upload_pack() use
'struct upload_pack_data', so adding filter_options to this struct
makes more sense than to have it owned directly by upload_pack_v2().

This fixes the first of the 2 bugs documented by d0badf8797
(partial-clone: demonstrate bugs in partial fetch, 2020-02-21).

Helped-by: Derrick Stolee <dstolee@microsoft.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
The changes since the previous RFC version are the following:

  - now filter_options is part of struct upload_pack_data as
    suggested by Peff and Taylor
  - improved commit message
  - updated comment before the test that used to fail

 t/t5616-partial-clone.sh |  7 +++----
 upload-pack.c            | 34 ++++++++++++++++++++++------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 88002b24af..8a27452a51 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -384,12 +384,11 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' '
 	grep "want $(cat hash)" trace
 '
 
-# The following two tests must be in this order, or else
-# the first will not fail. It is important that the srv.bare
-# repository did not have tags during clone, but has tags
+# The following two tests must be in this order. It is important that
+# the srv.bare repository did not have tags during clone, but has tags
 # in the fetch.
 
-test_expect_failure 'verify fetch succeeds when asking for new tags' '
+test_expect_success 'verify fetch succeeds when asking for new tags' '
 	git clone --filter=blob:none "file://$(pwd)/srv.bare" tag-test &&
 	for i in I J K
 	do
diff --git a/upload-pack.c b/upload-pack.c
index 902d0ad5e1..4d4dd7cd41 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -68,7 +68,6 @@ static const char *pack_objects_hook;
 static int filter_capability_requested;
 static int allow_filter;
 static int allow_ref_in_want;
-static struct list_objects_filter_options filter_options;
 
 static int allow_sideband_all;
 
@@ -103,7 +102,8 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
 }
 
 static void create_pack_file(const struct object_array *have_obj,
-			     const struct object_array *want_obj)
+			     const struct object_array *want_obj,
+			     struct list_objects_filter_options *filter_options)
 {
 	struct child_process pack_objects = CHILD_PROCESS_INIT;
 	char data[8193], progress[128];
@@ -140,9 +140,9 @@ static void create_pack_file(const struct object_array *have_obj,
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.choice) {
+	if (filter_options->choice) {
 		const char *spec =
-			expand_list_objects_filter_spec(&filter_options);
+			expand_list_objects_filter_spec(filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
 			sq_quote_buf(&buf, spec);
@@ -848,7 +848,9 @@ static int process_deepen_not(const char *line, struct string_list *deepen_not,
 	return 0;
 }
 
-static void receive_needs(struct packet_reader *reader, struct object_array *want_obj)
+static void receive_needs(struct packet_reader *reader,
+			  struct object_array *want_obj,
+			  struct list_objects_filter_options *filter_options)
 {
 	struct object_array shallows = OBJECT_ARRAY_INIT;
 	struct string_list deepen_not = STRING_LIST_INIT_DUP;
@@ -883,8 +885,8 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
-			list_objects_filter_die_if_populated(&filter_options);
-			parse_list_objects_filter(&filter_options, arg);
+			list_objects_filter_die_if_populated(filter_options);
+			parse_list_objects_filter(filter_options, arg);
 			continue;
 		}
 
@@ -1087,11 +1089,14 @@ void upload_pack(struct upload_pack_options *options)
 	struct string_list symref = STRING_LIST_INIT_DUP;
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct packet_reader reader;
+	struct list_objects_filter_options filter_options;
 
 	stateless_rpc = options->stateless_rpc;
 	timeout = options->timeout;
 	daemon_mode = options->daemon_mode;
 
+	memset(&filter_options, 0, sizeof(filter_options));
+
 	git_config(upload_pack_config, NULL);
 
 	head_ref_namespaced(find_symref, &symref);
@@ -1114,12 +1119,14 @@ void upload_pack(struct upload_pack_options *options)
 			   PACKET_READ_CHOMP_NEWLINE |
 			   PACKET_READ_DIE_ON_ERR_PACKET);
 
-	receive_needs(&reader, &want_obj);
+	receive_needs(&reader, &want_obj, &filter_options);
 	if (want_obj.nr) {
 		struct object_array have_obj = OBJECT_ARRAY_INIT;
 		get_common_commits(&reader, &have_obj, &want_obj);
-		create_pack_file(&have_obj, &want_obj);
+		create_pack_file(&have_obj, &want_obj, &filter_options);
 	}
+
+	list_objects_filter_release(&filter_options);
 }
 
 struct upload_pack_data {
@@ -1134,6 +1141,8 @@ struct upload_pack_data {
 	int deepen_rev_list;
 	int deepen_relative;
 
+	struct list_objects_filter_options filter_options;
+
 	struct packet_writer writer;
 
 	unsigned stateless_rpc : 1;
@@ -1169,6 +1178,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	oid_array_clear(&data->haves);
 	object_array_clear(&data->shallows);
 	string_list_clear(&data->deepen_not, 0);
+	list_objects_filter_release(&data->filter_options);
 }
 
 static int parse_want(struct packet_writer *writer, const char *line,
@@ -1306,8 +1316,8 @@ static void process_args(struct packet_reader *request,
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
-			list_objects_filter_die_if_populated(&filter_options);
-			parse_list_objects_filter(&filter_options, p);
+			list_objects_filter_die_if_populated(&data->filter_options);
+			parse_list_objects_filter(&data->filter_options, p);
 			continue;
 		}
 
@@ -1514,7 +1524,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys,
 			send_shallow_info(&data, &want_obj);
 
 			packet_writer_write(&data.writer, "packfile\n");
-			create_pack_file(&have_obj, &want_obj);
+			create_pack_file(&have_obj, &want_obj, &data.filter_options);
 			state = FETCH_DONE;
 			break;
 		case FETCH_DONE:
-- 
2.26.2.561.g07d8ea56f2.dirty


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] upload-pack: fix filter options scope
  2020-05-08  8:01 ` [PATCH v2] " Christian Couder
@ 2020-05-08 13:06   ` Jeff King
  2020-05-08 15:46     ` Junio C Hamano
  2020-05-08 18:09   ` [PATCH v3] upload-pack: clear filter_options for each v2 fetch command Christian Couder
  1 sibling, 1 reply; 12+ messages in thread
From: Jeff King @ 2020-05-08 13:06 UTC (permalink / raw)
  To: Christian Couder
  Cc: git, Junio C Hamano, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

On Fri, May 08, 2020 at 10:01:15AM +0200, Christian Couder wrote:

> The changes since the previous RFC version are the following:
> 
>   - now filter_options is part of struct upload_pack_data as
>     suggested by Peff and Taylor
>   - improved commit message
>   - updated comment before the test that used to fail

Thanks, this version looks good to me.

>  static void create_pack_file(const struct object_array *have_obj,
> -			     const struct object_array *want_obj)
> +			     const struct object_array *want_obj,
> +			     struct list_objects_filter_options *filter_options)

I had hoped that stuffing it into upload_pack_data would require fewer
changes passing it around, but I guess many of these functions don't
know about upload_pack_data in the first place. Oh well. I think this is
the best we can do for now. In the long run we'd perhaps want to take
upload_pack_data there, but it wouldn't help until the v0 path also uses
that struct.

-Peff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] upload-pack: fix filter options scope
  2020-05-08 13:06   ` Jeff King
@ 2020-05-08 15:46     ` Junio C Hamano
  2020-05-08 17:16       ` Jeff King
  0 siblings, 1 reply; 12+ messages in thread
From: Junio C Hamano @ 2020-05-08 15:46 UTC (permalink / raw)
  To: Christian Couder, Jeff King
  Cc: git, Derrick Stolee, Taylor Blau, Jonathan Tan, Jonathan Nieder,
	Christian Couder

Jeff King <peff@peff.net> writes:

> On Fri, May 08, 2020 at 10:01:15AM +0200, Christian Couder wrote:
>
>> The changes since the previous RFC version are the following:
>> 
>>   - now filter_options is part of struct upload_pack_data as
>>     suggested by Peff and Taylor
>>   - improved commit message
>>   - updated comment before the test that used to fail
>
> Thanks, this version looks good to me.
>
>>  static void create_pack_file(const struct object_array *have_obj,
>> -			     const struct object_array *want_obj)
>> +			     const struct object_array *want_obj,
>> +			     struct list_objects_filter_options *filter_options)
>
> I had hoped that stuffing it into upload_pack_data would require fewer
> changes passing it around, but I guess many of these functions don't
> know about upload_pack_data in the first place. Oh well. I think this is
> the best we can do for now. In the long run we'd perhaps want to take
> upload_pack_data there, but it wouldn't help until the v0 path also uses
> that struct.

Yup, I agree that this v2-only fix is a good first step.

Even though the log message itself got a lot better explaining the
nature of the issue, I do not think the title of the patch does a
good job explaining what it is about to the readers of shortlog.

"fix" is a meaningless word in a bugfix patch, and it does not make
it clear what bad effect of the original code had by not giving a
clean-slate "options" variable to the second invocation of the
callchain.

Is it that the server side was incapable of serving a follow-up
fetch request in the same process when protocol v2 was in use?
Perhaps

    upload-pack: allow follow-up fetch in protocol v2

or something?

I care about singling out protocol v2 because then we would
immediately know that backporting this to older maintenance track is
of lower priority as the plan is to flip the default back to v0.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] upload-pack: fix filter options scope
  2020-05-08 15:46     ` Junio C Hamano
@ 2020-05-08 17:16       ` Jeff King
  2020-05-08 18:00         ` Christian Couder
  2020-05-08 18:11         ` Junio C Hamano
  0 siblings, 2 replies; 12+ messages in thread
From: Jeff King @ 2020-05-08 17:16 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Christian Couder, git, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

On Fri, May 08, 2020 at 08:46:58AM -0700, Junio C Hamano wrote:

> Even though the log message itself got a lot better explaining the
> nature of the issue, I do not think the title of the patch does a
> good job explaining what it is about to the readers of shortlog.
> 
> "fix" is a meaningless word in a bugfix patch, and it does not make
> it clear what bad effect of the original code had by not giving a
> clean-slate "options" variable to the second invocation of the
> callchain.
> 
> Is it that the server side was incapable of serving a follow-up
> fetch request in the same process when protocol v2 was in use?
> Perhaps
> 
>     upload-pack: allow follow-up fetch in protocol v2
> 
> or something?

Yeah, I agree that's better. Maybe even:

  upload-pack: clear filter_options for each v2 fetch command

which is more direct (we already allow follow-up fetches; they just
don't work sometimes) and makes it clear that this is about fixing the
filter feature with v2.

-Peff

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] upload-pack: fix filter options scope
  2020-05-08 17:16       ` Jeff King
@ 2020-05-08 18:00         ` Christian Couder
  2020-05-08 18:11         ` Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Christian Couder @ 2020-05-08 18:00 UTC (permalink / raw)
  To: Jeff King
  Cc: Junio C Hamano, git, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

On Fri, May 8, 2020 at 7:16 PM Jeff King <peff@peff.net> wrote:
>
> On Fri, May 08, 2020 at 08:46:58AM -0700, Junio C Hamano wrote:

> > Is it that the server side was incapable of serving a follow-up
> > fetch request in the same process when protocol v2 was in use?
> > Perhaps
> >
> >     upload-pack: allow follow-up fetch in protocol v2
> >
> > or something?
>
> Yeah, I agree that's better. Maybe even:
>
>   upload-pack: clear filter_options for each v2 fetch command

Ok, I will resend soon with the above title.

> which is more direct (we already allow follow-up fetches; they just
> don't work sometimes) and makes it clear that this is about fixing the
> filter feature with v2.

Thanks,
Christian.

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH v3] upload-pack: clear filter_options for each v2 fetch command
  2020-05-08  8:01 ` [PATCH v2] " Christian Couder
  2020-05-08 13:06   ` Jeff King
@ 2020-05-08 18:09   ` Christian Couder
  1 sibling, 0 replies; 12+ messages in thread
From: Christian Couder @ 2020-05-08 18:09 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Derrick Stolee, Jeff King, Taylor Blau,
	Jonathan Tan, Jonathan Nieder, Christian Couder

Because of the request/response model of protocol v2, the
upload_pack_v2() function is sometimes called twice in the same
process, while 'struct list_objects_filter_options filter_options'
was declared as static at the beginning of 'upload-pack.c'.

This made the check in list_objects_filter_die_if_populated(), which
is called by process_args(), fail the second time upload_pack_v2() is
called, as filter_options had already been populated the first time.

To fix that, filter_options is not static any more. It's now owned
directly by upload_pack(). It's now also part of 'struct
upload_pack_data', so that it's owned indirectly by upload_pack_v2().

In the long term, the goal is to also have upload_pack() use
'struct upload_pack_data', so adding filter_options to this struct
makes more sense than to have it owned directly by upload_pack_v2().

This fixes the first of the 2 bugs documented by d0badf8797
(partial-clone: demonstrate bugs in partial fetch, 2020-02-21).

Helped-by: Derrick Stolee <dstolee@microsoft.com>
Helped-by: Jeff King <peff@peff.net>
Helped-by: Taylor Blau <me@ttaylorr.com>
Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---
The only change compared to V2 is an improved commit title, thanks
to Peff and Junio.

 t/t5616-partial-clone.sh |  7 +++----
 upload-pack.c            | 34 ++++++++++++++++++++++------------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/t/t5616-partial-clone.sh b/t/t5616-partial-clone.sh
index 88002b24af..8a27452a51 100755
--- a/t/t5616-partial-clone.sh
+++ b/t/t5616-partial-clone.sh
@@ -384,12 +384,11 @@ test_expect_success 'fetch lazy-fetches only to resolve deltas, protocol v2' '
 	grep "want $(cat hash)" trace
 '
 
-# The following two tests must be in this order, or else
-# the first will not fail. It is important that the srv.bare
-# repository did not have tags during clone, but has tags
+# The following two tests must be in this order. It is important that
+# the srv.bare repository did not have tags during clone, but has tags
 # in the fetch.
 
-test_expect_failure 'verify fetch succeeds when asking for new tags' '
+test_expect_success 'verify fetch succeeds when asking for new tags' '
 	git clone --filter=blob:none "file://$(pwd)/srv.bare" tag-test &&
 	for i in I J K
 	do
diff --git a/upload-pack.c b/upload-pack.c
index 902d0ad5e1..4d4dd7cd41 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -68,7 +68,6 @@ static const char *pack_objects_hook;
 static int filter_capability_requested;
 static int allow_filter;
 static int allow_ref_in_want;
-static struct list_objects_filter_options filter_options;
 
 static int allow_sideband_all;
 
@@ -103,7 +102,8 @@ static int write_one_shallow(const struct commit_graft *graft, void *cb_data)
 }
 
 static void create_pack_file(const struct object_array *have_obj,
-			     const struct object_array *want_obj)
+			     const struct object_array *want_obj,
+			     struct list_objects_filter_options *filter_options)
 {
 	struct child_process pack_objects = CHILD_PROCESS_INIT;
 	char data[8193], progress[128];
@@ -140,9 +140,9 @@ static void create_pack_file(const struct object_array *have_obj,
 		argv_array_push(&pack_objects.args, "--delta-base-offset");
 	if (use_include_tag)
 		argv_array_push(&pack_objects.args, "--include-tag");
-	if (filter_options.choice) {
+	if (filter_options->choice) {
 		const char *spec =
-			expand_list_objects_filter_spec(&filter_options);
+			expand_list_objects_filter_spec(filter_options);
 		if (pack_objects.use_shell) {
 			struct strbuf buf = STRBUF_INIT;
 			sq_quote_buf(&buf, spec);
@@ -848,7 +848,9 @@ static int process_deepen_not(const char *line, struct string_list *deepen_not,
 	return 0;
 }
 
-static void receive_needs(struct packet_reader *reader, struct object_array *want_obj)
+static void receive_needs(struct packet_reader *reader,
+			  struct object_array *want_obj,
+			  struct list_objects_filter_options *filter_options)
 {
 	struct object_array shallows = OBJECT_ARRAY_INIT;
 	struct string_list deepen_not = STRING_LIST_INIT_DUP;
@@ -883,8 +885,8 @@ static void receive_needs(struct packet_reader *reader, struct object_array *wan
 		if (skip_prefix(reader->line, "filter ", &arg)) {
 			if (!filter_capability_requested)
 				die("git upload-pack: filtering capability not negotiated");
-			list_objects_filter_die_if_populated(&filter_options);
-			parse_list_objects_filter(&filter_options, arg);
+			list_objects_filter_die_if_populated(filter_options);
+			parse_list_objects_filter(filter_options, arg);
 			continue;
 		}
 
@@ -1087,11 +1089,14 @@ void upload_pack(struct upload_pack_options *options)
 	struct string_list symref = STRING_LIST_INIT_DUP;
 	struct object_array want_obj = OBJECT_ARRAY_INIT;
 	struct packet_reader reader;
+	struct list_objects_filter_options filter_options;
 
 	stateless_rpc = options->stateless_rpc;
 	timeout = options->timeout;
 	daemon_mode = options->daemon_mode;
 
+	memset(&filter_options, 0, sizeof(filter_options));
+
 	git_config(upload_pack_config, NULL);
 
 	head_ref_namespaced(find_symref, &symref);
@@ -1114,12 +1119,14 @@ void upload_pack(struct upload_pack_options *options)
 			   PACKET_READ_CHOMP_NEWLINE |
 			   PACKET_READ_DIE_ON_ERR_PACKET);
 
-	receive_needs(&reader, &want_obj);
+	receive_needs(&reader, &want_obj, &filter_options);
 	if (want_obj.nr) {
 		struct object_array have_obj = OBJECT_ARRAY_INIT;
 		get_common_commits(&reader, &have_obj, &want_obj);
-		create_pack_file(&have_obj, &want_obj);
+		create_pack_file(&have_obj, &want_obj, &filter_options);
 	}
+
+	list_objects_filter_release(&filter_options);
 }
 
 struct upload_pack_data {
@@ -1134,6 +1141,8 @@ struct upload_pack_data {
 	int deepen_rev_list;
 	int deepen_relative;
 
+	struct list_objects_filter_options filter_options;
+
 	struct packet_writer writer;
 
 	unsigned stateless_rpc : 1;
@@ -1169,6 +1178,7 @@ static void upload_pack_data_clear(struct upload_pack_data *data)
 	oid_array_clear(&data->haves);
 	object_array_clear(&data->shallows);
 	string_list_clear(&data->deepen_not, 0);
+	list_objects_filter_release(&data->filter_options);
 }
 
 static int parse_want(struct packet_writer *writer, const char *line,
@@ -1306,8 +1316,8 @@ static void process_args(struct packet_reader *request,
 		}
 
 		if (allow_filter && skip_prefix(arg, "filter ", &p)) {
-			list_objects_filter_die_if_populated(&filter_options);
-			parse_list_objects_filter(&filter_options, p);
+			list_objects_filter_die_if_populated(&data->filter_options);
+			parse_list_objects_filter(&data->filter_options, p);
 			continue;
 		}
 
@@ -1514,7 +1524,7 @@ int upload_pack_v2(struct repository *r, struct argv_array *keys,
 			send_shallow_info(&data, &want_obj);
 
 			packet_writer_write(&data.writer, "packfile\n");
-			create_pack_file(&have_obj, &want_obj);
+			create_pack_file(&have_obj, &want_obj, &data.filter_options);
 			state = FETCH_DONE;
 			break;
 		case FETCH_DONE:
-- 
2.26.2.562.g0c5c0b446d.dirty


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH v2] upload-pack: fix filter options scope
  2020-05-08 17:16       ` Jeff King
  2020-05-08 18:00         ` Christian Couder
@ 2020-05-08 18:11         ` Junio C Hamano
  1 sibling, 0 replies; 12+ messages in thread
From: Junio C Hamano @ 2020-05-08 18:11 UTC (permalink / raw)
  To: Jeff King
  Cc: Christian Couder, git, Derrick Stolee, Taylor Blau, Jonathan Tan,
	Jonathan Nieder, Christian Couder

Jeff King <peff@peff.net> writes:

> Maybe even:
>
>   upload-pack: clear filter_options for each v2 fetch command
>
> which is more direct (we already allow follow-up fetches; they just
> don't work sometimes) and makes it clear that this is about fixing the
> filter feature with v2.

Christian may want to send in an updated one, but will queue with
that title in the meantime.

Thanks.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2020-05-08 18:11 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-05-07  9:58 [RFC PATCH] upload-pack: fix filter options scope Christian Couder
2020-05-07 11:36 ` Jeff King
2020-05-07 12:32   ` Christian Couder
2020-05-07 14:47     ` Jeff King
2020-05-07 16:42       ` Taylor Blau
2020-05-08  8:01 ` [PATCH v2] " Christian Couder
2020-05-08 13:06   ` Jeff King
2020-05-08 15:46     ` Junio C Hamano
2020-05-08 17:16       ` Jeff King
2020-05-08 18:00         ` Christian Couder
2020-05-08 18:11         ` Junio C Hamano
2020-05-08 18:09   ` [PATCH v3] upload-pack: clear filter_options for each v2 fetch command Christian Couder

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).