git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] sub-process: refactor handshake to common function
@ 2017-07-24 21:38 Jonathan Tan
  2017-07-24 22:21 ` Jonathan Nieder
                   ` (8 more replies)
  0 siblings, 9 replies; 22+ messages in thread
From: Jonathan Tan @ 2017-07-24 21:38 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, benpeart, larsxschneider

Refactor, into a common function, the version and capability negotiation
done when invoking a long-running process as a clean or smudge filter.
This will be useful for other Git code that needs to interact similarly
with a long-running process.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
This will be useful for anyone implementing functionality like that in
[1].

It is unfortunate that the code grew larger because it had to be more
generic, but I think this is better than having (in the future) 2 or
more separate handshake functions.

I also don't think that the protocol should be permissive - I think
things would be simpler if all protocol errors were fatal errors. As it
is, most parts are permissive, but packet_read_line() already dies
anyway upon a malformed packet, so it may not be too drastic a change to
change this. For reference, the original protocol comes from [2].

[1] https://public-inbox.org/git/20170714132651.170708-2-benpeart@microsoft.com/
[2] edcc858 ("convert: add filter.<driver>.process option", 2016-10-17)
---
 convert.c             | 61 ++++-----------------------------
 pkt-line.c            | 19 -----------
 pkt-line.h            |  2 --
 sub-process.c         | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
 sub-process.h         | 18 ++++++++++
 t/t0021-conversion.sh |  2 +-
 6 files changed, 120 insertions(+), 76 deletions(-)

diff --git a/convert.c b/convert.c
index deaf0ba7b..ec8ecc2ea 100644
--- a/convert.c
+++ b/convert.c
@@ -512,62 +512,15 @@ static struct hashmap subprocess_map;
 
 static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
 {
-	int err;
+	static int versions[] = {2, 0};
+	static struct subprocess_capability capabilities[] = {
+		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
+	};
 	struct cmd2process *entry = (struct cmd2process *)subprocess;
-	struct string_list cap_list = STRING_LIST_INIT_NODUP;
-	char *cap_buf;
-	const char *cap_name;
-	struct child_process *process = &subprocess->process;
-	const char *cmd = subprocess->cmd;
-
-	sigchain_push(SIGPIPE, SIG_IGN);
-
-	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
-	if (err)
-		goto done;
-
-	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
-	if (err) {
-		error("external filter '%s' does not support filter protocol version 2", cmd);
-		goto done;
-	}
-	err = strcmp(packet_read_line(process->out, NULL), "version=2");
-	if (err)
-		goto done;
-	err = packet_read_line(process->out, NULL) != NULL;
-	if (err)
-		goto done;
-
-	err = packet_writel(process->in, "capability=clean", "capability=smudge", NULL);
-
-	for (;;) {
-		cap_buf = packet_read_line(process->out, NULL);
-		if (!cap_buf)
-			break;
-		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
-
-		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
-			continue;
-
-		cap_name = cap_list.items[1].string;
-		if (!strcmp(cap_name, "clean")) {
-			entry->supported_capabilities |= CAP_CLEAN;
-		} else if (!strcmp(cap_name, "smudge")) {
-			entry->supported_capabilities |= CAP_SMUDGE;
-		} else {
-			warning(
-				"external filter '%s' requested unsupported filter capability '%s'",
-				cmd, cap_name
-			);
-		}
-
-		string_list_clear(&cap_list, 0);
-	}
-
-done:
-	sigchain_pop(SIGPIPE);
 
-	return err;
+	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
+				    capabilities,
+				    &entry->supported_capabilities);
 }
 
 static int apply_multi_file_filter(const char *path, const char *src, size_t len,
diff --git a/pkt-line.c b/pkt-line.c
index 9d845ecc3..7db911957 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
 	return status;
 }
 
-int packet_writel(int fd, const char *line, ...)
-{
-	va_list args;
-	int err;
-	va_start(args, line);
-	for (;;) {
-		if (!line)
-			break;
-		if (strlen(line) > LARGE_PACKET_DATA_MAX)
-			return -1;
-		err = packet_write_fmt_gently(fd, "%s\n", line);
-		if (err)
-			return err;
-		line = va_arg(args, const char*);
-	}
-	va_end(args);
-	return packet_flush_gently(fd);
-}
-
 static int packet_write_gently(const int fd_out, const char *buf, size_t size)
 {
 	static char packet_write_buffer[LARGE_PACKET_MAX];
diff --git a/pkt-line.h b/pkt-line.h
index 450183b64..66ef610fc 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
 int packet_flush_gently(int fd);
 int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-LAST_ARG_MUST_BE_NULL
-int packet_writel(int fd, const char *line, ...);
 int write_packetized_from_fd(int fd_in, int fd_out);
 int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
 
diff --git a/sub-process.c b/sub-process.c
index a3cfab1a9..1a3f39bdf 100644
--- a/sub-process.c
+++ b/sub-process.c
@@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
 	hashmap_add(hashmap, entry);
 	return 0;
 }
+
+int subprocess_handshake(struct subprocess_entry *entry,
+			 const char *welcome_prefix,
+			 int *versions,
+			 int *chosen_version,
+			 struct subprocess_capability *capabilities,
+			 unsigned int *supported_capabilities) {
+	int version_scratch;
+	unsigned int capabilities_scratch;
+	struct child_process *process = &entry->process;
+	int i;
+	char *line;
+	const char *p;
+
+	if (!chosen_version)
+		chosen_version = &version_scratch;
+	if (!supported_capabilities)
+		supported_capabilities = &capabilities_scratch;
+
+	sigchain_push(SIGPIPE, SIG_IGN);
+
+	if (packet_write_fmt_gently(process->in, "%sclient\n",
+				    welcome_prefix)) {
+		error("Could not write client identification");
+		goto error;
+	}
+	for (i = 0; versions[i]; i++) {
+		if (packet_write_fmt_gently(process->in, "version=%d\n",
+					    versions[i])) {
+			error("Could not write requested version");
+			goto error;
+		}
+	}
+	if (packet_flush_gently(process->in))
+		goto error;
+
+	if (!(line = packet_read_line(process->out, NULL)) ||
+	    !skip_prefix(line, welcome_prefix, &p) ||
+	    strcmp(p, "server")) {
+		error("Unexpected line '%s', expected %sserver",
+		      line ? line : "<flush packet>", welcome_prefix);
+		goto error;
+	}
+	if (!(line = packet_read_line(process->out, NULL)) ||
+	    !skip_prefix(line, "version=", &p) ||
+	    strtol_i(p, 10, chosen_version)) {
+		error("Unexpected line '%s', expected version",
+		      line ? line : "<flush packet>");
+		goto error;
+	}
+	for (i = 0; versions[i]; i++) {
+		if (versions[i] == *chosen_version)
+			goto version_found;
+	}
+	error("Version %d not supported", *chosen_version);
+	goto error;
+version_found:
+	if ((line = packet_read_line(process->out, NULL))) {
+		error("Unexpected line '%s', expected flush", line);
+		goto error;
+	}
+
+	for (i = 0; capabilities[i].name; i++) {
+		if (packet_write_fmt_gently(process->in, "capability=%s\n",
+					    capabilities[i].name)) {
+			error("Could not write requested capability");
+			goto error;
+		}
+	}
+	if (packet_flush_gently(process->in))
+		goto error;
+
+	while ((line = packet_read_line(process->out, NULL))) {
+		if (!skip_prefix(line, "capability=", &p))
+			continue;
+
+		for (i = 0; capabilities[i].name; i++) {
+			if (!strcmp(p, capabilities[i].name)) {
+				*supported_capabilities |= capabilities[i].flag;
+				goto capability_found;
+			}
+		}
+		warning("external filter requested unsupported filter capability '%s'",
+			p);
+capability_found:
+		;
+	}
+
+	sigchain_pop(SIGPIPE);
+	return 0;
+error:
+	sigchain_pop(SIGPIPE);
+	return 1;
+}
diff --git a/sub-process.h b/sub-process.h
index 96a2cca36..a72e7f7cf 100644
--- a/sub-process.h
+++ b/sub-process.h
@@ -18,6 +18,11 @@ struct subprocess_entry {
 	struct child_process process;
 };
 
+struct subprocess_capability {
+	const char *name;
+	unsigned int flag;
+};
+
 /* subprocess functions */
 
 extern int cmd2process_cmp(const void *unused_cmp_data,
@@ -41,6 +46,19 @@ static inline struct child_process *subprocess_get_child_process(
 	return &entry->process;
 }
 
+/*
+ * Perform the handshake to a long-running process as described in the
+ * gitattributes documentation using the given requested versions and
+ * capabilities. The "versions" and "capabilities" parameters are arrays
+ * terminated by a 0 or blank struct.
+ */
+int subprocess_handshake(struct subprocess_entry *entry,
+			 const char *welcome_prefix,
+			 int *versions,
+			 int *chosen_version,
+			 struct subprocess_capability *capabilities,
+			 unsigned int *supported_capabilities);
+
 /*
  * Helper function that will read packets looking for "status=<foo>"
  * key/value pairs and return the value from the last "status" packet
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 161f56044..d191a7228 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
 
 		cp "$TEST_ROOT/test.o" test.r &&
 		test_must_fail git add . 2>git-stderr.log &&
-		grep "does not support filter protocol version" git-stderr.log
+		grep "expected git-filter-server" git-stderr.log
 	)
 '
 
-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
@ 2017-07-24 22:21 ` Jonathan Nieder
  2017-07-25 14:38 ` Ben Peart
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: Jonathan Nieder @ 2017-07-24 22:21 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, benpeart, larsxschneider

Hi,

Jonathan Tan wrote:

> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---

Sounds like a sensible thing to do.

[...]
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -18,6 +18,11 @@ struct subprocess_entry {
>  	struct child_process process;
>  };
>  
> +struct subprocess_capability {
> +	const char *name;
> +	unsigned int flag;

What does this flag represent?  What values can it have?  A comment
might help.

[...]
> @@ -41,6 +46,19 @@ static inline struct child_process *subprocess_get_child_process(
>  	return &entry->process;
>  }
>  
> +/*
> + * Perform the handshake to a long-running process as described in the
> + * gitattributes documentation using the given requested versions and
> + * capabilities. The "versions" and "capabilities" parameters are arrays
> + * terminated by a 0 or blank struct.
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities);
> +

Is there a more precise pointer within the gitattributes documentation
that describes what this does?  I searched for "handshake" and found
nothing.  Is the "Long Running Filter Process" section where I should
have started?

The API docs for this header are currently in
Documentation/technical/api-sub-process.txt.  Perhaps these docs
should also go there, or, even better, the docs in
Documentation/technical/ could move to this header in a preparatory
patch.

Aside (not about this patch): why is the subprocess object called
struct subprocess_entry?  Would it make sense to rename it to struct
subprocess?

Back to this function.  Is this a function I should only call once,
when first launching a subprocess, or can I call it again later?  A
note about suggested usage might help.

[...]
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
>  
>  		cp "$TEST_ROOT/test.o" test.r &&
>  		test_must_fail git add . 2>git-stderr.log &&
> -		grep "does not support filter protocol version" git-stderr.log
> +		grep "expected git-filter-server" git-stderr.log

This error message looks a little less friendly than the old one.
Is that okay because it is not supposed to happen in practice?

[...]
> --- a/convert.c
> +++ b/convert.c
> @@ -512,62 +512,15 @@ static struct hashmap subprocess_map;
>  
>  static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
>  {
> -	int err;
> -	struct cmd2process *entry = (struct cmd2process *)subprocess;
[... many lines snipped ...]
> -	return err;
> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> +		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
> +	};
> +	struct cmd2process *entry = (struct cmd2process *)subprocess;
> +
> +	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);
>  }

API looks nice.

[...]
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co

Implementation looks sane from a quick glance.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
  2017-07-24 22:21 ` Jonathan Nieder
@ 2017-07-25 14:38 ` Ben Peart
  2017-07-25 17:53   ` Jonathan Tan
  2017-07-25 18:29 ` [PATCH v2 0/2] " Jonathan Tan
                   ` (6 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Ben Peart @ 2017-07-25 14:38 UTC (permalink / raw)
  To: Jonathan Tan, git; +Cc: benpeart, larsxschneider, chriscool



On 7/24/2017 5:38 PM, Jonathan Tan wrote:
> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
> 

Christian Couder has been working on a similar refactoring for the perl 
versions of very similar helper functions.  They can be found in the 
following patch series:

https://public-inbox.org/git/20170620075523.26961-1-chriscool@tuxfamily.org/

In particular:

     - Patches 02/49 to 08/49 create a Git/Packet.pm module by
       refactoring "t0021/rot13-filter.pl". Functions from this new
       module will be used later in test scripts.

Some differences I noticed: this patch does both the version and 
capability negotiation and it lives in "sub-process.h/c" files.  In the 
perl script patch series, the version negotiation is in the new 
"packet.pm" module but it does not include the capability negotiation.

It would make sense to me for these to have the same API and usage 
behaviors.  Perhaps pull them together into a single patch series so 
that we can review and keep them in sync?

> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> This will be useful for anyone implementing functionality like that in
> [1].
> 
> It is unfortunate that the code grew larger because it had to be more
> generic, but I think this is better than having (in the future) 2 or
> more separate handshake functions.
> 

I'm always happy to see an effort to refactor common code into reusable 
APIs.  Its a good engineering practice that makes it easier to 
stabilize, extend and maintain the code. The challenge is making sure 
the common function supports all the requirements of the various callers 
and that the increase in complexity doesn't outweigh the benefit of 
centralizing the logic.

> I also don't think that the protocol should be permissive - I think
> things would be simpler if all protocol errors were fatal errors. As it
> is, most parts are permissive, but packet_read_line() already dies
> anyway upon a malformed packet, so it may not be too drastic a change to
> change this. For reference, the original protocol comes from [2].
> 
> [1] https://public-inbox.org/git/20170714132651.170708-2-benpeart@microsoft.com/
> [2] edcc858 ("convert: add filter.<driver>.process option", 2016-10-17)
> ---
>   convert.c             | 61 ++++-----------------------------
>   pkt-line.c            | 19 -----------
>   pkt-line.h            |  2 --
>   sub-process.c         | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
>   sub-process.h         | 18 ++++++++++
>   t/t0021-conversion.sh |  2 +-
>   6 files changed, 120 insertions(+), 76 deletions(-)
> 
> diff --git a/convert.c b/convert.c
> index deaf0ba7b..ec8ecc2ea 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -512,62 +512,15 @@ static struct hashmap subprocess_map;
>   
>   static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
>   {
> -	int err;
> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> +		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
> +	};
>   	struct cmd2process *entry = (struct cmd2process *)subprocess;
> -	struct string_list cap_list = STRING_LIST_INIT_NODUP;
> -	char *cap_buf;
> -	const char *cap_name;
> -	struct child_process *process = &subprocess->process;
> -	const char *cmd = subprocess->cmd;
> -
> -	sigchain_push(SIGPIPE, SIG_IGN);
> -
> -	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
> -	if (err)
> -		goto done;
> -
> -	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
> -	if (err) {
> -		error("external filter '%s' does not support filter protocol version 2", cmd);
> -		goto done;
> -	}
> -	err = strcmp(packet_read_line(process->out, NULL), "version=2");
> -	if (err)
> -		goto done;
> -	err = packet_read_line(process->out, NULL) != NULL;
> -	if (err)
> -		goto done;
> -
> -	err = packet_writel(process->in, "capability=clean", "capability=smudge", NULL);
> -
> -	for (;;) {
> -		cap_buf = packet_read_line(process->out, NULL);
> -		if (!cap_buf)
> -			break;
> -		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
> -
> -		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
> -			continue;
> -
> -		cap_name = cap_list.items[1].string;
> -		if (!strcmp(cap_name, "clean")) {
> -			entry->supported_capabilities |= CAP_CLEAN;
> -		} else if (!strcmp(cap_name, "smudge")) {
> -			entry->supported_capabilities |= CAP_SMUDGE;
> -		} else {
> -			warning(
> -				"external filter '%s' requested unsupported filter capability '%s'",
> -				cmd, cap_name
> -			);
> -		}
> -
> -		string_list_clear(&cap_list, 0);
> -	}
> -
> -done:
> -	sigchain_pop(SIGPIPE);
>   
> -	return err;
> +	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);
>   }
>   
>   static int apply_multi_file_filter(const char *path, const char *src, size_t len,
> diff --git a/pkt-line.c b/pkt-line.c
> index 9d845ecc3..7db911957 100644
> --- a/pkt-line.c
> +++ b/pkt-line.c
> @@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
>   	return status;
>   }
>   
> -int packet_writel(int fd, const char *line, ...)
> -{

While I found the writel functions useful, all my use cases were in 
implementing the client/server version and capability handshake. 
Hopefully, all those can be replaced with this new API.

I'm of the opinion that having a helper function that isn't actively 
used just results in dead code that has to be maintained so it makes 
sense to remove it.

> -	va_list args;
> -	int err;
> -	va_start(args, line);
> -	for (;;) {
> -		if (!line)
> -			break;
> -		if (strlen(line) > LARGE_PACKET_DATA_MAX)
> -			return -1;
> -		err = packet_write_fmt_gently(fd, "%s\n", line);
> -		if (err)
> -			return err;
> -		line = va_arg(args, const char*);
> -	}
> -	va_end(args);
> -	return packet_flush_gently(fd);
> -}
> -
>   static int packet_write_gently(const int fd_out, const char *buf, size_t size)
>   {
>   	static char packet_write_buffer[LARGE_PACKET_MAX];
> diff --git a/pkt-line.h b/pkt-line.h
> index 450183b64..66ef610fc 100644
> --- a/pkt-line.h
> +++ b/pkt-line.h
> @@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
>   void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
>   int packet_flush_gently(int fd);
>   int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> -LAST_ARG_MUST_BE_NULL
> -int packet_writel(int fd, const char *line, ...);
>   int write_packetized_from_fd(int fd_in, int fd_out);
>   int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
>   
> diff --git a/sub-process.c b/sub-process.c
> index a3cfab1a9..1a3f39bdf 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
>   	hashmap_add(hashmap, entry);
>   	return 0;
>   }
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities) {
> +	int version_scratch;
> +	unsigned int capabilities_scratch;
> +	struct child_process *process = &entry->process;
> +	int i;
> +	char *line;
> +	const char *p;
> +
> +	if (!chosen_version)
> +		chosen_version = &version_scratch;
> +	if (!supported_capabilities)
> +		supported_capabilities = &capabilities_scratch;
> +
> +	sigchain_push(SIGPIPE, SIG_IGN);
> +
> +	if (packet_write_fmt_gently(process->in, "%sclient\n",
> +				    welcome_prefix)) {
> +		error("Could not write client identification");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> +					    versions[i])) {
> +			error("Could not write requested version");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, welcome_prefix, &p) ||
> +	    strcmp(p, "server")) {
> +		error("Unexpected line '%s', expected %sserver",
> +		      line ? line : "<flush packet>", welcome_prefix);
> +		goto error;
> +	}
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, "version=", &p) ||
> +	    strtol_i(p, 10, chosen_version)) {
> +		error("Unexpected line '%s', expected version",
> +		      line ? line : "<flush packet>");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (versions[i] == *chosen_version)
> +			goto version_found;
> +	}

This doesn't look like it supports negotiating a common version between 
the client and server.  If a client supports version 1, 2, and 3 and the 
server only supports version 1 and 2, which version and capabilities 
will be used?

> +	error("Version %d not supported", *chosen_version);
> +	goto error;
> +version_found:
> +	if ((line = packet_read_line(process->out, NULL))) {
> +		error("Unexpected line '%s', expected flush", line);
> +		goto error;
> +	}
> +
> +	for (i = 0; capabilities[i].name; i++) {
> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> +					    capabilities[i].name)) {
> +			error("Could not write requested capability");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	while ((line = packet_read_line(process->out, NULL))) {
> +		if (!skip_prefix(line, "capability=", &p))
> +			continue;
> +
> +		for (i = 0; capabilities[i].name; i++) {
> +			if (!strcmp(p, capabilities[i].name)) {
> +				*supported_capabilities |= capabilities[i].flag;
> +				goto capability_found;
> +			}
> +		}
> +		warning("external filter requested unsupported filter capability '%s'",
> +			p);
> +capability_found:
> +		;
> +	}
> +
> +	sigchain_pop(SIGPIPE);
> +	return 0;
> +error:
> +	sigchain_pop(SIGPIPE);
> +	return 1;
> +}
> diff --git a/sub-process.h b/sub-process.h
> index 96a2cca36..a72e7f7cf 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -18,6 +18,11 @@ struct subprocess_entry {
>   	struct child_process process;
>   };
>   
> +struct subprocess_capability {
> +	const char *name;
> +	unsigned int flag;
> +};
> +
>   /* subprocess functions */
>   
>   extern int cmd2process_cmp(const void *unused_cmp_data,
> @@ -41,6 +46,19 @@ static inline struct child_process *subprocess_get_child_process(
>   	return &entry->process;
>   }
>   
> +/*
> + * Perform the handshake to a long-running process as described in the
> + * gitattributes documentation using the given requested versions and
> + * capabilities. The "versions" and "capabilities" parameters are arrays
> + * terminated by a 0 or blank struct.
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities);
> +
>   /*
>    * Helper function that will read packets looking for "status=<foo>"
>    * key/value pairs and return the value from the last "status" packet
> diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
> index 161f56044..d191a7228 100755
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
>   
>   		cp "$TEST_ROOT/test.o" test.r &&
>   		test_must_fail git add . 2>git-stderr.log &&
> -		grep "does not support filter protocol version" git-stderr.log
> +		grep "expected git-filter-server" git-stderr.log
>   	)
>   '
>   
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] sub-process: refactor handshake to common function
  2017-07-25 14:38 ` Ben Peart
@ 2017-07-25 17:53   ` Jonathan Tan
  0 siblings, 0 replies; 22+ messages in thread
From: Jonathan Tan @ 2017-07-25 17:53 UTC (permalink / raw)
  To: Ben Peart; +Cc: git, benpeart, larsxschneider, chriscool

On Tue, 25 Jul 2017 10:38:51 -0400
Ben Peart <peartben@gmail.com> wrote:

> Christian Couder has been working on a similar refactoring for the perl 
> versions of very similar helper functions.  They can be found in the 
> following patch series:
> 
> https://public-inbox.org/git/20170620075523.26961-1-chriscool@tuxfamily.org/
> 
> In particular:
> 
>      - Patches 02/49 to 08/49 create a Git/Packet.pm module by
>        refactoring "t0021/rot13-filter.pl". Functions from this new
>        module will be used later in test scripts.
> 
> Some differences I noticed: this patch does both the version and 
> capability negotiation and it lives in "sub-process.h/c" files.  In the 
> perl script patch series, the version negotiation is in the new 
> "packet.pm" module but it does not include the capability negotiation.
> 
> It would make sense to me for these to have the same API and usage 
> behaviors.  Perhaps pull them together into a single patch series so 
> that we can review and keep them in sync?

Thanks for the pointer. These are different languages and different use
cases (mine for production, his for tests), though, so I don't think
consistency in API and behavior is practical.

> > It is unfortunate that the code grew larger because it had to be more
> > generic, but I think this is better than having (in the future) 2 or
> > more separate handshake functions.
> 
> I'm always happy to see an effort to refactor common code into reusable 
> APIs.  Its a good engineering practice that makes it easier to 
> stabilize, extend and maintain the code. The challenge is making sure 
> the common function supports all the requirements of the various callers 
> and that the increase in complexity doesn't outweigh the benefit of 
> centralizing the logic.

Agreed - thanks.

> > +int subprocess_handshake(struct subprocess_entry *entry,
> > +			 const char *welcome_prefix,
> > +			 int *versions,
> > +			 int *chosen_version,
> > +			 struct subprocess_capability *capabilities,
> > +			 unsigned int *supported_capabilities) {
> > +	int version_scratch;
> > +	unsigned int capabilities_scratch;
> > +	struct child_process *process = &entry->process;
> > +	int i;
> > +	char *line;
> > +	const char *p;
> > +
> > +	if (!chosen_version)
> > +		chosen_version = &version_scratch;
> > +	if (!supported_capabilities)
> > +		supported_capabilities = &capabilities_scratch;
> > +
> > +	sigchain_push(SIGPIPE, SIG_IGN);
> > +
> > +	if (packet_write_fmt_gently(process->in, "%sclient\n",
> > +				    welcome_prefix)) {
> > +		error("Could not write client identification");
> > +		goto error;
> > +	}
> > +	for (i = 0; versions[i]; i++) {
> > +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> > +					    versions[i])) {
> > +			error("Could not write requested version");
> > +			goto error;
> > +		}
> > +	}
> > +	if (packet_flush_gently(process->in))
> > +		goto error;
> > +
> > +	if (!(line = packet_read_line(process->out, NULL)) ||
> > +	    !skip_prefix(line, welcome_prefix, &p) ||
> > +	    strcmp(p, "server")) {
> > +		error("Unexpected line '%s', expected %sserver",
> > +		      line ? line : "<flush packet>", welcome_prefix);
> > +		goto error;
> > +	}
> > +	if (!(line = packet_read_line(process->out, NULL)) ||
> > +	    !skip_prefix(line, "version=", &p) ||
> > +	    strtol_i(p, 10, chosen_version)) {
> > +		error("Unexpected line '%s', expected version",
> > +		      line ? line : "<flush packet>");
> > +		goto error;
> > +	}
> > +	for (i = 0; versions[i]; i++) {
> > +		if (versions[i] == *chosen_version)
> > +			goto version_found;
> > +	}
> 
> This doesn't look like it supports negotiating a common version between 
> the client and server.  If a client supports version 1, 2, and 3 and the 
> server only supports version 1 and 2, which version and capabilities 
> will be used?

In the protocol, the client sends a list of versions it supports (see
the "for" loop above), and expects the server to write the single
version that the server chooses (see the part where we read one single
version line). In your example, the client would write "version=1\n",
"version=2\n", and "version=3\n", and the server would write
"version=2\n" (well, it could write "version=1\n" too). So version 2 (or
1) will be used.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v2 0/2] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
  2017-07-24 22:21 ` Jonathan Nieder
  2017-07-25 14:38 ` Ben Peart
@ 2017-07-25 18:29 ` Jonathan Tan
  2017-07-25 18:29 ` [PATCH v2 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 22+ messages in thread
From: Jonathan Tan @ 2017-07-25 18:29 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, jrnieder, peartben

Changes from v1:
 - Moved API docs to header file
 - Updated documentation
 - Updated commit message of patch 2 to explain why the error message
   changed

Jonathan Tan (2):
  Documentation: migrate sub-process docs to header
  sub-process: refactor handshake to common function

 Documentation/technical/api-sub-process.txt | 59 ------------------
 convert.c                                   | 61 +++----------------
 pkt-line.c                                  | 19 ------
 pkt-line.h                                  |  2 -
 sub-process.c                               | 94 +++++++++++++++++++++++++++++
 sub-process.h                               | 51 +++++++++++++++-
 t/t0021-conversion.sh                       |  2 +-
 7 files changed, 151 insertions(+), 137 deletions(-)
 delete mode 100644 Documentation/technical/api-sub-process.txt

-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH v2 1/2] Documentation: migrate sub-process docs to header
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
                   ` (2 preceding siblings ...)
  2017-07-25 18:29 ` [PATCH v2 0/2] " Jonathan Tan
@ 2017-07-25 18:29 ` Jonathan Tan
  2017-07-25 20:18   ` Brandon Williams
  2017-07-25 18:29 ` [PATCH v2 2/2] sub-process: refactor handshake to common function Jonathan Tan
                   ` (4 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Jonathan Tan @ 2017-07-25 18:29 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, jrnieder, peartben

Move the documentation for the sub-process API from a separate txt file
to its header file.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/technical/api-sub-process.txt | 59 -----------------------------
 sub-process.h                               | 25 +++++++++++-
 2 files changed, 23 insertions(+), 61 deletions(-)
 delete mode 100644 Documentation/technical/api-sub-process.txt

diff --git a/Documentation/technical/api-sub-process.txt b/Documentation/technical/api-sub-process.txt
deleted file mode 100644
index 793508cf3..000000000
--- a/Documentation/technical/api-sub-process.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-sub-process API
-===============
-
-The sub-process API makes it possible to run background sub-processes
-for the entire lifetime of a Git invocation. If Git needs to communicate
-with an external process multiple times, then this can reduces the process
-invocation overhead. Git and the sub-process communicate through stdin and
-stdout.
-
-The sub-processes are kept in a hashmap by command name and looked up
-via the subprocess_find_entry function.  If an existing instance can not
-be found then a new process should be created and started.  When the
-parent git command terminates, all sub-processes are also terminated.
-
-This API is based on the run-command API.
-
-Data structures
----------------
-
-* `struct subprocess_entry`
-
-The sub-process structure.  Members should not be accessed directly.
-
-Types
------
-
-'int(*subprocess_start_fn)(struct subprocess_entry *entry)'::
-
-	User-supplied function to initialize the sub-process.  This is
-	typically used to negotiate the interface version and capabilities.
-
-
-Functions
----------
-
-`cmd2process_cmp`::
-
-	Function to test two subprocess hashmap entries for equality.
-
-`subprocess_start`::
-
-	Start a subprocess and add it to the subprocess hashmap.
-
-`subprocess_stop`::
-
-	Kill a subprocess and remove it from the subprocess hashmap.
-
-`subprocess_find_entry`::
-
-	Find a subprocess in the subprocess hashmap.
-
-`subprocess_get_child_process`::
-
-	Get the underlying `struct child_process` from a subprocess.
-
-`subprocess_read_status`::
-
-	Helper function to read packets looking for the last "status=<foo>"
-	key/value pair.
diff --git a/sub-process.h b/sub-process.h
index 96a2cca36..9e6975b5e 100644
--- a/sub-process.h
+++ b/sub-process.h
@@ -6,12 +6,23 @@
 #include "run-command.h"
 
 /*
- * Generic implementation of background process infrastructure.
- * See: Documentation/technical/api-sub-process.txt
+ * The sub-process API makes it possible to run background sub-processes
+ * for the entire lifetime of a Git invocation. If Git needs to communicate
+ * with an external process multiple times, then this can reduces the process
+ * invocation overhead. Git and the sub-process communicate through stdin and
+ * stdout.
+ *
+ * The sub-processes are kept in a hashmap by command name and looked up
+ * via the subprocess_find_entry function.  If an existing instance can not
+ * be found then a new process should be created and started.  When the
+ * parent git command terminates, all sub-processes are also terminated.
+ * 
+ * This API is based on the run-command API.
  */
 
  /* data structures */
 
+/* Members should not be accessed directly. */
 struct subprocess_entry {
 	struct hashmap_entry ent; /* must be the first member! */
 	const char *cmd;
@@ -20,21 +31,31 @@ struct subprocess_entry {
 
 /* subprocess functions */
 
+/* Function to test two subprocess hashmap entries for equality. */
 extern int cmd2process_cmp(const void *unused_cmp_data,
 			   const struct subprocess_entry *e1,
 			   const struct subprocess_entry *e2,
 			   const void *unused_keydata);
 
+/*
+ * User-supplied function to initialize the sub-process.  This is
+ * typically used to negotiate the interface version and capabilities.
+ */
 typedef int(*subprocess_start_fn)(struct subprocess_entry *entry);
+
+/* Start a subprocess and add it to the subprocess hashmap. */
 int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, const char *cmd,
 		subprocess_start_fn startfn);
 
+/* Kill a subprocess and remove it from the subprocess hashmap. */
 void subprocess_stop(struct hashmap *hashmap, struct subprocess_entry *entry);
 
+/* Find a subprocess in the subprocess hashmap. */
 struct subprocess_entry *subprocess_find_entry(struct hashmap *hashmap, const char *cmd);
 
 /* subprocess helper functions */
 
+/* Get the underlying `struct child_process` from a subprocess. */
 static inline struct child_process *subprocess_get_child_process(
 		struct subprocess_entry *entry)
 {
-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH v2 2/2] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
                   ` (3 preceding siblings ...)
  2017-07-25 18:29 ` [PATCH v2 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
@ 2017-07-25 18:29 ` Jonathan Tan
  2017-07-25 20:28   ` Brandon Williams
  2017-07-25 22:25   ` Junio C Hamano
  2017-07-26 16:52 ` [PATCH] " Lars Schneider
                   ` (3 subsequent siblings)
  8 siblings, 2 replies; 22+ messages in thread
From: Jonathan Tan @ 2017-07-25 18:29 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, jrnieder, peartben

Refactor, into a common function, the version and capability negotiation
done when invoking a long-running process as a clean or smudge filter.
This will be useful for other Git code that needs to interact similarly
with a long-running process.

As you can see in the change to t0021, this commit changes the error
message reported when the long-running process does not introduce itself
with the expected "server"-terminated line. Originally, the error
message reports that the filter "does not support filter protocol
version 2", differentiating between the old single-file filter protocol
and the new multi-file filter protocol - I have updated it to something
more generic and useful.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 convert.c             | 61 ++++-----------------------------
 pkt-line.c            | 19 -----------
 pkt-line.h            |  2 --
 sub-process.c         | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
 sub-process.h         | 26 ++++++++++++++
 t/t0021-conversion.sh |  2 +-
 6 files changed, 128 insertions(+), 76 deletions(-)

diff --git a/convert.c b/convert.c
index deaf0ba7b..ec8ecc2ea 100644
--- a/convert.c
+++ b/convert.c
@@ -512,62 +512,15 @@ static struct hashmap subprocess_map;
 
 static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
 {
-	int err;
+	static int versions[] = {2, 0};
+	static struct subprocess_capability capabilities[] = {
+		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
+	};
 	struct cmd2process *entry = (struct cmd2process *)subprocess;
-	struct string_list cap_list = STRING_LIST_INIT_NODUP;
-	char *cap_buf;
-	const char *cap_name;
-	struct child_process *process = &subprocess->process;
-	const char *cmd = subprocess->cmd;
-
-	sigchain_push(SIGPIPE, SIG_IGN);
-
-	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
-	if (err)
-		goto done;
-
-	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
-	if (err) {
-		error("external filter '%s' does not support filter protocol version 2", cmd);
-		goto done;
-	}
-	err = strcmp(packet_read_line(process->out, NULL), "version=2");
-	if (err)
-		goto done;
-	err = packet_read_line(process->out, NULL) != NULL;
-	if (err)
-		goto done;
-
-	err = packet_writel(process->in, "capability=clean", "capability=smudge", NULL);
-
-	for (;;) {
-		cap_buf = packet_read_line(process->out, NULL);
-		if (!cap_buf)
-			break;
-		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
-
-		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
-			continue;
-
-		cap_name = cap_list.items[1].string;
-		if (!strcmp(cap_name, "clean")) {
-			entry->supported_capabilities |= CAP_CLEAN;
-		} else if (!strcmp(cap_name, "smudge")) {
-			entry->supported_capabilities |= CAP_SMUDGE;
-		} else {
-			warning(
-				"external filter '%s' requested unsupported filter capability '%s'",
-				cmd, cap_name
-			);
-		}
-
-		string_list_clear(&cap_list, 0);
-	}
-
-done:
-	sigchain_pop(SIGPIPE);
 
-	return err;
+	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
+				    capabilities,
+				    &entry->supported_capabilities);
 }
 
 static int apply_multi_file_filter(const char *path, const char *src, size_t len,
diff --git a/pkt-line.c b/pkt-line.c
index 9d845ecc3..7db911957 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
 	return status;
 }
 
-int packet_writel(int fd, const char *line, ...)
-{
-	va_list args;
-	int err;
-	va_start(args, line);
-	for (;;) {
-		if (!line)
-			break;
-		if (strlen(line) > LARGE_PACKET_DATA_MAX)
-			return -1;
-		err = packet_write_fmt_gently(fd, "%s\n", line);
-		if (err)
-			return err;
-		line = va_arg(args, const char*);
-	}
-	va_end(args);
-	return packet_flush_gently(fd);
-}
-
 static int packet_write_gently(const int fd_out, const char *buf, size_t size)
 {
 	static char packet_write_buffer[LARGE_PACKET_MAX];
diff --git a/pkt-line.h b/pkt-line.h
index 450183b64..66ef610fc 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
 int packet_flush_gently(int fd);
 int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-LAST_ARG_MUST_BE_NULL
-int packet_writel(int fd, const char *line, ...);
 int write_packetized_from_fd(int fd_in, int fd_out);
 int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
 
diff --git a/sub-process.c b/sub-process.c
index a3cfab1a9..1a3f39bdf 100644
--- a/sub-process.c
+++ b/sub-process.c
@@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
 	hashmap_add(hashmap, entry);
 	return 0;
 }
+
+int subprocess_handshake(struct subprocess_entry *entry,
+			 const char *welcome_prefix,
+			 int *versions,
+			 int *chosen_version,
+			 struct subprocess_capability *capabilities,
+			 unsigned int *supported_capabilities) {
+	int version_scratch;
+	unsigned int capabilities_scratch;
+	struct child_process *process = &entry->process;
+	int i;
+	char *line;
+	const char *p;
+
+	if (!chosen_version)
+		chosen_version = &version_scratch;
+	if (!supported_capabilities)
+		supported_capabilities = &capabilities_scratch;
+
+	sigchain_push(SIGPIPE, SIG_IGN);
+
+	if (packet_write_fmt_gently(process->in, "%sclient\n",
+				    welcome_prefix)) {
+		error("Could not write client identification");
+		goto error;
+	}
+	for (i = 0; versions[i]; i++) {
+		if (packet_write_fmt_gently(process->in, "version=%d\n",
+					    versions[i])) {
+			error("Could not write requested version");
+			goto error;
+		}
+	}
+	if (packet_flush_gently(process->in))
+		goto error;
+
+	if (!(line = packet_read_line(process->out, NULL)) ||
+	    !skip_prefix(line, welcome_prefix, &p) ||
+	    strcmp(p, "server")) {
+		error("Unexpected line '%s', expected %sserver",
+		      line ? line : "<flush packet>", welcome_prefix);
+		goto error;
+	}
+	if (!(line = packet_read_line(process->out, NULL)) ||
+	    !skip_prefix(line, "version=", &p) ||
+	    strtol_i(p, 10, chosen_version)) {
+		error("Unexpected line '%s', expected version",
+		      line ? line : "<flush packet>");
+		goto error;
+	}
+	for (i = 0; versions[i]; i++) {
+		if (versions[i] == *chosen_version)
+			goto version_found;
+	}
+	error("Version %d not supported", *chosen_version);
+	goto error;
+version_found:
+	if ((line = packet_read_line(process->out, NULL))) {
+		error("Unexpected line '%s', expected flush", line);
+		goto error;
+	}
+
+	for (i = 0; capabilities[i].name; i++) {
+		if (packet_write_fmt_gently(process->in, "capability=%s\n",
+					    capabilities[i].name)) {
+			error("Could not write requested capability");
+			goto error;
+		}
+	}
+	if (packet_flush_gently(process->in))
+		goto error;
+
+	while ((line = packet_read_line(process->out, NULL))) {
+		if (!skip_prefix(line, "capability=", &p))
+			continue;
+
+		for (i = 0; capabilities[i].name; i++) {
+			if (!strcmp(p, capabilities[i].name)) {
+				*supported_capabilities |= capabilities[i].flag;
+				goto capability_found;
+			}
+		}
+		warning("external filter requested unsupported filter capability '%s'",
+			p);
+capability_found:
+		;
+	}
+
+	sigchain_pop(SIGPIPE);
+	return 0;
+error:
+	sigchain_pop(SIGPIPE);
+	return 1;
+}
diff --git a/sub-process.h b/sub-process.h
index 9e6975b5e..28863fabc 100644
--- a/sub-process.h
+++ b/sub-process.h
@@ -29,6 +29,16 @@ struct subprocess_entry {
 	struct child_process process;
 };
 
+struct subprocess_capability {
+	const char *name;
+
+	/*
+	 * subprocess_handshake will "|=" this value to supported_capabilities
+	 * if the server reports that it supports this capability.
+	 */
+	unsigned int flag;
+};
+
 /* subprocess functions */
 
 /* Function to test two subprocess hashmap entries for equality. */
@@ -62,6 +72,22 @@ static inline struct child_process *subprocess_get_child_process(
 	return &entry->process;
 }
 
+/*
+ * Perform the version and capability negotiation as described in the "Long
+ * Running Filter Process" section of the gitattributes documentation using the
+ * given requested versions and capabilities. The "versions" and "capabilities"
+ * parameters are arrays terminated by a 0 or blank struct.
+ *
+ * This function is typically called when a subprocess is started (as part of
+ * the "startfn" passed to subprocess_start).
+ */
+int subprocess_handshake(struct subprocess_entry *entry,
+			 const char *welcome_prefix,
+			 int *versions,
+			 int *chosen_version,
+			 struct subprocess_capability *capabilities,
+			 unsigned int *supported_capabilities);
+
 /*
  * Helper function that will read packets looking for "status=<foo>"
  * key/value pairs and return the value from the last "status" packet
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 161f56044..d191a7228 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
 
 		cp "$TEST_ROOT/test.o" test.r &&
 		test_must_fail git add . 2>git-stderr.log &&
-		grep "does not support filter protocol version" git-stderr.log
+		grep "expected git-filter-server" git-stderr.log
 	)
 '
 
-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 1/2] Documentation: migrate sub-process docs to header
  2017-07-25 18:29 ` [PATCH v2 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
@ 2017-07-25 20:18   ` Brandon Williams
  0 siblings, 0 replies; 22+ messages in thread
From: Brandon Williams @ 2017-07-25 20:18 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, jrnieder, peartben

On 07/25, Jonathan Tan wrote:
> Move the documentation for the sub-process API from a separate txt file
> to its header file.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>

I really like this change!

> ---
>  Documentation/technical/api-sub-process.txt | 59 -----------------------------
>  sub-process.h                               | 25 +++++++++++-
>  2 files changed, 23 insertions(+), 61 deletions(-)
>  delete mode 100644 Documentation/technical/api-sub-process.txt
> 
> diff --git a/Documentation/technical/api-sub-process.txt b/Documentation/technical/api-sub-process.txt
> deleted file mode 100644
> index 793508cf3..000000000
> --- a/Documentation/technical/api-sub-process.txt
> +++ /dev/null
> @@ -1,59 +0,0 @@
> -sub-process API
> -===============
> -
> -The sub-process API makes it possible to run background sub-processes
> -for the entire lifetime of a Git invocation. If Git needs to communicate
> -with an external process multiple times, then this can reduces the process
> -invocation overhead. Git and the sub-process communicate through stdin and
> -stdout.
> -
> -The sub-processes are kept in a hashmap by command name and looked up
> -via the subprocess_find_entry function.  If an existing instance can not
> -be found then a new process should be created and started.  When the
> -parent git command terminates, all sub-processes are also terminated.
> -
> -This API is based on the run-command API.
> -
> -Data structures
> ----------------
> -
> -* `struct subprocess_entry`
> -
> -The sub-process structure.  Members should not be accessed directly.
> -
> -Types
> ------
> -
> -'int(*subprocess_start_fn)(struct subprocess_entry *entry)'::
> -
> -	User-supplied function to initialize the sub-process.  This is
> -	typically used to negotiate the interface version and capabilities.
> -
> -
> -Functions
> ----------
> -
> -`cmd2process_cmp`::
> -
> -	Function to test two subprocess hashmap entries for equality.
> -
> -`subprocess_start`::
> -
> -	Start a subprocess and add it to the subprocess hashmap.
> -
> -`subprocess_stop`::
> -
> -	Kill a subprocess and remove it from the subprocess hashmap.
> -
> -`subprocess_find_entry`::
> -
> -	Find a subprocess in the subprocess hashmap.
> -
> -`subprocess_get_child_process`::
> -
> -	Get the underlying `struct child_process` from a subprocess.
> -
> -`subprocess_read_status`::
> -
> -	Helper function to read packets looking for the last "status=<foo>"
> -	key/value pair.
> diff --git a/sub-process.h b/sub-process.h
> index 96a2cca36..9e6975b5e 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -6,12 +6,23 @@
>  #include "run-command.h"
>  
>  /*
> - * Generic implementation of background process infrastructure.
> - * See: Documentation/technical/api-sub-process.txt
> + * The sub-process API makes it possible to run background sub-processes
> + * for the entire lifetime of a Git invocation. If Git needs to communicate
> + * with an external process multiple times, then this can reduces the process
> + * invocation overhead. Git and the sub-process communicate through stdin and
> + * stdout.
> + *
> + * The sub-processes are kept in a hashmap by command name and looked up
> + * via the subprocess_find_entry function.  If an existing instance can not
> + * be found then a new process should be created and started.  When the
> + * parent git command terminates, all sub-processes are also terminated.
> + * 
> + * This API is based on the run-command API.
>   */
>  
>   /* data structures */
>  
> +/* Members should not be accessed directly. */
>  struct subprocess_entry {
>  	struct hashmap_entry ent; /* must be the first member! */
>  	const char *cmd;
> @@ -20,21 +31,31 @@ struct subprocess_entry {
>  
>  /* subprocess functions */
>  
> +/* Function to test two subprocess hashmap entries for equality. */
>  extern int cmd2process_cmp(const void *unused_cmp_data,
>  			   const struct subprocess_entry *e1,
>  			   const struct subprocess_entry *e2,
>  			   const void *unused_keydata);
>  
> +/*
> + * User-supplied function to initialize the sub-process.  This is
> + * typically used to negotiate the interface version and capabilities.
> + */
>  typedef int(*subprocess_start_fn)(struct subprocess_entry *entry);
> +
> +/* Start a subprocess and add it to the subprocess hashmap. */
>  int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, const char *cmd,
>  		subprocess_start_fn startfn);
>  
> +/* Kill a subprocess and remove it from the subprocess hashmap. */
>  void subprocess_stop(struct hashmap *hashmap, struct subprocess_entry *entry);
>  
> +/* Find a subprocess in the subprocess hashmap. */
>  struct subprocess_entry *subprocess_find_entry(struct hashmap *hashmap, const char *cmd);
>  
>  /* subprocess helper functions */
>  
> +/* Get the underlying `struct child_process` from a subprocess. */
>  static inline struct child_process *subprocess_get_child_process(
>  		struct subprocess_entry *entry)
>  {
> -- 
> 2.14.0.rc0.400.g1c36432dff-goog
> 

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 2/2] sub-process: refactor handshake to common function
  2017-07-25 18:29 ` [PATCH v2 2/2] sub-process: refactor handshake to common function Jonathan Tan
@ 2017-07-25 20:28   ` Brandon Williams
  2017-07-25 22:25   ` Junio C Hamano
  1 sibling, 0 replies; 22+ messages in thread
From: Brandon Williams @ 2017-07-25 20:28 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, jrnieder, peartben

On 07/25, Jonathan Tan wrote:
> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
> 
> As you can see in the change to t0021, this commit changes the error
> message reported when the long-running process does not introduce itself
> with the expected "server"-terminated line. Originally, the error
> message reports that the filter "does not support filter protocol
> version 2", differentiating between the old single-file filter protocol
> and the new multi-file filter protocol - I have updated it to something
> more generic and useful.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
>  convert.c             | 61 ++++-----------------------------
>  pkt-line.c            | 19 -----------
>  pkt-line.h            |  2 --
>  sub-process.c         | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
>  sub-process.h         | 26 ++++++++++++++
>  t/t0021-conversion.sh |  2 +-
>  6 files changed, 128 insertions(+), 76 deletions(-)
> 
> diff --git a/convert.c b/convert.c
> index deaf0ba7b..ec8ecc2ea 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -512,62 +512,15 @@ static struct hashmap subprocess_map;
>  
>  static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
>  {
> -	int err;
> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> +		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
> +	};
>  	struct cmd2process *entry = (struct cmd2process *)subprocess;
> -	struct string_list cap_list = STRING_LIST_INIT_NODUP;
> -	char *cap_buf;
> -	const char *cap_name;
> -	struct child_process *process = &subprocess->process;
> -	const char *cmd = subprocess->cmd;
> -
> -	sigchain_push(SIGPIPE, SIG_IGN);
> -
> -	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
> -	if (err)
> -		goto done;
> -
> -	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
> -	if (err) {
> -		error("external filter '%s' does not support filter protocol version 2", cmd);
> -		goto done;
> -	}
> -	err = strcmp(packet_read_line(process->out, NULL), "version=2");
> -	if (err)
> -		goto done;
> -	err = packet_read_line(process->out, NULL) != NULL;
> -	if (err)
> -		goto done;
> -
> -	err = packet_writel(process->in, "capability=clean", "capability=smudge", NULL);
> -
> -	for (;;) {
> -		cap_buf = packet_read_line(process->out, NULL);
> -		if (!cap_buf)
> -			break;
> -		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
> -
> -		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
> -			continue;
> -
> -		cap_name = cap_list.items[1].string;
> -		if (!strcmp(cap_name, "clean")) {
> -			entry->supported_capabilities |= CAP_CLEAN;
> -		} else if (!strcmp(cap_name, "smudge")) {
> -			entry->supported_capabilities |= CAP_SMUDGE;
> -		} else {
> -			warning(
> -				"external filter '%s' requested unsupported filter capability '%s'",
> -				cmd, cap_name
> -			);
> -		}
> -
> -		string_list_clear(&cap_list, 0);
> -	}
> -
> -done:
> -	sigchain_pop(SIGPIPE);
>  
> -	return err;
> +	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);
>  }
>  
>  static int apply_multi_file_filter(const char *path, const char *src, size_t len,
> diff --git a/pkt-line.c b/pkt-line.c
> index 9d845ecc3..7db911957 100644
> --- a/pkt-line.c
> +++ b/pkt-line.c
> @@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
>  	return status;
>  }
>  
> -int packet_writel(int fd, const char *line, ...)
> -{
> -	va_list args;
> -	int err;
> -	va_start(args, line);
> -	for (;;) {
> -		if (!line)
> -			break;
> -		if (strlen(line) > LARGE_PACKET_DATA_MAX)
> -			return -1;
> -		err = packet_write_fmt_gently(fd, "%s\n", line);
> -		if (err)
> -			return err;
> -		line = va_arg(args, const char*);
> -	}
> -	va_end(args);
> -	return packet_flush_gently(fd);
> -}
> -
>  static int packet_write_gently(const int fd_out, const char *buf, size_t size)
>  {
>  	static char packet_write_buffer[LARGE_PACKET_MAX];
> diff --git a/pkt-line.h b/pkt-line.h
> index 450183b64..66ef610fc 100644
> --- a/pkt-line.h
> +++ b/pkt-line.h
> @@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
>  void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
>  int packet_flush_gently(int fd);
>  int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> -LAST_ARG_MUST_BE_NULL
> -int packet_writel(int fd, const char *line, ...);
>  int write_packetized_from_fd(int fd_in, int fd_out);
>  int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
>  
> diff --git a/sub-process.c b/sub-process.c
> index a3cfab1a9..1a3f39bdf 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
>  	hashmap_add(hashmap, entry);
>  	return 0;
>  }
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities) {
> +	int version_scratch;
> +	unsigned int capabilities_scratch;
> +	struct child_process *process = &entry->process;
> +	int i;
> +	char *line;
> +	const char *p;
> +
> +	if (!chosen_version)
> +		chosen_version = &version_scratch;
> +	if (!supported_capabilities)
> +		supported_capabilities = &capabilities_scratch;
> +
> +	sigchain_push(SIGPIPE, SIG_IGN);
> +
> +	if (packet_write_fmt_gently(process->in, "%sclient\n",
> +				    welcome_prefix)) {
> +		error("Could not write client identification");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> +					    versions[i])) {
> +			error("Could not write requested version");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, welcome_prefix, &p) ||
> +	    strcmp(p, "server")) {
> +		error("Unexpected line '%s', expected %sserver",
> +		      line ? line : "<flush packet>", welcome_prefix);
> +		goto error;
> +	}
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, "version=", &p) ||
> +	    strtol_i(p, 10, chosen_version)) {
> +		error("Unexpected line '%s', expected version",
> +		      line ? line : "<flush packet>");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (versions[i] == *chosen_version)
> +			goto version_found;
> +	}
> +	error("Version %d not supported", *chosen_version);
> +	goto error;
> +version_found:
> +	if ((line = packet_read_line(process->out, NULL))) {
> +		error("Unexpected line '%s', expected flush", line);
> +		goto error;
> +	}
> +
> +	for (i = 0; capabilities[i].name; i++) {
> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> +					    capabilities[i].name)) {
> +			error("Could not write requested capability");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	while ((line = packet_read_line(process->out, NULL))) {
> +		if (!skip_prefix(line, "capability=", &p))
> +			continue;
> +
> +		for (i = 0; capabilities[i].name; i++) {
> +			if (!strcmp(p, capabilities[i].name)) {
> +				*supported_capabilities |= capabilities[i].flag;
> +				goto capability_found;
> +			}
> +		}
> +		warning("external filter requested unsupported filter capability '%s'",
> +			p);
> +capability_found:
> +		;
> +	}

I'm not familiar with this section of code so I can't comment on what
the logic is doing.  But I think that this whole function can be written
better.  Its quite a lengthy function so you may benefit from breaking
it up into smaller functions to improve readability.  My biggest
complaint is the use of goto's to jump around the function, in and out
of loops and the like.  IMO its completely fine to have a 'goto error'
where you can do any necessary cleanup but it just makes things very
hard to follow when they are structured using gotos to break out of
loops or jump to other parts of the function as you have here.

It should be pretty easy to eliminate the two non-essential goto's by
making a helper function to determine if a version is found as well as
finding a capability.  That way you should be able to restructure the
code using if's instead of gotos.

> +
> +	sigchain_pop(SIGPIPE);
> +	return 0;
> +error:
> +	sigchain_pop(SIGPIPE);
> +	return 1;
> +}
> diff --git a/sub-process.h b/sub-process.h
> index 9e6975b5e..28863fabc 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -29,6 +29,16 @@ struct subprocess_entry {
>  	struct child_process process;
>  };
>  
> +struct subprocess_capability {
> +	const char *name;
> +
> +	/*
> +	 * subprocess_handshake will "|=" this value to supported_capabilities
> +	 * if the server reports that it supports this capability.
> +	 */
> +	unsigned int flag;
> +};
> +
>  /* subprocess functions */
>  
>  /* Function to test two subprocess hashmap entries for equality. */
> @@ -62,6 +72,22 @@ static inline struct child_process *subprocess_get_child_process(
>  	return &entry->process;
>  }
>  
> +/*
> + * Perform the version and capability negotiation as described in the "Long
> + * Running Filter Process" section of the gitattributes documentation using the
> + * given requested versions and capabilities. The "versions" and "capabilities"
> + * parameters are arrays terminated by a 0 or blank struct.
> + *
> + * This function is typically called when a subprocess is started (as part of
> + * the "startfn" passed to subprocess_start).
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities);
> +
>  /*
>   * Helper function that will read packets looking for "status=<foo>"
>   * key/value pairs and return the value from the last "status" packet
> diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
> index 161f56044..d191a7228 100755
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
>  
>  		cp "$TEST_ROOT/test.o" test.r &&
>  		test_must_fail git add . 2>git-stderr.log &&
> -		grep "does not support filter protocol version" git-stderr.log
> +		grep "expected git-filter-server" git-stderr.log
>  	)
>  '
>  
> -- 
> 2.14.0.rc0.400.g1c36432dff-goog
> 

-- 
Brandon Williams

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH v2 2/2] sub-process: refactor handshake to common function
  2017-07-25 18:29 ` [PATCH v2 2/2] sub-process: refactor handshake to common function Jonathan Tan
  2017-07-25 20:28   ` Brandon Williams
@ 2017-07-25 22:25   ` Junio C Hamano
  1 sibling, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2017-07-25 22:25 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, jrnieder, peartben

Jonathan Tan <jonathantanmy@google.com> writes:

> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
>
> As you can see in the change to t0021, this commit changes the error
> message reported when the long-running process does not introduce itself
> with the expected "server"-terminated line. Originally, the error
> message reports that the filter "does not support filter protocol
> version 2", differentiating between the old single-file filter protocol
> and the new multi-file filter protocol - I have updated it to something
> more generic and useful.
>
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>

Overall I like the direction, even though the abstraction the
resulting code results in seems to me a bit too tightly defined; in
other words, I cannot be sure that this will be useful enough in a
more general context, or make some potential applications feel a bit
too constrained.

> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> +		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
> +	};
>  	struct cmd2process *entry = (struct cmd2process *)subprocess;
> ...
> +	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);
>  }

I would have defined the welcome prefix to lack the final dash,
i.e. forcing the hardcoded suffixes for clients and servers in any
protocol that uses this API to end with "-client" and "-server",
i.e. with dash.

> diff --git a/sub-process.c b/sub-process.c
> index a3cfab1a9..1a3f39bdf 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
>  	hashmap_add(hashmap, entry);
>  	return 0;
>  }
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities) {
> +	int version_scratch;
> +	unsigned int capabilities_scratch;
> +	struct child_process *process = &entry->process;
> +	int i;
> +	char *line;
> +	const char *p;
> +
> +	if (!chosen_version)
> +		chosen_version = &version_scratch;
> +	if (!supported_capabilities)
> +		supported_capabilities = &capabilities_scratch;
> +
> +	sigchain_push(SIGPIPE, SIG_IGN);
> +
> +	if (packet_write_fmt_gently(process->in, "%sclient\n",
> +				    welcome_prefix)) {
> +		error("Could not write client identification");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> +					    versions[i])) {
> +			error("Could not write requested version");
> +			goto error;
> +		}
> +	}

This forces version numbers to be positive integers, which is OK, as
I do not see it a downside that any potential application cannot use
"version=0".

> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, welcome_prefix, &p) ||
> +	    strcmp(p, "server")) {
> +		error("Unexpected line '%s', expected %sserver",
> +		      line ? line : "<flush packet>", welcome_prefix);
> +		goto error;
> +	}
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, "version=", &p) ||
> +	    strtol_i(p, 10, chosen_version)) {
> +		error("Unexpected line '%s', expected version",
> +		      line ? line : "<flush packet>");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (versions[i] == *chosen_version)
> +			goto version_found;
> +	}
> +	error("Version %d not supported", *chosen_version);
> +	goto error;
> +version_found:

It would have been more natural to do

	for (i = 0; versions[i]; i++)
		if (versions[i] == *chosen_version)
			break;
	if (versions[i]) {
		error("...");
		goto error;
	}

without "version_found:" label.  In general, I'd prefer to avoid
jumping to a label in the normal/expected case and reserve "goto"
for error handling.

> +	if ((line = packet_read_line(process->out, NULL))) {
> +		error("Unexpected line '%s', expected flush", line);
> +		goto error;
> +	}
> +
> +	for (i = 0; capabilities[i].name; i++) {
> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> +					    capabilities[i].name)) {
> +			error("Could not write requested capability");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	while ((line = packet_read_line(process->out, NULL))) {
> +		if (!skip_prefix(line, "capability=", &p))
> +			continue;
> +
> +		for (i = 0; capabilities[i].name; i++) {
> +			if (!strcmp(p, capabilities[i].name)) {
> +				*supported_capabilities |= capabilities[i].flag;
> +				goto capability_found;
> +			}
> +		}
> +		warning("external filter requested unsupported filter capability '%s'",
> +			p);
> +capability_found:
> +		;

Likewise.

Also, this is the reason why I said this might make future
applications feel a bit too constrained; is the set of fields in the
subprocess_capability struct general enough?  It can only say "a
capability with this name was found" with a single bit, so you can
have only 32 (or 64) capabilities that are all yes/no.  I am not
saying that is definitely insufficient (not yet anyway); I am
wondering if future applications may need to have something like:

	capability=buffer-size=64k

where "=64k" part is not known at this layer but is known by the
user of the API.

> +	}
> +
> +	sigchain_pop(SIGPIPE);
> +	return 0;
> +error:
> +	sigchain_pop(SIGPIPE);
> +	return 1;

I would prepare at the beginning of the function:

	int retval = -1; /* assume failure */

and rewrite the above to

                retval = 0;
        error:
                sigchain_pop(SIGPIPE);
                return retval;

if I were writing this code.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
                   ` (4 preceding siblings ...)
  2017-07-25 18:29 ` [PATCH v2 2/2] sub-process: refactor handshake to common function Jonathan Tan
@ 2017-07-26 16:52 ` Lars Schneider
  2017-07-26 18:14   ` Junio C Hamano
  2017-07-26 18:17 ` [PATCH for NEXT v3 0/2] " Jonathan Tan
                   ` (2 subsequent siblings)
  8 siblings, 1 reply; 22+ messages in thread
From: Lars Schneider @ 2017-07-26 16:52 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, benpeart


> On 24 Jul 2017, at 23:38, Jonathan Tan <jonathantanmy@google.com> wrote:
> 
> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> This will be useful for anyone implementing functionality like that in
> [1].
> 
> It is unfortunate that the code grew larger because it had to be more
> generic, but I think this is better than having (in the future) 2 or
> more separate handshake functions.
> 
> I also don't think that the protocol should be permissive - I think
> things would be simpler if all protocol errors were fatal errors. As it
> is, most parts are permissive, but packet_read_line() already dies
> anyway upon a malformed packet, so it may not be too drastic a change to
> change this. For reference, the original protocol comes from [2].
> 
> [1] https://public-inbox.org/git/20170714132651.170708-2-benpeart@microsoft.com/
> [2] edcc858 ("convert: add filter.<driver>.process option", 2016-10-17)

Thanks for this refactoring! That's great!

Please note that I've recently refactored the capabilities negotiation a bit:
https://github.com/git/git/commit/1514c8edd62d96006cd1de31e906ed5798dd4681

This change is still cooking in `next`. I am not sure how this should/could
be handled but maybe you can use my refactoring as your base?

Thanks,
Lars


> ---
> convert.c             | 61 ++++-----------------------------
> pkt-line.c            | 19 -----------
> pkt-line.h            |  2 --
> sub-process.c         | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++
> sub-process.h         | 18 ++++++++++
> t/t0021-conversion.sh |  2 +-
> 6 files changed, 120 insertions(+), 76 deletions(-)
> 
> diff --git a/convert.c b/convert.c
> index deaf0ba7b..ec8ecc2ea 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -512,62 +512,15 @@ static struct hashmap subprocess_map;
> 
> static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
> {
> -	int err;
> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> +		{"clean", CAP_CLEAN}, {"smudge", CAP_SMUDGE}, {NULL, 0}
> +	};
> 	struct cmd2process *entry = (struct cmd2process *)subprocess;
> -	struct string_list cap_list = STRING_LIST_INIT_NODUP;
> -	char *cap_buf;
> -	const char *cap_name;
> -	struct child_process *process = &subprocess->process;
> -	const char *cmd = subprocess->cmd;
> -
> -	sigchain_push(SIGPIPE, SIG_IGN);
> -
> -	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
> -	if (err)
> -		goto done;
> -
> -	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
> -	if (err) {
> -		error("external filter '%s' does not support filter protocol version 2", cmd);
> -		goto done;
> -	}
> -	err = strcmp(packet_read_line(process->out, NULL), "version=2");
> -	if (err)
> -		goto done;
> -	err = packet_read_line(process->out, NULL) != NULL;
> -	if (err)
> -		goto done;
> -
> -	err = packet_writel(process->in, "capability=clean", "capability=smudge", NULL);
> -
> -	for (;;) {
> -		cap_buf = packet_read_line(process->out, NULL);
> -		if (!cap_buf)
> -			break;
> -		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
> -
> -		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
> -			continue;
> -
> -		cap_name = cap_list.items[1].string;
> -		if (!strcmp(cap_name, "clean")) {
> -			entry->supported_capabilities |= CAP_CLEAN;
> -		} else if (!strcmp(cap_name, "smudge")) {
> -			entry->supported_capabilities |= CAP_SMUDGE;
> -		} else {
> -			warning(
> -				"external filter '%s' requested unsupported filter capability '%s'",
> -				cmd, cap_name
> -			);
> -		}
> -
> -		string_list_clear(&cap_list, 0);
> -	}
> -
> -done:
> -	sigchain_pop(SIGPIPE);
> 
> -	return err;
> +	return subprocess_handshake(subprocess, "git-filter-", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);
> }
> 
> static int apply_multi_file_filter(const char *path, const char *src, size_t len,
> diff --git a/pkt-line.c b/pkt-line.c
> index 9d845ecc3..7db911957 100644
> --- a/pkt-line.c
> +++ b/pkt-line.c
> @@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
> 	return status;
> }
> 
> -int packet_writel(int fd, const char *line, ...)
> -{
> -	va_list args;
> -	int err;
> -	va_start(args, line);
> -	for (;;) {
> -		if (!line)
> -			break;
> -		if (strlen(line) > LARGE_PACKET_DATA_MAX)
> -			return -1;
> -		err = packet_write_fmt_gently(fd, "%s\n", line);
> -		if (err)
> -			return err;
> -		line = va_arg(args, const char*);
> -	}
> -	va_end(args);
> -	return packet_flush_gently(fd);
> -}
> -
> static int packet_write_gently(const int fd_out, const char *buf, size_t size)
> {
> 	static char packet_write_buffer[LARGE_PACKET_MAX];
> diff --git a/pkt-line.h b/pkt-line.h
> index 450183b64..66ef610fc 100644
> --- a/pkt-line.h
> +++ b/pkt-line.h
> @@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
> void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> int packet_flush_gently(int fd);
> int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> -LAST_ARG_MUST_BE_NULL
> -int packet_writel(int fd, const char *line, ...);
> int write_packetized_from_fd(int fd_in, int fd_out);
> int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
> 
> diff --git a/sub-process.c b/sub-process.c
> index a3cfab1a9..1a3f39bdf 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -105,3 +105,97 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
> 	hashmap_add(hashmap, entry);
> 	return 0;
> }
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities) {
> +	int version_scratch;
> +	unsigned int capabilities_scratch;
> +	struct child_process *process = &entry->process;
> +	int i;
> +	char *line;
> +	const char *p;
> +
> +	if (!chosen_version)
> +		chosen_version = &version_scratch;
> +	if (!supported_capabilities)
> +		supported_capabilities = &capabilities_scratch;
> +
> +	sigchain_push(SIGPIPE, SIG_IGN);
> +
> +	if (packet_write_fmt_gently(process->in, "%sclient\n",
> +				    welcome_prefix)) {
> +		error("Could not write client identification");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> +					    versions[i])) {
> +			error("Could not write requested version");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, welcome_prefix, &p) ||
> +	    strcmp(p, "server")) {
> +		error("Unexpected line '%s', expected %sserver",
> +		      line ? line : "<flush packet>", welcome_prefix);
> +		goto error;
> +	}
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, "version=", &p) ||
> +	    strtol_i(p, 10, chosen_version)) {
> +		error("Unexpected line '%s', expected version",
> +		      line ? line : "<flush packet>");
> +		goto error;
> +	}
> +	for (i = 0; versions[i]; i++) {
> +		if (versions[i] == *chosen_version)
> +			goto version_found;
> +	}
> +	error("Version %d not supported", *chosen_version);
> +	goto error;
> +version_found:
> +	if ((line = packet_read_line(process->out, NULL))) {
> +		error("Unexpected line '%s', expected flush", line);
> +		goto error;
> +	}
> +
> +	for (i = 0; capabilities[i].name; i++) {
> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> +					    capabilities[i].name)) {
> +			error("Could not write requested capability");
> +			goto error;
> +		}
> +	}
> +	if (packet_flush_gently(process->in))
> +		goto error;
> +
> +	while ((line = packet_read_line(process->out, NULL))) {
> +		if (!skip_prefix(line, "capability=", &p))
> +			continue;
> +
> +		for (i = 0; capabilities[i].name; i++) {
> +			if (!strcmp(p, capabilities[i].name)) {
> +				*supported_capabilities |= capabilities[i].flag;
> +				goto capability_found;
> +			}
> +		}
> +		warning("external filter requested unsupported filter capability '%s'",
> +			p);
> +capability_found:
> +		;
> +	}
> +
> +	sigchain_pop(SIGPIPE);
> +	return 0;
> +error:
> +	sigchain_pop(SIGPIPE);
> +	return 1;
> +}
> diff --git a/sub-process.h b/sub-process.h
> index 96a2cca36..a72e7f7cf 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -18,6 +18,11 @@ struct subprocess_entry {
> 	struct child_process process;
> };
> 
> +struct subprocess_capability {
> +	const char *name;
> +	unsigned int flag;
> +};
> +
> /* subprocess functions */
> 
> extern int cmd2process_cmp(const void *unused_cmp_data,
> @@ -41,6 +46,19 @@ static inline struct child_process *subprocess_get_child_process(
> 	return &entry->process;
> }
> 
> +/*
> + * Perform the handshake to a long-running process as described in the
> + * gitattributes documentation using the given requested versions and
> + * capabilities. The "versions" and "capabilities" parameters are arrays
> + * terminated by a 0 or blank struct.
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities);
> +
> /*
>  * Helper function that will read packets looking for "status=<foo>"
>  * key/value pairs and return the value from the last "status" packet
> diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
> index 161f56044..d191a7228 100755
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
> 
> 		cp "$TEST_ROOT/test.o" test.r &&
> 		test_must_fail git add . 2>git-stderr.log &&
> -		grep "does not support filter protocol version" git-stderr.log
> +		grep "expected git-filter-server" git-stderr.log
> 	)
> '
> 
> -- 
> 2.14.0.rc0.400.g1c36432dff-goog
> 


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH] sub-process: refactor handshake to common function
  2017-07-26 16:52 ` [PATCH] " Lars Schneider
@ 2017-07-26 18:14   ` Junio C Hamano
  0 siblings, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2017-07-26 18:14 UTC (permalink / raw)
  To: Lars Schneider; +Cc: Jonathan Tan, git, benpeart

Lars Schneider <larsxschneider@gmail.com> writes:

> Please note that I've recently refactored the capabilities negotiation a bit:
> https://github.com/git/git/commit/1514c8edd62d96006cd1de31e906ed5798dd4681
>
> This change is still cooking in `next`. I am not sure how this should/could
> be handled but maybe you can use my refactoring as your base?

I think they can play well together as independent topics.  The
known_caps[] thing you introduced essentially is the same as the
struct subprocess_capability capablities[] Jonathan has.

Please check

    $ git show "pu^{/jt/subprocess-handshake' into pu}" convert.c

to see if it makes sense.

Thanks.





^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH for NEXT v3 0/2] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
                   ` (5 preceding siblings ...)
  2017-07-26 16:52 ` [PATCH] " Lars Schneider
@ 2017-07-26 18:17 ` Jonathan Tan
  2017-07-26 19:48   ` Junio C Hamano
  2017-07-29 16:26   ` Junio C Hamano
  2017-07-26 18:17 ` [PATCH for NEXT v3 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
  2017-07-26 18:17 ` [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function Jonathan Tan
  8 siblings, 2 replies; 22+ messages in thread
From: Jonathan Tan @ 2017-07-26 18:17 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, bmwill, gitster, larsxschneider

Thanks for all your comments.

This is now built off "next" to include Lars's changes.

About whether this is too restrictive (for example, as Junio mentions,
this will not allow future capabilities of the form "key=???"), I think
that this can be upgraded later if necessary. For now, both the filter
code and the ODB (and ODB-like) proposals on the mailing list do not
require such a thing, so I have not included that functionality.

Changes from v2:
 - now rebased onto next, to pick up Lars's changes
 - split up into more functions
 - welcome prefix does not include final dash
 - no more gotos in expected cases (or at all)

Jonathan Tan (2):
  Documentation: migrate sub-process docs to header
  sub-process: refactor handshake to common function

 Documentation/technical/api-sub-process.txt |  59 ----------------
 convert.c                                   |  75 ++------------------
 pkt-line.c                                  |  19 -----
 pkt-line.h                                  |   2 -
 sub-process.c                               | 103 ++++++++++++++++++++++++++++
 sub-process.h                               |  51 +++++++++++++-
 t/t0021-conversion.sh                       |   2 +-
 7 files changed, 160 insertions(+), 151 deletions(-)
 delete mode 100644 Documentation/technical/api-sub-process.txt

-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply	[flat|nested] 22+ messages in thread

* [PATCH for NEXT v3 1/2] Documentation: migrate sub-process docs to header
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
                   ` (6 preceding siblings ...)
  2017-07-26 18:17 ` [PATCH for NEXT v3 0/2] " Jonathan Tan
@ 2017-07-26 18:17 ` Jonathan Tan
  2017-07-26 18:17 ` [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function Jonathan Tan
  8 siblings, 0 replies; 22+ messages in thread
From: Jonathan Tan @ 2017-07-26 18:17 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, bmwill, gitster, larsxschneider

Move the documentation for the sub-process API from a separate txt file
to its header file.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/technical/api-sub-process.txt | 59 -----------------------------
 sub-process.h                               | 25 +++++++++++-
 2 files changed, 23 insertions(+), 61 deletions(-)
 delete mode 100644 Documentation/technical/api-sub-process.txt

diff --git a/Documentation/technical/api-sub-process.txt b/Documentation/technical/api-sub-process.txt
deleted file mode 100644
index 793508cf3..000000000
--- a/Documentation/technical/api-sub-process.txt
+++ /dev/null
@@ -1,59 +0,0 @@
-sub-process API
-===============
-
-The sub-process API makes it possible to run background sub-processes
-for the entire lifetime of a Git invocation. If Git needs to communicate
-with an external process multiple times, then this can reduces the process
-invocation overhead. Git and the sub-process communicate through stdin and
-stdout.
-
-The sub-processes are kept in a hashmap by command name and looked up
-via the subprocess_find_entry function.  If an existing instance can not
-be found then a new process should be created and started.  When the
-parent git command terminates, all sub-processes are also terminated.
-
-This API is based on the run-command API.
-
-Data structures
----------------
-
-* `struct subprocess_entry`
-
-The sub-process structure.  Members should not be accessed directly.
-
-Types
------
-
-'int(*subprocess_start_fn)(struct subprocess_entry *entry)'::
-
-	User-supplied function to initialize the sub-process.  This is
-	typically used to negotiate the interface version and capabilities.
-
-
-Functions
----------
-
-`cmd2process_cmp`::
-
-	Function to test two subprocess hashmap entries for equality.
-
-`subprocess_start`::
-
-	Start a subprocess and add it to the subprocess hashmap.
-
-`subprocess_stop`::
-
-	Kill a subprocess and remove it from the subprocess hashmap.
-
-`subprocess_find_entry`::
-
-	Find a subprocess in the subprocess hashmap.
-
-`subprocess_get_child_process`::
-
-	Get the underlying `struct child_process` from a subprocess.
-
-`subprocess_read_status`::
-
-	Helper function to read packets looking for the last "status=<foo>"
-	key/value pair.
diff --git a/sub-process.h b/sub-process.h
index 8cd07a59a..d37c1499a 100644
--- a/sub-process.h
+++ b/sub-process.h
@@ -6,12 +6,23 @@
 #include "run-command.h"
 
 /*
- * Generic implementation of background process infrastructure.
- * See: Documentation/technical/api-sub-process.txt
+ * The sub-process API makes it possible to run background sub-processes
+ * for the entire lifetime of a Git invocation. If Git needs to communicate
+ * with an external process multiple times, then this can reduces the process
+ * invocation overhead. Git and the sub-process communicate through stdin and
+ * stdout.
+ *
+ * The sub-processes are kept in a hashmap by command name and looked up
+ * via the subprocess_find_entry function.  If an existing instance can not
+ * be found then a new process should be created and started.  When the
+ * parent git command terminates, all sub-processes are also terminated.
+ * 
+ * This API is based on the run-command API.
  */
 
  /* data structures */
 
+/* Members should not be accessed directly. */
 struct subprocess_entry {
 	struct hashmap_entry ent; /* must be the first member! */
 	const char *cmd;
@@ -20,21 +31,31 @@ struct subprocess_entry {
 
 /* subprocess functions */
 
+/* Function to test two subprocess hashmap entries for equality. */
 extern int cmd2process_cmp(const void *unused_cmp_data,
 			   const void *e1,
 			   const void *e2,
 			   const void *unused_keydata);
 
+/*
+ * User-supplied function to initialize the sub-process.  This is
+ * typically used to negotiate the interface version and capabilities.
+ */
 typedef int(*subprocess_start_fn)(struct subprocess_entry *entry);
+
+/* Start a subprocess and add it to the subprocess hashmap. */
 int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, const char *cmd,
 		subprocess_start_fn startfn);
 
+/* Kill a subprocess and remove it from the subprocess hashmap. */
 void subprocess_stop(struct hashmap *hashmap, struct subprocess_entry *entry);
 
+/* Find a subprocess in the subprocess hashmap. */
 struct subprocess_entry *subprocess_find_entry(struct hashmap *hashmap, const char *cmd);
 
 /* subprocess helper functions */
 
+/* Get the underlying `struct child_process` from a subprocess. */
 static inline struct child_process *subprocess_get_child_process(
 		struct subprocess_entry *entry)
 {
-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
  2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
                   ` (7 preceding siblings ...)
  2017-07-26 18:17 ` [PATCH for NEXT v3 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
@ 2017-07-26 18:17 ` Jonathan Tan
  2017-08-06 19:58   ` Lars Schneider
  8 siblings, 1 reply; 22+ messages in thread
From: Jonathan Tan @ 2017-07-26 18:17 UTC (permalink / raw)
  To: git; +Cc: Jonathan Tan, bmwill, gitster, larsxschneider

Refactor, into a common function, the version and capability negotiation
done when invoking a long-running process as a clean or smudge filter.
This will be useful for other Git code that needs to interact similarly
with a long-running process.

As you can see in the change to t0021, this commit changes the error
message reported when the long-running process does not introduce itself
with the expected "server"-terminated line. Originally, the error
message reports that the filter "does not support filter protocol
version 2", differentiating between the old single-file filter protocol
and the new multi-file filter protocol - I have updated it to something
more generic and useful.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 convert.c             |  75 ++++--------------------------------
 pkt-line.c            |  19 ----------
 pkt-line.h            |   2 -
 sub-process.c         | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
 sub-process.h         |  26 +++++++++++++
 t/t0021-conversion.sh |   2 +-
 6 files changed, 137 insertions(+), 90 deletions(-)

diff --git a/convert.c b/convert.c
index dbdbb24e4..1012462e3 100644
--- a/convert.c
+++ b/convert.c
@@ -513,78 +513,17 @@ static struct hashmap subprocess_map;
 
 static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
 {
-	int err, i;
-	struct cmd2process *entry = (struct cmd2process *)subprocess;
-	struct string_list cap_list = STRING_LIST_INIT_NODUP;
-	char *cap_buf;
-	const char *cap_name;
-	struct child_process *process = &subprocess->process;
-	const char *cmd = subprocess->cmd;
-
-	static const struct {
-		const char *name;
-		unsigned int cap;
-	} known_caps[] = {
+	static int versions[] = {2, 0};
+	static struct subprocess_capability capabilities[] = {
 		{ "clean",  CAP_CLEAN  },
 		{ "smudge", CAP_SMUDGE },
 		{ "delay",  CAP_DELAY  },
+		{ NULL, 0 }
 	};
-
-	sigchain_push(SIGPIPE, SIG_IGN);
-
-	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
-	if (err)
-		goto done;
-
-	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
-	if (err) {
-		error("external filter '%s' does not support filter protocol version 2", cmd);
-		goto done;
-	}
-	err = strcmp(packet_read_line(process->out, NULL), "version=2");
-	if (err)
-		goto done;
-	err = packet_read_line(process->out, NULL) != NULL;
-	if (err)
-		goto done;
-
-	for (i = 0; i < ARRAY_SIZE(known_caps); ++i) {
-		err = packet_write_fmt_gently(
-			process->in, "capability=%s\n", known_caps[i].name);
-		if (err)
-			goto done;
-	}
-	err = packet_flush_gently(process->in);
-	if (err)
-		goto done;
-
-	for (;;) {
-		cap_buf = packet_read_line(process->out, NULL);
-		if (!cap_buf)
-			break;
-		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
-
-		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
-			continue;
-
-		cap_name = cap_list.items[1].string;
-		i = ARRAY_SIZE(known_caps) - 1;
-		while (i >= 0 && strcmp(cap_name, known_caps[i].name))
-			i--;
-
-		if (i >= 0)
-			entry->supported_capabilities |= known_caps[i].cap;
-		else
-			warning("external filter '%s' requested unsupported filter capability '%s'",
-			cmd, cap_name);
-
-		string_list_clear(&cap_list, 0);
-	}
-
-done:
-	sigchain_pop(SIGPIPE);
-
-	return err;
+	struct cmd2process *entry = (struct cmd2process *)subprocess;
+	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
+				    capabilities,
+				    &entry->supported_capabilities);
 }
 
 static void handle_filter_error(const struct strbuf *filter_status,
diff --git a/pkt-line.c b/pkt-line.c
index 9d845ecc3..7db911957 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
 	return status;
 }
 
-int packet_writel(int fd, const char *line, ...)
-{
-	va_list args;
-	int err;
-	va_start(args, line);
-	for (;;) {
-		if (!line)
-			break;
-		if (strlen(line) > LARGE_PACKET_DATA_MAX)
-			return -1;
-		err = packet_write_fmt_gently(fd, "%s\n", line);
-		if (err)
-			return err;
-		line = va_arg(args, const char*);
-	}
-	va_end(args);
-	return packet_flush_gently(fd);
-}
-
 static int packet_write_gently(const int fd_out, const char *buf, size_t size)
 {
 	static char packet_write_buffer[LARGE_PACKET_MAX];
diff --git a/pkt-line.h b/pkt-line.h
index 450183b64..66ef610fc 100644
--- a/pkt-line.h
+++ b/pkt-line.h
@@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
 void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
 int packet_flush_gently(int fd);
 int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
-LAST_ARG_MUST_BE_NULL
-int packet_writel(int fd, const char *line, ...);
 int write_packetized_from_fd(int fd_in, int fd_out);
 int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
 
diff --git a/sub-process.c b/sub-process.c
index 6cbffa440..37b4bd0ad 100644
--- a/sub-process.c
+++ b/sub-process.c
@@ -108,3 +108,106 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
 	hashmap_add(hashmap, entry);
 	return 0;
 }
+
+static int handshake_version(struct child_process *process,
+			     const char *welcome_prefix, int *versions,
+			     int *chosen_version)
+{
+	int version_scratch;
+	int i;
+	char *line;
+	const char *p;
+
+	if (!chosen_version)
+		chosen_version = &version_scratch;
+
+	if (packet_write_fmt_gently(process->in, "%s-client\n",
+				    welcome_prefix))
+		return error("Could not write client identification");
+	for (i = 0; versions[i]; i++) {
+		if (packet_write_fmt_gently(process->in, "version=%d\n",
+					    versions[i]))
+			return error("Could not write requested version");
+	}
+	if (packet_flush_gently(process->in))
+		return error("Could not write flush packet");
+
+	if (!(line = packet_read_line(process->out, NULL)) ||
+	    !skip_prefix(line, welcome_prefix, &p) ||
+	    strcmp(p, "-server"))
+		return error("Unexpected line '%s', expected %s-server",
+			     line ? line : "<flush packet>", welcome_prefix);
+	if (!(line = packet_read_line(process->out, NULL)) ||
+	    !skip_prefix(line, "version=", &p) ||
+	    strtol_i(p, 10, chosen_version))
+		return error("Unexpected line '%s', expected version",
+			     line ? line : "<flush packet>");
+	if ((line = packet_read_line(process->out, NULL)))
+		return error("Unexpected line '%s', expected flush", line);
+
+	/* Check to make sure that the version received is supported */
+	for (i = 0; versions[i]; i++) {
+		if (versions[i] == *chosen_version)
+			break;
+	}
+	if (!versions[i])
+		return error("Version %d not supported", *chosen_version);
+
+	return 0;
+}
+
+static int handshake_capabilities(struct child_process *process,
+				  struct subprocess_capability *capabilities,
+				  unsigned int *supported_capabilities)
+{
+	int i;
+	char *line;
+
+	for (i = 0; capabilities[i].name; i++) {
+		if (packet_write_fmt_gently(process->in, "capability=%s\n",
+					    capabilities[i].name))
+			return error("Could not write requested capability");
+	}
+	if (packet_flush_gently(process->in))
+		return error("Could not write flush packet");
+
+	while ((line = packet_read_line(process->out, NULL))) {
+		const char *p;
+		if (!skip_prefix(line, "capability=", &p))
+			continue;
+
+		for (i = 0;
+		     capabilities[i].name && strcmp(p, capabilities[i].name);
+		     i++)
+			;
+		if (capabilities[i].name) {
+			if (supported_capabilities)
+				*supported_capabilities |= capabilities[i].flag;
+		} else {
+			warning("external filter requested unsupported filter capability '%s'",
+				p);
+		}
+	}
+
+	return 0;
+}
+
+int subprocess_handshake(struct subprocess_entry *entry,
+			 const char *welcome_prefix,
+			 int *versions,
+			 int *chosen_version,
+			 struct subprocess_capability *capabilities,
+			 unsigned int *supported_capabilities) {
+	int retval;
+	struct child_process *process = &entry->process;
+
+	sigchain_push(SIGPIPE, SIG_IGN);
+
+	retval = handshake_version(process, welcome_prefix, versions,
+				   chosen_version) ||
+		 handshake_capabilities(process, capabilities,
+					supported_capabilities);
+
+	sigchain_pop(SIGPIPE);
+	return retval;
+}
diff --git a/sub-process.h b/sub-process.h
index d37c1499a..6857eb1b5 100644
--- a/sub-process.h
+++ b/sub-process.h
@@ -29,6 +29,16 @@ struct subprocess_entry {
 	struct child_process process;
 };
 
+struct subprocess_capability {
+	const char *name;
+
+	/*
+	 * subprocess_handshake will "|=" this value to supported_capabilities
+	 * if the server reports that it supports this capability.
+	 */
+	unsigned int flag;
+};
+
 /* subprocess functions */
 
 /* Function to test two subprocess hashmap entries for equality. */
@@ -62,6 +72,22 @@ static inline struct child_process *subprocess_get_child_process(
 	return &entry->process;
 }
 
+/*
+ * Perform the version and capability negotiation as described in the "Long
+ * Running Filter Process" section of the gitattributes documentation using the
+ * given requested versions and capabilities. The "versions" and "capabilities"
+ * parameters are arrays terminated by a 0 or blank struct.
+ *
+ * This function is typically called when a subprocess is started (as part of
+ * the "startfn" passed to subprocess_start).
+ */
+int subprocess_handshake(struct subprocess_entry *entry,
+			 const char *welcome_prefix,
+			 int *versions,
+			 int *chosen_version,
+			 struct subprocess_capability *capabilities,
+			 unsigned int *supported_capabilities);
+
 /*
  * Helper function that will read packets looking for "status=<foo>"
  * key/value pairs and return the value from the last "status" packet
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index eb3d83744..46f8e583c 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
 
 		cp "$TEST_ROOT/test.o" test.r &&
 		test_must_fail git add . 2>git-stderr.log &&
-		grep "does not support filter protocol version" git-stderr.log
+		grep "expected git-filter-server" git-stderr.log
 	)
 '
 
-- 
2.14.0.rc0.400.g1c36432dff-goog


^ permalink raw reply related	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 0/2] sub-process: refactor handshake to common function
  2017-07-26 18:17 ` [PATCH for NEXT v3 0/2] " Jonathan Tan
@ 2017-07-26 19:48   ` Junio C Hamano
  2017-07-29 16:26   ` Junio C Hamano
  1 sibling, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2017-07-26 19:48 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, bmwill, larsxschneider

Jonathan Tan <jonathantanmy@google.com> writes:

> Thanks for all your comments.
>
> This is now built off "next" to include Lars's changes.
>
> About whether this is too restrictive (for example, as Junio mentions,
> this will not allow future capabilities of the form "key=???"), I think
> that this can be upgraded later if necessary. For now, both the filter
> code and the ODB (and ODB-like) proposals on the mailing list do not
> require such a thing, so I have not included that functionality.

Heh, anything can be updated later.  The real question is how much
damage such an update may incur.  If such an update ends up having
to dismantle large part of what you did, then that is not a real
consolation that it "can be" updated later.

I think subprocess_handshake() can become the lowest level helper
that takes callback function to act on the capability request that
was received, and the existing user like start_multi_file_filter_fn()
can be served by a thin-wrapper around subprocess_handshake() that
defines a standard/built-in callback that checks the capability list
for exact match and flips the bit in an unsigned, so it is possible
to limit the damage when such an enhancement happens.



>
> Changes from v2:
>  - now rebased onto next, to pick up Lars's changes
>  - split up into more functions
>  - welcome prefix does not include final dash
>  - no more gotos in expected cases (or at all)
>
> Jonathan Tan (2):
>   Documentation: migrate sub-process docs to header
>   sub-process: refactor handshake to common function
>
>  Documentation/technical/api-sub-process.txt |  59 ----------------
>  convert.c                                   |  75 ++------------------
>  pkt-line.c                                  |  19 -----
>  pkt-line.h                                  |   2 -
>  sub-process.c                               | 103 ++++++++++++++++++++++++++++
>  sub-process.h                               |  51 +++++++++++++-
>  t/t0021-conversion.sh                       |   2 +-
>  7 files changed, 160 insertions(+), 151 deletions(-)
>  delete mode 100644 Documentation/technical/api-sub-process.txt

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 0/2] sub-process: refactor handshake to common function
  2017-07-26 18:17 ` [PATCH for NEXT v3 0/2] " Jonathan Tan
  2017-07-26 19:48   ` Junio C Hamano
@ 2017-07-29 16:26   ` Junio C Hamano
  1 sibling, 0 replies; 22+ messages in thread
From: Junio C Hamano @ 2017-07-29 16:26 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, bmwill, larsxschneider

Jonathan Tan <jonathantanmy@google.com> writes:

> Thanks for all your comments.
>
> This is now built off "next" to include Lars's changes.

Developing on 'next' to ask for comments is OK, but try to minimize
the dependencies; otherwise you are setting yourself to be taken
hostage by many unrelated topics.

I did a merge of ls/filter-branch-delayed into v2.14-rc1 and then
applied these two on top (with one small style-fix) to see how bad
its dependencies are---I think that is the only topic you need that
is still in flight, so it does not look too bad.


^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
  2017-07-26 18:17 ` [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function Jonathan Tan
@ 2017-08-06 19:58   ` Lars Schneider
  2017-08-07 17:21     ` Jonathan Tan
  0 siblings, 1 reply; 22+ messages in thread
From: Lars Schneider @ 2017-08-06 19:58 UTC (permalink / raw)
  To: Jonathan Tan; +Cc: git, bmwill, gitster


> On 26 Jul 2017, at 20:17, Jonathan Tan <jonathantanmy@google.com> wrote:
> 
> Refactor, into a common function, the version and capability negotiation
> done when invoking a long-running process as a clean or smudge filter.
> This will be useful for other Git code that needs to interact similarly
> with a long-running process.
> 
> As you can see in the change to t0021, this commit changes the error
> message reported when the long-running process does not introduce itself
> with the expected "server"-terminated line. Originally, the error
> message reports that the filter "does not support filter protocol
> version 2", differentiating between the old single-file filter protocol
> and the new multi-file filter protocol - I have updated it to something
> more generic and useful.
> 
> Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
> ---
> convert.c             |  75 ++++--------------------------------
> pkt-line.c            |  19 ----------
> pkt-line.h            |   2 -
> sub-process.c         | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++
> sub-process.h         |  26 +++++++++++++
> t/t0021-conversion.sh |   2 +-
> 6 files changed, 137 insertions(+), 90 deletions(-)
> 
> diff --git a/convert.c b/convert.c
> index dbdbb24e4..1012462e3 100644
> --- a/convert.c
> +++ b/convert.c
> @@ -513,78 +513,17 @@ static struct hashmap subprocess_map;
> 
> static int start_multi_file_filter_fn(struct subprocess_entry *subprocess)
> {
> -	int err, i;
> -	struct cmd2process *entry = (struct cmd2process *)subprocess;
> -	struct string_list cap_list = STRING_LIST_INIT_NODUP;
> -	char *cap_buf;
> -	const char *cap_name;
> -	struct child_process *process = &subprocess->process;
> -	const char *cmd = subprocess->cmd;
> -
> -	static const struct {
> -		const char *name;
> -		unsigned int cap;
> -	} known_caps[] = {
> +	static int versions[] = {2, 0};
> +	static struct subprocess_capability capabilities[] = {
> 		{ "clean",  CAP_CLEAN  },
> 		{ "smudge", CAP_SMUDGE },
> 		{ "delay",  CAP_DELAY  },
> +		{ NULL, 0 }
> 	};
> -
> -	sigchain_push(SIGPIPE, SIG_IGN);
> -
> -	err = packet_writel(process->in, "git-filter-client", "version=2", NULL);
> -	if (err)
> -		goto done;
> -
> -	err = strcmp(packet_read_line(process->out, NULL), "git-filter-server");
> -	if (err) {
> -		error("external filter '%s' does not support filter protocol version 2", cmd);
> -		goto done;
> -	}
> -	err = strcmp(packet_read_line(process->out, NULL), "version=2");
> -	if (err)
> -		goto done;
> -	err = packet_read_line(process->out, NULL) != NULL;
> -	if (err)
> -		goto done;
> -
> -	for (i = 0; i < ARRAY_SIZE(known_caps); ++i) {
> -		err = packet_write_fmt_gently(
> -			process->in, "capability=%s\n", known_caps[i].name);
> -		if (err)
> -			goto done;
> -	}
> -	err = packet_flush_gently(process->in);
> -	if (err)
> -		goto done;
> -
> -	for (;;) {
> -		cap_buf = packet_read_line(process->out, NULL);
> -		if (!cap_buf)
> -			break;
> -		string_list_split_in_place(&cap_list, cap_buf, '=', 1);
> -
> -		if (cap_list.nr != 2 || strcmp(cap_list.items[0].string, "capability"))
> -			continue;
> -
> -		cap_name = cap_list.items[1].string;
> -		i = ARRAY_SIZE(known_caps) - 1;
> -		while (i >= 0 && strcmp(cap_name, known_caps[i].name))
> -			i--;
> -
> -		if (i >= 0)
> -			entry->supported_capabilities |= known_caps[i].cap;
> -		else
> -			warning("external filter '%s' requested unsupported filter capability '%s'",
> -			cmd, cap_name);
> -
> -		string_list_clear(&cap_list, 0);
> -	}
> -
> -done:
> -	sigchain_pop(SIGPIPE);
> -
> -	return err;
> +	struct cmd2process *entry = (struct cmd2process *)subprocess;
> +	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
> +				    capabilities,
> +				    &entry->supported_capabilities);

Wouldn't it make sense to add `supported_capabilities` to `struct subprocess_entry` ?


> }
> 
> static void handle_filter_error(const struct strbuf *filter_status,
> diff --git a/pkt-line.c b/pkt-line.c
> index 9d845ecc3..7db911957 100644
> --- a/pkt-line.c
> +++ b/pkt-line.c
> @@ -171,25 +171,6 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
> 	return status;
> }
> 
> -int packet_writel(int fd, const char *line, ...)
> -{
> -	va_list args;
> -	int err;
> -	va_start(args, line);
> -	for (;;) {
> -		if (!line)
> -			break;
> -		if (strlen(line) > LARGE_PACKET_DATA_MAX)
> -			return -1;
> -		err = packet_write_fmt_gently(fd, "%s\n", line);
> -		if (err)
> -			return err;
> -		line = va_arg(args, const char*);
> -	}
> -	va_end(args);
> -	return packet_flush_gently(fd);
> -}
> -
> static int packet_write_gently(const int fd_out, const char *buf, size_t size)
> {
> 	static char packet_write_buffer[LARGE_PACKET_MAX];
> diff --git a/pkt-line.h b/pkt-line.h
> index 450183b64..66ef610fc 100644
> --- a/pkt-line.h
> +++ b/pkt-line.h
> @@ -25,8 +25,6 @@ void packet_buf_flush(struct strbuf *buf);
> void packet_buf_write(struct strbuf *buf, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> int packet_flush_gently(int fd);
> int packet_write_fmt_gently(int fd, const char *fmt, ...) __attribute__((format (printf, 2, 3)));
> -LAST_ARG_MUST_BE_NULL
> -int packet_writel(int fd, const char *line, ...);
> int write_packetized_from_fd(int fd_in, int fd_out);
> int write_packetized_from_buf(const char *src_in, size_t len, int fd_out);
> 
> diff --git a/sub-process.c b/sub-process.c
> index 6cbffa440..37b4bd0ad 100644
> --- a/sub-process.c
> +++ b/sub-process.c
> @@ -108,3 +108,106 @@ int subprocess_start(struct hashmap *hashmap, struct subprocess_entry *entry, co
> 	hashmap_add(hashmap, entry);
> 	return 0;
> }
> +
> +static int handshake_version(struct child_process *process,
> +			     const char *welcome_prefix, int *versions,

Maybe it would be less ambiguous if we call it `supported_versions` ? 


> +			     int *chosen_version)
> +{
> +	int version_scratch;
> +	int i;
> +	char *line;
> +	const char *p;
> +
> +	if (!chosen_version)
> +		chosen_version = &version_scratch;

I am not an C expert but wouldn't 'version_scratch' go out of scope as soon
as the function returns? Why don't you error here right away?


> +	if (packet_write_fmt_gently(process->in, "%s-client\n",
> +				    welcome_prefix))
> +		return error("Could not write client identification");

Nit: Would it make sense to rename `welcome_prefix` to `client_id`?
Alternatively, could we rename the error messages to "welcome prefix"?


> +	for (i = 0; versions[i]; i++) {
> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> +					    versions[i]))
> +			return error("Could not write requested version");

Maybe: "Could not write supported versions"?


> +	}
> +	if (packet_flush_gently(process->in))
> +		return error("Could not write flush packet");

I feel this error is too generic.
Maybe: "Could not finish writing supported versions"?


> +
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, welcome_prefix, &p) ||
> +	    strcmp(p, "-server"))
> +		return error("Unexpected line '%s', expected %s-server",
> +			     line ? line : "<flush packet>", welcome_prefix);
> +	if (!(line = packet_read_line(process->out, NULL)) ||
> +	    !skip_prefix(line, "version=", &p) ||
> +	    strtol_i(p, 10, chosen_version))

Maybe `strlen("version=")` would be more clear than 10?


> +		return error("Unexpected line '%s', expected version",

Maybe "... expected version number" ?


> +			     line ? line : "<flush packet>");
> +	if ((line = packet_read_line(process->out, NULL)))
> +		return error("Unexpected line '%s', expected flush", line);
> +
> +	/* Check to make sure that the version received is supported */
> +	for (i = 0; versions[i]; i++) {
> +		if (versions[i] == *chosen_version)
> +			break;
> +	}
> +	if (!versions[i])
> +		return error("Version %d not supported", *chosen_version);
> +
> +	return 0;
> +}
> +
> +static int handshake_capabilities(struct child_process *process,
> +				  struct subprocess_capability *capabilities,
> +				  unsigned int *supported_capabilities)

I feel the naming could be misleading. I think ...
`capabilities` is really `supported_capabilities` 
and 
`supported_capabilities` is really `negiotated_capabilties` or `agreed_capabilites`


> +{
> +	int i;
> +	char *line;
> +
> +	for (i = 0; capabilities[i].name; i++) {
> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> +					    capabilities[i].name))
> +			return error("Could not write requested capability");

I think this should be "Could not write supported capability", no?


> +	}
> +	if (packet_flush_gently(process->in))
> +		return error("Could not write flush packet");

Maybe " "Could not finish writing supported capability" ?


> +	while ((line = packet_read_line(process->out, NULL))) {
> +		const char *p;
> +		if (!skip_prefix(line, "capability=", &p))
> +			continue;

Shouldn't we write an error in this case?


> +		for (i = 0;
> +		     capabilities[i].name && strcmp(p, capabilities[i].name);
> +		     i++)
> +			;
> +		if (capabilities[i].name) {
> +			if (supported_capabilities)
> +				*supported_capabilities |= capabilities[i].flag;
> +		} else {
> +			warning("external filter requested unsupported filter capability '%s'",
> +				p);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities) {
> +	int retval;
> +	struct child_process *process = &entry->process;
> +
> +	sigchain_push(SIGPIPE, SIG_IGN);
> +
> +	retval = handshake_version(process, welcome_prefix, versions,
> +				   chosen_version) ||
> +		 handshake_capabilities(process, capabilities,
> +					supported_capabilities);
> +
> +	sigchain_pop(SIGPIPE);
> +	return retval;
> +}
> diff --git a/sub-process.h b/sub-process.h
> index d37c1499a..6857eb1b5 100644
> --- a/sub-process.h
> +++ b/sub-process.h
> @@ -29,6 +29,16 @@ struct subprocess_entry {
> 	struct child_process process;
> };
> 
> +struct subprocess_capability {
> +	const char *name;
> +
> +	/*
> +	 * subprocess_handshake will "|=" this value to supported_capabilities
> +	 * if the server reports that it supports this capability.
> +	 */
> +	unsigned int flag;
> +};
> +
> /* subprocess functions */
> 
> /* Function to test two subprocess hashmap entries for equality. */
> @@ -62,6 +72,22 @@ static inline struct child_process *subprocess_get_child_process(
> 	return &entry->process;
> }
> 
> +/*
> + * Perform the version and capability negotiation as described in the "Long
> + * Running Filter Process" section of the gitattributes documentation using the
> + * given requested versions and capabilities. The "versions" and "capabilities"
> + * parameters are arrays terminated by a 0 or blank struct.

Should we reference the "gitattributes docs" if we want to make this generic?
I thought "technical/api-sub-process.txt" would explain this kind of stuff
and I was surprised that you deleted it in an earlier patch.


> + *
> + * This function is typically called when a subprocess is started (as part of
> + * the "startfn" passed to subprocess_start).
> + */
> +int subprocess_handshake(struct subprocess_entry *entry,
> +			 const char *welcome_prefix,
> +			 int *versions,
> +			 int *chosen_version,
> +			 struct subprocess_capability *capabilities,
> +			 unsigned int *supported_capabilities);
> +
> /*
>  * Helper function that will read packets looking for "status=<foo>"
>  * key/value pairs and return the value from the last "status" packet
> diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
> index eb3d83744..46f8e583c 100755
> --- a/t/t0021-conversion.sh
> +++ b/t/t0021-conversion.sh
> @@ -697,7 +697,7 @@ test_expect_success PERL 'invalid process filter must fail (and not hang!)' '
> 
> 		cp "$TEST_ROOT/test.o" test.r &&
> 		test_must_fail git add . 2>git-stderr.log &&
> -		grep "does not support filter protocol version" git-stderr.log
> +		grep "expected git-filter-server" git-stderr.log
> 	)
> '
> 

The generalization of this protocol is nice to see.

Thanks for working on it,
Lars




^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
  2017-08-06 19:58   ` Lars Schneider
@ 2017-08-07 17:21     ` Jonathan Tan
  2017-08-07 17:51       ` Lars Schneider
  0 siblings, 1 reply; 22+ messages in thread
From: Jonathan Tan @ 2017-08-07 17:21 UTC (permalink / raw)
  To: Lars Schneider; +Cc: git, bmwill, gitster

On Sun, 6 Aug 2017 21:58:24 +0200
Lars Schneider <larsxschneider@gmail.com> wrote:

> > +	struct cmd2process *entry = (struct cmd2process *)subprocess;
> > +	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
> > +				    capabilities,
> > +				    &entry->supported_capabilities);
> 
> Wouldn't it make sense to add `supported_capabilities` to `struct subprocess_entry` ?

The members of "struct subprocess_entry" are not supposed to be accessed
directly, according to the documentation. If we relaxed that, then we
could do this, but before that I think it's better to let the caller
handle it.

> > +
> > +static int handshake_version(struct child_process *process,
> > +			     const char *welcome_prefix, int *versions,
> 
> Maybe it would be less ambiguous if we call it `supported_versions` ? 

I thought of that, but I think "supported_versions" is actually more
ambiguous, since we don't know if these are versions supported by the
server or client or both.

> > +			     int *chosen_version)
> > +{
> > +	int version_scratch;
> > +	int i;
> > +	char *line;
> > +	const char *p;
> > +
> > +	if (!chosen_version)
> > +		chosen_version = &version_scratch;
> 
> I am not an C expert but wouldn't 'version_scratch' go out of scope as soon
> as the function returns? Why don't you error here right away?

It does, but so does chosen_version. This is meant to allow the caller
to pass NULL to this function.

> > +	if (packet_write_fmt_gently(process->in, "%s-client\n",
> > +				    welcome_prefix))
> > +		return error("Could not write client identification");
> 
> Nit: Would it make sense to rename `welcome_prefix` to `client_id`?
> Alternatively, could we rename the error messages to "welcome prefix"?

I was retaining the existing terminology, but your suggestions seem
reasonable. This might be best done in another patch once this series
lands in master, though.

> > +	for (i = 0; versions[i]; i++) {
> > +		if (packet_write_fmt_gently(process->in, "version=%d\n",
> > +					    versions[i]))
> > +			return error("Could not write requested version");
> 
> Maybe: "Could not write supported versions"?

Same as above - "supported" is ambiguous.

> > +	}
> > +	if (packet_flush_gently(process->in))
> > +		return error("Could not write flush packet");
> 
> I feel this error is too generic.
> Maybe: "Could not finish writing supported versions"?

That's reasonable. This is a rare error, though, and if it does occur, I
think this message is more informative. But I'm OK either way.

> > +
> > +	if (!(line = packet_read_line(process->out, NULL)) ||
> > +	    !skip_prefix(line, welcome_prefix, &p) ||
> > +	    strcmp(p, "-server"))
> > +		return error("Unexpected line '%s', expected %s-server",
> > +			     line ? line : "<flush packet>", welcome_prefix);
> > +	if (!(line = packet_read_line(process->out, NULL)) ||
> > +	    !skip_prefix(line, "version=", &p) ||
> > +	    strtol_i(p, 10, chosen_version))
> 
> Maybe `strlen("version=")` would be more clear than 10?

The 10 here is the base, not the length. If there's a better way to
convert strings to integers, let me know.

> > +		return error("Unexpected line '%s', expected version",
> 
> Maybe "... expected version number" ?

I'm fine either way.

> > +static int handshake_capabilities(struct child_process *process,
> > +				  struct subprocess_capability *capabilities,
> > +				  unsigned int *supported_capabilities)
> 
> I feel the naming could be misleading. I think ...
> `capabilities` is really `supported_capabilities` 
> and 
> `supported_capabilities` is really `negiotated_capabilties` or `agreed_capabilites`

These "supported capabilities" are those supported by both the client
(Git) and the server (the process Git is invoking). I think it's better
to use this term for the intersection of capabilities, rather than
exclusively for the client or server.

> > +	for (i = 0; capabilities[i].name; i++) {
> > +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
> > +					    capabilities[i].name))
> > +			return error("Could not write requested capability");
> 
> I think this should be "Could not write supported capability", no?

Same comment as above.

> > +	}
> > +	if (packet_flush_gently(process->in))
> > +		return error("Could not write flush packet");
> 
> Maybe " "Could not finish writing supported capability" ?

Same comment as the one about writing flush packets above.

> > +	while ((line = packet_read_line(process->out, NULL))) {
> > +		const char *p;
> > +		if (!skip_prefix(line, "capability=", &p))
> > +			continue;
> 
> Shouldn't we write an error in this case?

I'm preserving the existing behavior.

> > +/*
> > + * Perform the version and capability negotiation as described in the "Long
> > + * Running Filter Process" section of the gitattributes documentation using the
> > + * given requested versions and capabilities. The "versions" and "capabilities"
> > + * parameters are arrays terminated by a 0 or blank struct.
> 
> Should we reference the "gitattributes docs" if we want to make this generic?
> I thought "technical/api-sub-process.txt" would explain this kind of stuff
> and I was surprised that you deleted it in an earlier patch.

I think this should be done only when we have two users of this, for
example, when a patch like [1] (which does contain the change to move
away from the gitattributes doc) lands.

[1] https://public-inbox.org/git/eadce97b6a1e80345a2621e71ce187e9e6bc05bf.1501532294.git.jonathantanmy@google.com/

> The generalization of this protocol is nice to see.
> 
> Thanks for working on it,
> Lars

Thanks for your comments.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
  2017-08-07 17:21     ` Jonathan Tan
@ 2017-08-07 17:51       ` Lars Schneider
  2017-08-07 18:17         ` Jonathan Tan
  0 siblings, 1 reply; 22+ messages in thread
From: Lars Schneider @ 2017-08-07 17:51 UTC (permalink / raw)
  To: Jonathan Tan, benpeart; +Cc: git, bmwill, gitster


> On 07 Aug 2017, at 19:21, Jonathan Tan <jonathantanmy@google.com> wrote:
> 
> On Sun, 6 Aug 2017 21:58:24 +0200
> Lars Schneider <larsxschneider@gmail.com> wrote:
> 
>>> +	struct cmd2process *entry = (struct cmd2process *)subprocess;
>>> +	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
>>> +				    capabilities,
>>> +				    &entry->supported_capabilities);
>> 
>> Wouldn't it make sense to add `supported_capabilities` to `struct subprocess_entry` ?
> 
> The members of "struct subprocess_entry" are not supposed to be accessed
> directly, according to the documentation. If we relaxed that, then we
> could do this, but before that I think it's better to let the caller
> handle it.

@Ben: You wrote that " Members should not be accessed directly.":
https://github.com/git/git/commit/99605d62e8e7e568035dc953b24b79b3d52f0522#diff-c1655ad5d68943a3dc5bfae8c98466f2R22
Can you give me a hint why?

@Jonathan: What do you mean by "it's better to let the caller handle it"


>>> 
>>> +static int handshake_version(struct child_process *process,
>>> +			     const char *welcome_prefix, int *versions,
>> 
>> Maybe it would be less ambiguous if we call it `supported_versions` ? 
> 
> I thought of that, but I think "supported_versions" is actually more
> ambiguous, since we don't know if these are versions supported by the
> server or client or both.

True! Maybe `versions_supported_by_git` to annoy people that hate 
long variable names ;-)


>>> +			     int *chosen_version)
>>> +{
>>> +	int version_scratch;
>>> +	int i;
>>> +	char *line;
>>> +	const char *p;
>>> +
>>> +	if (!chosen_version)
>>> +		chosen_version = &version_scratch;
>> 
>> I am not an C expert but wouldn't 'version_scratch' go out of scope as soon
>> as the function returns? Why don't you error here right away?
> 
> It does, but so does chosen_version. This is meant to allow the caller
> to pass NULL to this function.

Hm. I think every protocol should be versioned otherwise we could run
into trouble in the long run.

TBH I wouldn't support NULL in that case in the first place. If you
want to support it then I think we should document it.


>>> +	if (packet_write_fmt_gently(process->in, "%s-client\n",
>>> +				    welcome_prefix))
>>> +		return error("Could not write client identification");
>> 
>> Nit: Would it make sense to rename `welcome_prefix` to `client_id`?
>> Alternatively, could we rename the error messages to "welcome prefix"?
> 
> I was retaining the existing terminology, but your suggestions seem
> reasonable. This might be best done in another patch once this series
> lands in master, though.

Yeah, sorry for my late review :-(


>>> +	for (i = 0; versions[i]; i++) {
>>> +		if (packet_write_fmt_gently(process->in, "version=%d\n",
>>> +					    versions[i]))
>>> +			return error("Could not write requested version");
>> 
>> Maybe: "Could not write supported versions"?
> 
> Same as above - "supported" is ambiguous.
> 
>>> +	}
>>> +	if (packet_flush_gently(process->in))
>>> +		return error("Could not write flush packet");
>> 
>> I feel this error is too generic.
>> Maybe: "Could not finish writing supported versions"?
> 
> That's reasonable. This is a rare error, though, and if it does occur, I
> think this message is more informative. But I'm OK either way.

My thinking is this: if I see an error then I want to roughly know what
went wrong and I want to have a good chance to find the error in the
source. The "Could not write flush packet" is technically correct but
it makes it harder to pinpoint the error in the source as we throw
it in several places.


> 
>>> +
>>> +	if (!(line = packet_read_line(process->out, NULL)) ||
>>> +	    !skip_prefix(line, welcome_prefix, &p) ||
>>> +	    strcmp(p, "-server"))
>>> +		return error("Unexpected line '%s', expected %s-server",
>>> +			     line ? line : "<flush packet>", welcome_prefix);
>>> +	if (!(line = packet_read_line(process->out, NULL)) ||
>>> +	    !skip_prefix(line, "version=", &p) ||
>>> +	    strtol_i(p, 10, chosen_version))
>> 
>> Maybe `strlen("version=")` would be more clear than 10?
> 
> The 10 here is the base, not the length. If there's a better way to
> convert strings to integers, let me know.

Argh, of course! Sorry! To my defense: it was late last night :-)


> 
>>> +		return error("Unexpected line '%s', expected version",
>> 
>> Maybe "... expected version number" ?
> 
> I'm fine either way.
> 
>>> +static int handshake_capabilities(struct child_process *process,
>>> +				  struct subprocess_capability *capabilities,
>>> +				  unsigned int *supported_capabilities)
>> 
>> I feel the naming could be misleading. I think ...
>> `capabilities` is really `supported_capabilities` 
>> and 
>> `supported_capabilities` is really `negiotated_capabilties` or `agreed_capabilites`
> 
> These "supported capabilities" are those supported by both the client
> (Git) and the server (the process Git is invoking). I think it's better
> to use this term for the intersection of capabilities, rather than
> exclusively for the client or server.
> 
>>> +	for (i = 0; capabilities[i].name; i++) {
>>> +		if (packet_write_fmt_gently(process->in, "capability=%s\n",
>>> +					    capabilities[i].name))
>>> +			return error("Could not write requested capability");
>> 
>> I think this should be "Could not write supported capability", no?
> 
> Same comment as above.
> 
>>> +	}
>>> +	if (packet_flush_gently(process->in))
>>> +		return error("Could not write flush packet");
>> 
>> Maybe " "Could not finish writing supported capability" ?
> 
> Same comment as the one about writing flush packets above.
> 
>>> +	while ((line = packet_read_line(process->out, NULL))) {
>>> +		const char *p;
>>> +		if (!skip_prefix(line, "capability=", &p))
>>> +			continue;
>> 
>> Shouldn't we write an error in this case?
> 
> I'm preserving the existing behavior.

You're right:
https://github.com/git/git/blob/4384e3cde2ce8ecd194202e171ae16333d241326/convert.c#L549-L550


- Lars

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
  2017-08-07 17:51       ` Lars Schneider
@ 2017-08-07 18:17         ` Jonathan Tan
  2017-08-07 18:29           ` Ben Peart
  0 siblings, 1 reply; 22+ messages in thread
From: Jonathan Tan @ 2017-08-07 18:17 UTC (permalink / raw)
  To: Lars Schneider; +Cc: benpeart, git, bmwill, gitster

On Mon, 7 Aug 2017 19:51:04 +0200
Lars Schneider <larsxschneider@gmail.com> wrote:

> 
> > On 07 Aug 2017, at 19:21, Jonathan Tan <jonathantanmy@google.com> wrote:
> > 
> > On Sun, 6 Aug 2017 21:58:24 +0200
> > Lars Schneider <larsxschneider@gmail.com> wrote:
> > 
> >>> +	struct cmd2process *entry = (struct cmd2process *)subprocess;
> >>> +	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
> >>> +				    capabilities,
> >>> +				    &entry->supported_capabilities);
> >> 
> >> Wouldn't it make sense to add `supported_capabilities` to `struct subprocess_entry` ?
> > 
> > The members of "struct subprocess_entry" are not supposed to be accessed
> > directly, according to the documentation. If we relaxed that, then we
> > could do this, but before that I think it's better to let the caller
> > handle it.
> 
> @Ben: You wrote that " Members should not be accessed directly.":
> https://github.com/git/git/commit/99605d62e8e7e568035dc953b24b79b3d52f0522#diff-c1655ad5d68943a3dc5bfae8c98466f2R22
> Can you give me a hint why?
> 
> @Jonathan: What do you mean by "it's better to let the caller handle it"

Let the caller provide their own place to store the capabilities, I
mean, instead of (say) using a field as you describe and an accessor
method.

I don't feel strongly about this, though.

> > It does, but so does chosen_version. This is meant to allow the caller
> > to pass NULL to this function.
> 
> Hm. I think every protocol should be versioned otherwise we could run
> into trouble in the long run.
> 
> TBH I wouldn't support NULL in that case in the first place. If you
> want to support it then I think we should document it.

Note that this NULL is for the chosen version as chosen by the server,
not the versions declared as supported by the client.

The protocol is versioned. Some users (e.g. the filter mechanism) of
this subprocess thing would want to pass NULL because they only support
one version and the subprocess thing already ensures that the server
report that it supports one of the versions sent.

^ permalink raw reply	[flat|nested] 22+ messages in thread

* Re: [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function
  2017-08-07 18:17         ` Jonathan Tan
@ 2017-08-07 18:29           ` Ben Peart
  0 siblings, 0 replies; 22+ messages in thread
From: Ben Peart @ 2017-08-07 18:29 UTC (permalink / raw)
  To: Jonathan Tan, Lars Schneider; +Cc: benpeart, git, bmwill, gitster



On 8/7/2017 2:17 PM, Jonathan Tan wrote:
> On Mon, 7 Aug 2017 19:51:04 +0200
> Lars Schneider <larsxschneider@gmail.com> wrote:
> 
>>
>>> On 07 Aug 2017, at 19:21, Jonathan Tan <jonathantanmy@google.com> wrote:
>>>
>>> On Sun, 6 Aug 2017 21:58:24 +0200
>>> Lars Schneider <larsxschneider@gmail.com> wrote:
>>>
>>>>> +	struct cmd2process *entry = (struct cmd2process *)subprocess;
>>>>> +	return subprocess_handshake(subprocess, "git-filter", versions, NULL,
>>>>> +				    capabilities,
>>>>> +				    &entry->supported_capabilities);
>>>>
>>>> Wouldn't it make sense to add `supported_capabilities` to `struct subprocess_entry` ?
>>>
>>> The members of "struct subprocess_entry" are not supposed to be accessed
>>> directly, according to the documentation. If we relaxed that, then we
>>> could do this, but before that I think it's better to let the caller
>>> handle it.
>>
>> @Ben: You wrote that " Members should not be accessed directly.":
>> https://github.com/git/git/commit/99605d62e8e7e568035dc953b24b79b3d52f0522#diff-c1655ad5d68943a3dc5bfae8c98466f2R22
>> Can you give me a hint why?
>>

It's just good object oriented design of providing a layer of 
abstraction between the implementation details and the use of the 
class/object/API.  I was following the model in api-hashmap.txt but 
there are many other examples of where we don't do this.

Perhaps providing a function that returns the property you want to 
access (similar to subprocess_get_child_process) would work.

>> @Jonathan: What do you mean by "it's better to let the caller handle it"
> 
> Let the caller provide their own place to store the capabilities, I
> mean, instead of (say) using a field as you describe and an accessor
> method.
> 
> I don't feel strongly about this, though.
> 
>>> It does, but so does chosen_version. This is meant to allow the caller
>>> to pass NULL to this function.
>>
>> Hm. I think every protocol should be versioned otherwise we could run
>> into trouble in the long run.
>>
>> TBH I wouldn't support NULL in that case in the first place. If you
>> want to support it then I think we should document it.
> 
> Note that this NULL is for the chosen version as chosen by the server,
> not the versions declared as supported by the client.
> 
> The protocol is versioned. Some users (e.g. the filter mechanism) of
> this subprocess thing would want to pass NULL because they only support
> one version and the subprocess thing already ensures that the server
> report that it supports one of the versions sent.
> 

^ permalink raw reply	[flat|nested] 22+ messages in thread

end of thread, other threads:[~2017-08-07 18:29 UTC | newest]

Thread overview: 22+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2017-07-24 21:38 [PATCH] sub-process: refactor handshake to common function Jonathan Tan
2017-07-24 22:21 ` Jonathan Nieder
2017-07-25 14:38 ` Ben Peart
2017-07-25 17:53   ` Jonathan Tan
2017-07-25 18:29 ` [PATCH v2 0/2] " Jonathan Tan
2017-07-25 18:29 ` [PATCH v2 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
2017-07-25 20:18   ` Brandon Williams
2017-07-25 18:29 ` [PATCH v2 2/2] sub-process: refactor handshake to common function Jonathan Tan
2017-07-25 20:28   ` Brandon Williams
2017-07-25 22:25   ` Junio C Hamano
2017-07-26 16:52 ` [PATCH] " Lars Schneider
2017-07-26 18:14   ` Junio C Hamano
2017-07-26 18:17 ` [PATCH for NEXT v3 0/2] " Jonathan Tan
2017-07-26 19:48   ` Junio C Hamano
2017-07-29 16:26   ` Junio C Hamano
2017-07-26 18:17 ` [PATCH for NEXT v3 1/2] Documentation: migrate sub-process docs to header Jonathan Tan
2017-07-26 18:17 ` [PATCH for NEXT v3 2/2] sub-process: refactor handshake to common function Jonathan Tan
2017-08-06 19:58   ` Lars Schneider
2017-08-07 17:21     ` Jonathan Tan
2017-08-07 17:51       ` Lars Schneider
2017-08-07 18:17         ` Jonathan Tan
2017-08-07 18:29           ` Ben Peart

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).