git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: larsxschneider@gmail.com
To: git@vger.kernel.org
Cc: gitster@pobox.com, jnareb@gmail.com, peff@peff.net,
	Lars Schneider <larsxschneider@gmail.com>
Subject: [PATCH v10 00/14] Git filter protocol
Date: Sat,  8 Oct 2016 13:25:16 +0200	[thread overview]
Message-ID: <20161008112530.15506-1-larsxschneider@gmail.com> (raw)

From: Lars Schneider <larsxschneider@gmail.com>

The goal of this series is to avoid launching a new clean/smudge filter
process for each file that is filtered.

A short summary about v1 to v5 can be found here:
https://git.github.io/rev_news/2016/08/17/edition-18/

This series is also published on web:
https://github.com/larsxschneider/git/pull/14

Patches 1 and 2 are cleanups and not strictly necessary for the series.
Patches 3 to 12 are required preparation. Patch 13 is the main patch.
Patch 14 adds an example how to use the Git filter protocol in contrib.

Thanks a lot to
 Jakub, Junio, and Peff
for very helpful reviews,
Lars

## Changes since v9
  * replace the very specific "wait after close(stdin)" behavior with the
    more flexible run-command "clean_on_exit_handler" flag to fix flaky t0021,
    see discussion:
    http://public-inbox.org/git/xmqq37k9mm7k.fsf@gitster.mtv.corp.google.com/
    http://public-inbox.org/git/xmqq8tubitjs.fsf@gitster.mtv.corp.google.com/
  * run stop_multi_file_filter() for all filters on Git shutdown
  * actually kill filter in kill_multi_file_filter()
  * add new filter process to hashmap only if the process start was successful
  * remove superfluous fstat() call
  * avoid potential buffer overflow in packet_write_gently() packet size check
  * remove superfluous buffer in write_packetized_from_buf()
  * improve test name


Lars Schneider (14):
  convert: quote filter names in error messages
  convert: modernize tests
  run-command: move check_pipe() from write_or_die to run_command
  run-command: add clean_on_exit_handler
  pkt-line: rename packet_write() to packet_write_fmt()
  pkt-line: extract set_packet_header()
  pkt-line: add packet_write_fmt_gently()
  pkt-line: add packet_flush_gently()
  pkt-line: add packet_write_gently()
  pkt-line: add functions to read/write flush terminated packet streams
  convert: make apply_filter() adhere to standard Git error handling
  convert: prepare filter.<driver>.process option
  convert: add filter.<driver>.process option
  contrib/long-running-filter: add long running filter example

 Documentation/gitattributes.txt        | 159 ++++++++++-
 builtin/archive.c                      |   4 +-
 builtin/receive-pack.c                 |   4 +-
 builtin/remote-ext.c                   |   4 +-
 builtin/upload-archive.c               |   4 +-
 connect.c                              |   2 +-
 contrib/long-running-filter/example.pl | 127 +++++++++
 convert.c                              | 372 +++++++++++++++++++++---
 daemon.c                               |   2 +-
 http-backend.c                         |   2 +-
 pkt-line.c                             | 152 +++++++++-
 pkt-line.h                             |  12 +-
 run-command.c                          |  36 ++-
 run-command.h                          |   4 +-
 shallow.c                              |   2 +-
 t/t0021-conversion.sh                  | 505 ++++++++++++++++++++++++++++++---
 t/t0021/rot13-filter.pl                | 191 +++++++++++++
 upload-pack.c                          |  30 +-
 write_or_die.c                         |  13 -
 19 files changed, 1492 insertions(+), 133 deletions(-)
 create mode 100755 contrib/long-running-filter/example.pl
 create mode 100755 t/t0021/rot13-filter.pl



## Interdiff (v9..v10)

diff --git a/convert.c b/convert.c
index 88581d6..1d89632 100644
--- a/convert.c
+++ b/convert.c
@@ -516,23 +516,6 @@ static struct cmd2process *find_multi_file_filter_entry(struct hashmap *hashmap,
 	return hashmap_get(hashmap, &key, NULL);
 }

-static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry)
-{
-	if (!entry)
-		return;
-	sigchain_push(SIGPIPE, SIG_IGN);
-	/*
-	 * We kill the filter most likely because an error happened already.
-	 * That's why we are not interested in any error code here.
-	 */
-	close(entry->process.in);
-	close(entry->process.out);
-	sigchain_pop(SIGPIPE);
-	finish_command(&entry->process);
-	hashmap_remove(hashmap, entry, NULL);
-	free(entry);
-}
-
 static int packet_write_list(int fd, const char *line, ...)
 {
 	va_list args;
@@ -552,6 +535,49 @@ static int packet_write_list(int fd, const char *line, ...)
 	return packet_flush_gently(fd);
 }

+static void read_multi_file_filter_status(int fd, struct strbuf *status) {
+	struct strbuf **pair;
+	char *line;
+	for (;;) {
+		line = packet_read_line(fd, NULL);
+		if (!line)
+			break;
+		pair = strbuf_split_str(line, '=', 2);
+		if (pair[0] && pair[0]->len && pair[1]) {
+			/* the last "status=<foo>" line wins */
+			if (!strcmp(pair[0]->buf, "status=")) {
+				strbuf_reset(status);
+				strbuf_addbuf(status, pair[1]);
+			}
+		}
+		strbuf_list_free(pair);
+	}
+}
+
+static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry)
+{
+	if (!entry)
+		return;
+
+	entry->process.clean_on_exit = 0;
+	kill(entry->process.pid, SIGTERM);
+	finish_command(&entry->process);
+
+	hashmap_remove(hashmap, entry, NULL);
+	free(entry);
+}
+
+void stop_multi_file_filter(struct child_process *process)
+{
+	sigchain_push(SIGPIPE, SIG_IGN);
+	/* Closing the pipe signals the filter to initiate a shutdown. */
+	close(process->in);
+	close(process->out);
+	sigchain_pop(SIGPIPE);
+	/* Finish command will wait until the shutdown is complete. */
+	finish_command(process);
+}
+
 static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, const char *cmd)
 {
 	int err;
@@ -563,7 +589,6 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
 	const char *cap_name;

 	entry = xmalloc(sizeof(*entry));
-	hashmap_entry_init(entry, strhash(cmd));
 	entry->cmd = cmd;
 	entry->supported_capabilities = 0;
 	process = &entry->process;
@@ -573,14 +598,16 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
 	process->use_shell = 1;
 	process->in = -1;
 	process->out = -1;
-	process->wait_on_exit = 1;
+	process->clean_on_exit = 1;
+	process->clean_on_exit_handler = stop_multi_file_filter;

 	if (start_command(process)) {
 		error("cannot fork to run external filter '%s'", cmd);
-		kill_multi_file_filter(hashmap, entry);
 		return NULL;
 	}

+	hashmap_entry_init(entry, strhash(cmd));
+
 	sigchain_push(SIGPIPE, SIG_IGN);

 	err = packet_write_list(process->in, "git-filter-client", "version=2", NULL);
@@ -635,24 +662,6 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
 	return entry;
 }

-static void read_multi_file_filter_status(int fd, struct strbuf *status) {
-	struct strbuf **pair;
-	char *line;
-	for (;;) {
-		line = packet_read_line(fd, NULL);
-		if (!line)
-			break;
-		pair = strbuf_split_str(line, '=', 2);
-		if (pair[0] && pair[0]->len && pair[1]) {
-			if (!strcmp(pair[0]->buf, "status=")) {
-				strbuf_reset(status);
-				strbuf_addbuf(status, pair[1]);
-			}
-		}
-		strbuf_list_free(pair);
-	}
-}
-
 static int apply_multi_file_filter(const char *path, const char *src, size_t len,
 				   int fd, struct strbuf *dst, const char *cmd,
 				   const unsigned int wanted_capability)
@@ -660,10 +669,9 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
 	int err;
 	struct cmd2process *entry;
 	struct child_process *process;
-	struct stat file_stat;
 	struct strbuf nbuf = STRBUF_INIT;
 	struct strbuf filter_status = STRBUF_INIT;
-	char *filter_type;
+	const char *filter_type;

 	if (!cmd_process_map_initialized) {
 		cmd_process_map_initialized = 1;
@@ -692,12 +700,6 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
 	else
 		die("unexpected filter type");

-	if (fd >= 0 && !src) {
-		if (fstat(fd, &file_stat) == -1)
-			return 0;
-		len = xsize_t(file_stat.st_size);
-	}
-
 	sigchain_push(SIGPIPE, SIG_IGN);

 	assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n"));
diff --git a/pkt-line.c b/pkt-line.c
index b82aaca..0b5125f 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -174,12 +174,13 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
 static int packet_write_gently(const int fd_out, const char *buf, size_t size)
 {
 	static char packet_write_buffer[LARGE_PACKET_MAX];
-	const size_t packet_size = size + 4;
+	size_t packet_size;

-	if (packet_size > sizeof(packet_write_buffer))
+	if (size > sizeof(packet_write_buffer) - 4)
 		return error("packet write failed - data exceeds max packet size");

 	packet_trace(buf, size, 1);
+	packet_size = size + 4;
 	set_packet_header(packet_write_buffer, packet_size);
 	memcpy(packet_write_buffer + 4, buf, size);
 	if (write_in_full(fd_out, packet_write_buffer, packet_size) == packet_size)
@@ -217,14 +218,13 @@ int write_packetized_from_fd(int fd_in, int fd_out)

 int write_packetized_from_buf(const char *src_in, size_t len, int fd_out)
 {
-	static char buf[LARGE_PACKET_DATA_MAX];
 	int err = 0;
 	size_t bytes_written = 0;
 	size_t bytes_to_write;

 	while (!err) {
-		if ((len - bytes_written) > sizeof(buf))
-			bytes_to_write = sizeof(buf);
+		if ((len - bytes_written) > LARGE_PACKET_DATA_MAX)
+			bytes_to_write = LARGE_PACKET_DATA_MAX;
 		else
 			bytes_to_write = len - bytes_written;
 		if (bytes_to_write == 0)
diff --git a/run-command.c b/run-command.c
index 96c54fe..e5fd6ff 100644
--- a/run-command.c
+++ b/run-command.c
@@ -21,9 +21,7 @@ void child_process_clear(struct child_process *child)

 struct child_to_clean {
 	pid_t pid;
-	char *name;
-	int stdin;
-	int wait;
+	struct child_process *process;
 	struct child_to_clean *next;
 };
 static struct child_to_clean *children_to_clean;
@@ -31,35 +29,23 @@ static int installed_child_cleanup_handler;

 static void cleanup_children(int sig, int in_signal)
 {
-	int status;
-	struct child_to_clean *p = children_to_clean;
-
-	/* Close the the child's stdin as indicator that Git will exit soon */
-	while (p) {
-		if (p->wait)
-			if (p->stdin > 0)
-				close(p->stdin);
-		p = p->next;
-	}
-
 	while (children_to_clean) {
-		p = children_to_clean;
+		struct child_to_clean *p = children_to_clean;
 		children_to_clean = p->next;

-		if (p->wait) {
-			fprintf(stderr, _("Waiting for '%s' to finish..."), p->name);
-			while ((waitpid(p->pid, &status, 0)) < 0 && errno == EINTR)
-				;	/* nothing */
-			fprintf(stderr, _("done!\n"));
+		if (p->process && !in_signal) {
+			struct child_process *process = p->process;
+			if (process->clean_on_exit_handler) {
+				trace_printf("trace: run_command: running exit handler for pid %d", p->pid);
+				process->clean_on_exit_handler(process);
+			}
 		}

 		kill(p->pid, sig);
-		if (!in_signal) {
-			free(p->name);
+		if (!in_signal)
 			free(p);
 	}
 }
-}

 static void cleanup_children_on_signal(int sig)
 {
@@ -73,16 +59,11 @@ static void cleanup_children_on_exit(void)
 	cleanup_children(SIGTERM, 0);
 }

-static void mark_child_for_cleanup(pid_t pid, const char *name, int stdin, int wait)
+static void mark_child_for_cleanup(pid_t pid, struct child_process *process)
 {
 	struct child_to_clean *p = xmalloc(sizeof(*p));
 	p->pid = pid;
-	p->wait = wait;
-	p->stdin = stdin;
-	if (name)
-		p->name = xstrdup(name);
-	else
-		p->name = "process";
+	p->process = process;
 	p->next = children_to_clean;
 	children_to_clean = p;

@@ -93,13 +74,6 @@ static void mark_child_for_cleanup(pid_t pid, const char *name, int stdin, int w
 	}
 }

-#ifdef NO_PTHREADS
-static void mark_child_for_cleanup_no_wait(pid_t pid, const char *name, int timeout, int stdin)
-{
-	mark_child_for_cleanup(pid, NULL, 0, 0);
-}
-#endif
-
 static void clear_child_for_cleanup(pid_t pid)
 {
 	struct child_to_clean **pp;
@@ -458,9 +432,8 @@ int start_command(struct child_process *cmd)
 	}
 	if (cmd->pid < 0)
 		error_errno("cannot fork() for %s", cmd->argv[0]);
-	else if (cmd->clean_on_exit || cmd->wait_on_exit)
-		mark_child_for_cleanup(
-			cmd->pid, cmd->argv[0], cmd->in, cmd->wait_on_exit);
+	else if (cmd->clean_on_exit)
+		mark_child_for_cleanup(cmd->pid, cmd);

 	/*
 	 * Wait for child's execvp. If the execvp succeeds (or if fork()
@@ -520,9 +493,8 @@ int start_command(struct child_process *cmd)
 	failed_errno = errno;
 	if (cmd->pid < 0 && (!cmd->silent_exec_failure || errno != ENOENT))
 		error_errno("cannot spawn %s", cmd->argv[0]);
-	if ((cmd->clean_on_exit || cmd->wait_on_exit) && cmd->pid >= 0)
-		mark_child_for_cleanup(
-			cmd->pid, cmd->argv[0], cmd->in, cmd->clean_on_exit_timeout);
+	if (cmd->clean_on_exit && cmd->pid >= 0)
+		mark_child_for_cleanup(cmd->pid, cmd);

 	argv_array_clear(&nargv);
 	cmd->argv = sargv;
@@ -804,7 +776,7 @@ int start_async(struct async *async)
 		exit(!!async->proc(proc_in, proc_out, async->data));
 	}

-	mark_child_for_cleanup_no_wait(async->pid);
+	mark_child_for_cleanup(async->pid, NULL);

 	if (need_in)
 		close(fdin[0]);
diff --git a/run-command.h b/run-command.h
index f7b9907..dd1c78c 100644
--- a/run-command.h
+++ b/run-command.h
@@ -42,10 +42,9 @@ struct child_process {
 	unsigned silent_exec_failure:1;
 	unsigned stdout_to_stderr:1;
 	unsigned use_shell:1;
-	 /* kill the child on Git exit */
 	unsigned clean_on_exit:1;
-	/* close the child's stdin on Git exit and wait until it terminates */
-	unsigned wait_on_exit:1;
+	void (*clean_on_exit_handler)(struct child_process *process);
+	void *clean_on_exit_handler_cbdata;
 };

 #define CHILD_PROCESS_INIT { NULL, ARGV_ARRAY_INIT, ARGV_ARRAY_INIT }
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 52b7fe9..9f892c0 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -28,9 +28,7 @@ file_size () {
 filter_git () {
 	rm -f rot13-filter.log &&
 	git "$@" 2>git-stderr.log &&
-	sed '/Waiting for/d' git-stderr.log >git-stderr-clean.log &&
-	test_must_be_empty git-stderr-clean.log &&
-	rm -f git-stderr.log git-stderr-clean.log
+	rm -f git-stderr.log
 }

 # Count unique lines in two files and compare them.
@@ -668,7 +666,7 @@ test_expect_success PERL 'process filter should not be restarted if it signals a
 	)
 '

-test_expect_success PERL 'process filter signals abort once to abort processing of all future files' '
+test_expect_success PERL 'process filter abort stops processing of all further files' '
 	test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
 	rm -rf repo &&
 	mkdir repo &&
@@ -688,6 +686,8 @@ test_expect_success PERL 'process filter signals abort once to abort processing
 		git add . &&
 		rm -f *.r &&

+		# Note: This test assumes that Git filters files in alphabetical
+		# order ("abort.r" before "test.r").
 		filter_git checkout --quiet --no-progress . &&
 		cat >expected.log <<-EOF &&
 			START

--
2.10.0


             reply	other threads:[~2016-10-08 11:25 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-10-08 11:25 larsxschneider [this message]
2016-10-08 11:25 ` [PATCH v10 01/14] convert: quote filter names in error messages larsxschneider
2016-10-08 11:25 ` [PATCH v10 02/14] convert: modernize tests larsxschneider
2016-10-08 11:25 ` [PATCH v10 03/14] run-command: move check_pipe() from write_or_die to run_command larsxschneider
2016-10-08 11:25 ` [PATCH v10 04/14] run-command: add clean_on_exit_handler larsxschneider
2016-10-11 12:12   ` Johannes Schindelin
2016-10-15 15:02     ` Lars Schneider
2016-10-16  8:03       ` Johannes Schindelin
2016-10-16 21:57         ` Lars Schneider
2016-10-08 11:25 ` [PATCH v10 05/14] pkt-line: rename packet_write() to packet_write_fmt() larsxschneider
2016-10-08 11:25 ` [PATCH v10 06/14] pkt-line: extract set_packet_header() larsxschneider
2016-10-08 11:25 ` [PATCH v10 07/14] pkt-line: add packet_write_fmt_gently() larsxschneider
2016-10-08 11:25 ` [PATCH v10 08/14] pkt-line: add packet_flush_gently() larsxschneider
2016-10-08 11:25 ` [PATCH v10 09/14] pkt-line: add packet_write_gently() larsxschneider
2016-10-08 11:25 ` [PATCH v10 10/14] pkt-line: add functions to read/write flush terminated packet streams larsxschneider
2016-10-08 11:25 ` [PATCH v10 11/14] convert: make apply_filter() adhere to standard Git error handling larsxschneider
2016-10-08 11:25 ` [PATCH v10 12/14] convert: prepare filter.<driver>.process option larsxschneider
2016-10-08 11:25 ` [PATCH v10 13/14] convert: add " larsxschneider
2016-10-08 23:06   ` Jakub Narębski
2016-10-09  5:32     ` Torsten Bögershausen
2016-10-11 15:29       ` Lars Schneider
2016-10-11 22:26     ` Lars Schneider
2016-10-12 10:54       ` Jakub Narębski
2016-10-15 14:45         ` Lars Schneider
2016-10-15 17:41           ` Jeff King
2016-10-15 19:42           ` Jakub Narębski
2016-10-10 19:58   ` Junio C Hamano
2016-10-11  8:11     ` Lars Schneider
2016-10-11 10:09       ` Torsten Bögershausen
2016-10-16 23:13         ` Lars Schneider
2016-10-17 17:05         ` Junio C Hamano
2016-10-08 11:25 ` [PATCH v10 14/14] contrib/long-running-filter: add long running filter example larsxschneider
2016-10-09  5:42   ` Torsten Bögershausen
2016-10-15 14:47     ` Lars Schneider

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20161008112530.15506-1-larsxschneider@gmail.com \
    --to=larsxschneider@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=jnareb@gmail.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).