From: larsxschneider@gmail.com
To: git@vger.kernel.org
Cc: gitster@pobox.com, jnareb@gmail.com, peff@peff.net,
Lars Schneider <larsxschneider@gmail.com>
Subject: [PATCH v10 00/14] Git filter protocol
Date: Sat, 8 Oct 2016 13:25:16 +0200 [thread overview]
Message-ID: <20161008112530.15506-1-larsxschneider@gmail.com> (raw)
From: Lars Schneider <larsxschneider@gmail.com>
The goal of this series is to avoid launching a new clean/smudge filter
process for each file that is filtered.
A short summary about v1 to v5 can be found here:
https://git.github.io/rev_news/2016/08/17/edition-18/
This series is also published on web:
https://github.com/larsxschneider/git/pull/14
Patches 1 and 2 are cleanups and not strictly necessary for the series.
Patches 3 to 12 are required preparation. Patch 13 is the main patch.
Patch 14 adds an example how to use the Git filter protocol in contrib.
Thanks a lot to
Jakub, Junio, and Peff
for very helpful reviews,
Lars
## Changes since v9
* replace the very specific "wait after close(stdin)" behavior with the
more flexible run-command "clean_on_exit_handler" flag to fix flaky t0021,
see discussion:
http://public-inbox.org/git/xmqq37k9mm7k.fsf@gitster.mtv.corp.google.com/
http://public-inbox.org/git/xmqq8tubitjs.fsf@gitster.mtv.corp.google.com/
* run stop_multi_file_filter() for all filters on Git shutdown
* actually kill filter in kill_multi_file_filter()
* add new filter process to hashmap only if the process start was successful
* remove superfluous fstat() call
* avoid potential buffer overflow in packet_write_gently() packet size check
* remove superfluous buffer in write_packetized_from_buf()
* improve test name
Lars Schneider (14):
convert: quote filter names in error messages
convert: modernize tests
run-command: move check_pipe() from write_or_die to run_command
run-command: add clean_on_exit_handler
pkt-line: rename packet_write() to packet_write_fmt()
pkt-line: extract set_packet_header()
pkt-line: add packet_write_fmt_gently()
pkt-line: add packet_flush_gently()
pkt-line: add packet_write_gently()
pkt-line: add functions to read/write flush terminated packet streams
convert: make apply_filter() adhere to standard Git error handling
convert: prepare filter.<driver>.process option
convert: add filter.<driver>.process option
contrib/long-running-filter: add long running filter example
Documentation/gitattributes.txt | 159 ++++++++++-
builtin/archive.c | 4 +-
builtin/receive-pack.c | 4 +-
builtin/remote-ext.c | 4 +-
builtin/upload-archive.c | 4 +-
connect.c | 2 +-
contrib/long-running-filter/example.pl | 127 +++++++++
convert.c | 372 +++++++++++++++++++++---
daemon.c | 2 +-
http-backend.c | 2 +-
pkt-line.c | 152 +++++++++-
pkt-line.h | 12 +-
run-command.c | 36 ++-
run-command.h | 4 +-
shallow.c | 2 +-
t/t0021-conversion.sh | 505 ++++++++++++++++++++++++++++++---
t/t0021/rot13-filter.pl | 191 +++++++++++++
upload-pack.c | 30 +-
write_or_die.c | 13 -
19 files changed, 1492 insertions(+), 133 deletions(-)
create mode 100755 contrib/long-running-filter/example.pl
create mode 100755 t/t0021/rot13-filter.pl
## Interdiff (v9..v10)
diff --git a/convert.c b/convert.c
index 88581d6..1d89632 100644
--- a/convert.c
+++ b/convert.c
@@ -516,23 +516,6 @@ static struct cmd2process *find_multi_file_filter_entry(struct hashmap *hashmap,
return hashmap_get(hashmap, &key, NULL);
}
-static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry)
-{
- if (!entry)
- return;
- sigchain_push(SIGPIPE, SIG_IGN);
- /*
- * We kill the filter most likely because an error happened already.
- * That's why we are not interested in any error code here.
- */
- close(entry->process.in);
- close(entry->process.out);
- sigchain_pop(SIGPIPE);
- finish_command(&entry->process);
- hashmap_remove(hashmap, entry, NULL);
- free(entry);
-}
-
static int packet_write_list(int fd, const char *line, ...)
{
va_list args;
@@ -552,6 +535,49 @@ static int packet_write_list(int fd, const char *line, ...)
return packet_flush_gently(fd);
}
+static void read_multi_file_filter_status(int fd, struct strbuf *status) {
+ struct strbuf **pair;
+ char *line;
+ for (;;) {
+ line = packet_read_line(fd, NULL);
+ if (!line)
+ break;
+ pair = strbuf_split_str(line, '=', 2);
+ if (pair[0] && pair[0]->len && pair[1]) {
+ /* the last "status=<foo>" line wins */
+ if (!strcmp(pair[0]->buf, "status=")) {
+ strbuf_reset(status);
+ strbuf_addbuf(status, pair[1]);
+ }
+ }
+ strbuf_list_free(pair);
+ }
+}
+
+static void kill_multi_file_filter(struct hashmap *hashmap, struct cmd2process *entry)
+{
+ if (!entry)
+ return;
+
+ entry->process.clean_on_exit = 0;
+ kill(entry->process.pid, SIGTERM);
+ finish_command(&entry->process);
+
+ hashmap_remove(hashmap, entry, NULL);
+ free(entry);
+}
+
+void stop_multi_file_filter(struct child_process *process)
+{
+ sigchain_push(SIGPIPE, SIG_IGN);
+ /* Closing the pipe signals the filter to initiate a shutdown. */
+ close(process->in);
+ close(process->out);
+ sigchain_pop(SIGPIPE);
+ /* Finish command will wait until the shutdown is complete. */
+ finish_command(process);
+}
+
static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, const char *cmd)
{
int err;
@@ -563,7 +589,6 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
const char *cap_name;
entry = xmalloc(sizeof(*entry));
- hashmap_entry_init(entry, strhash(cmd));
entry->cmd = cmd;
entry->supported_capabilities = 0;
process = &entry->process;
@@ -573,14 +598,16 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
process->use_shell = 1;
process->in = -1;
process->out = -1;
- process->wait_on_exit = 1;
+ process->clean_on_exit = 1;
+ process->clean_on_exit_handler = stop_multi_file_filter;
if (start_command(process)) {
error("cannot fork to run external filter '%s'", cmd);
- kill_multi_file_filter(hashmap, entry);
return NULL;
}
+ hashmap_entry_init(entry, strhash(cmd));
+
sigchain_push(SIGPIPE, SIG_IGN);
err = packet_write_list(process->in, "git-filter-client", "version=2", NULL);
@@ -635,24 +662,6 @@ static struct cmd2process *start_multi_file_filter(struct hashmap *hashmap, cons
return entry;
}
-static void read_multi_file_filter_status(int fd, struct strbuf *status) {
- struct strbuf **pair;
- char *line;
- for (;;) {
- line = packet_read_line(fd, NULL);
- if (!line)
- break;
- pair = strbuf_split_str(line, '=', 2);
- if (pair[0] && pair[0]->len && pair[1]) {
- if (!strcmp(pair[0]->buf, "status=")) {
- strbuf_reset(status);
- strbuf_addbuf(status, pair[1]);
- }
- }
- strbuf_list_free(pair);
- }
-}
-
static int apply_multi_file_filter(const char *path, const char *src, size_t len,
int fd, struct strbuf *dst, const char *cmd,
const unsigned int wanted_capability)
@@ -660,10 +669,9 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
int err;
struct cmd2process *entry;
struct child_process *process;
- struct stat file_stat;
struct strbuf nbuf = STRBUF_INIT;
struct strbuf filter_status = STRBUF_INIT;
- char *filter_type;
+ const char *filter_type;
if (!cmd_process_map_initialized) {
cmd_process_map_initialized = 1;
@@ -692,12 +700,6 @@ static int apply_multi_file_filter(const char *path, const char *src, size_t len
else
die("unexpected filter type");
- if (fd >= 0 && !src) {
- if (fstat(fd, &file_stat) == -1)
- return 0;
- len = xsize_t(file_stat.st_size);
- }
-
sigchain_push(SIGPIPE, SIG_IGN);
assert(strlen(filter_type) < LARGE_PACKET_DATA_MAX - strlen("command=\n"));
diff --git a/pkt-line.c b/pkt-line.c
index b82aaca..0b5125f 100644
--- a/pkt-line.c
+++ b/pkt-line.c
@@ -174,12 +174,13 @@ int packet_write_fmt_gently(int fd, const char *fmt, ...)
static int packet_write_gently(const int fd_out, const char *buf, size_t size)
{
static char packet_write_buffer[LARGE_PACKET_MAX];
- const size_t packet_size = size + 4;
+ size_t packet_size;
- if (packet_size > sizeof(packet_write_buffer))
+ if (size > sizeof(packet_write_buffer) - 4)
return error("packet write failed - data exceeds max packet size");
packet_trace(buf, size, 1);
+ packet_size = size + 4;
set_packet_header(packet_write_buffer, packet_size);
memcpy(packet_write_buffer + 4, buf, size);
if (write_in_full(fd_out, packet_write_buffer, packet_size) == packet_size)
@@ -217,14 +218,13 @@ int write_packetized_from_fd(int fd_in, int fd_out)
int write_packetized_from_buf(const char *src_in, size_t len, int fd_out)
{
- static char buf[LARGE_PACKET_DATA_MAX];
int err = 0;
size_t bytes_written = 0;
size_t bytes_to_write;
while (!err) {
- if ((len - bytes_written) > sizeof(buf))
- bytes_to_write = sizeof(buf);
+ if ((len - bytes_written) > LARGE_PACKET_DATA_MAX)
+ bytes_to_write = LARGE_PACKET_DATA_MAX;
else
bytes_to_write = len - bytes_written;
if (bytes_to_write == 0)
diff --git a/run-command.c b/run-command.c
index 96c54fe..e5fd6ff 100644
--- a/run-command.c
+++ b/run-command.c
@@ -21,9 +21,7 @@ void child_process_clear(struct child_process *child)
struct child_to_clean {
pid_t pid;
- char *name;
- int stdin;
- int wait;
+ struct child_process *process;
struct child_to_clean *next;
};
static struct child_to_clean *children_to_clean;
@@ -31,35 +29,23 @@ static int installed_child_cleanup_handler;
static void cleanup_children(int sig, int in_signal)
{
- int status;
- struct child_to_clean *p = children_to_clean;
-
- /* Close the the child's stdin as indicator that Git will exit soon */
- while (p) {
- if (p->wait)
- if (p->stdin > 0)
- close(p->stdin);
- p = p->next;
- }
-
while (children_to_clean) {
- p = children_to_clean;
+ struct child_to_clean *p = children_to_clean;
children_to_clean = p->next;
- if (p->wait) {
- fprintf(stderr, _("Waiting for '%s' to finish..."), p->name);
- while ((waitpid(p->pid, &status, 0)) < 0 && errno == EINTR)
- ; /* nothing */
- fprintf(stderr, _("done!\n"));
+ if (p->process && !in_signal) {
+ struct child_process *process = p->process;
+ if (process->clean_on_exit_handler) {
+ trace_printf("trace: run_command: running exit handler for pid %d", p->pid);
+ process->clean_on_exit_handler(process);
+ }
}
kill(p->pid, sig);
- if (!in_signal) {
- free(p->name);
+ if (!in_signal)
free(p);
}
}
-}
static void cleanup_children_on_signal(int sig)
{
@@ -73,16 +59,11 @@ static void cleanup_children_on_exit(void)
cleanup_children(SIGTERM, 0);
}
-static void mark_child_for_cleanup(pid_t pid, const char *name, int stdin, int wait)
+static void mark_child_for_cleanup(pid_t pid, struct child_process *process)
{
struct child_to_clean *p = xmalloc(sizeof(*p));
p->pid = pid;
- p->wait = wait;
- p->stdin = stdin;
- if (name)
- p->name = xstrdup(name);
- else
- p->name = "process";
+ p->process = process;
p->next = children_to_clean;
children_to_clean = p;
@@ -93,13 +74,6 @@ static void mark_child_for_cleanup(pid_t pid, const char *name, int stdin, int w
}
}
-#ifdef NO_PTHREADS
-static void mark_child_for_cleanup_no_wait(pid_t pid, const char *name, int timeout, int stdin)
-{
- mark_child_for_cleanup(pid, NULL, 0, 0);
-}
-#endif
-
static void clear_child_for_cleanup(pid_t pid)
{
struct child_to_clean **pp;
@@ -458,9 +432,8 @@ int start_command(struct child_process *cmd)
}
if (cmd->pid < 0)
error_errno("cannot fork() for %s", cmd->argv[0]);
- else if (cmd->clean_on_exit || cmd->wait_on_exit)
- mark_child_for_cleanup(
- cmd->pid, cmd->argv[0], cmd->in, cmd->wait_on_exit);
+ else if (cmd->clean_on_exit)
+ mark_child_for_cleanup(cmd->pid, cmd);
/*
* Wait for child's execvp. If the execvp succeeds (or if fork()
@@ -520,9 +493,8 @@ int start_command(struct child_process *cmd)
failed_errno = errno;
if (cmd->pid < 0 && (!cmd->silent_exec_failure || errno != ENOENT))
error_errno("cannot spawn %s", cmd->argv[0]);
- if ((cmd->clean_on_exit || cmd->wait_on_exit) && cmd->pid >= 0)
- mark_child_for_cleanup(
- cmd->pid, cmd->argv[0], cmd->in, cmd->clean_on_exit_timeout);
+ if (cmd->clean_on_exit && cmd->pid >= 0)
+ mark_child_for_cleanup(cmd->pid, cmd);
argv_array_clear(&nargv);
cmd->argv = sargv;
@@ -804,7 +776,7 @@ int start_async(struct async *async)
exit(!!async->proc(proc_in, proc_out, async->data));
}
- mark_child_for_cleanup_no_wait(async->pid);
+ mark_child_for_cleanup(async->pid, NULL);
if (need_in)
close(fdin[0]);
diff --git a/run-command.h b/run-command.h
index f7b9907..dd1c78c 100644
--- a/run-command.h
+++ b/run-command.h
@@ -42,10 +42,9 @@ struct child_process {
unsigned silent_exec_failure:1;
unsigned stdout_to_stderr:1;
unsigned use_shell:1;
- /* kill the child on Git exit */
unsigned clean_on_exit:1;
- /* close the child's stdin on Git exit and wait until it terminates */
- unsigned wait_on_exit:1;
+ void (*clean_on_exit_handler)(struct child_process *process);
+ void *clean_on_exit_handler_cbdata;
};
#define CHILD_PROCESS_INIT { NULL, ARGV_ARRAY_INIT, ARGV_ARRAY_INIT }
diff --git a/t/t0021-conversion.sh b/t/t0021-conversion.sh
index 52b7fe9..9f892c0 100755
--- a/t/t0021-conversion.sh
+++ b/t/t0021-conversion.sh
@@ -28,9 +28,7 @@ file_size () {
filter_git () {
rm -f rot13-filter.log &&
git "$@" 2>git-stderr.log &&
- sed '/Waiting for/d' git-stderr.log >git-stderr-clean.log &&
- test_must_be_empty git-stderr-clean.log &&
- rm -f git-stderr.log git-stderr-clean.log
+ rm -f git-stderr.log
}
# Count unique lines in two files and compare them.
@@ -668,7 +666,7 @@ test_expect_success PERL 'process filter should not be restarted if it signals a
)
'
-test_expect_success PERL 'process filter signals abort once to abort processing of all future files' '
+test_expect_success PERL 'process filter abort stops processing of all further files' '
test_config_global filter.protocol.process "$TEST_DIRECTORY/t0021/rot13-filter.pl clean smudge" &&
rm -rf repo &&
mkdir repo &&
@@ -688,6 +686,8 @@ test_expect_success PERL 'process filter signals abort once to abort processing
git add . &&
rm -f *.r &&
+ # Note: This test assumes that Git filters files in alphabetical
+ # order ("abort.r" before "test.r").
filter_git checkout --quiet --no-progress . &&
cat >expected.log <<-EOF &&
START
--
2.10.0
next reply other threads:[~2016-10-08 11:25 UTC|newest]
Thread overview: 34+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-08 11:25 larsxschneider [this message]
2016-10-08 11:25 ` [PATCH v10 01/14] convert: quote filter names in error messages larsxschneider
2016-10-08 11:25 ` [PATCH v10 02/14] convert: modernize tests larsxschneider
2016-10-08 11:25 ` [PATCH v10 03/14] run-command: move check_pipe() from write_or_die to run_command larsxschneider
2016-10-08 11:25 ` [PATCH v10 04/14] run-command: add clean_on_exit_handler larsxschneider
2016-10-11 12:12 ` Johannes Schindelin
2016-10-15 15:02 ` Lars Schneider
2016-10-16 8:03 ` Johannes Schindelin
2016-10-16 21:57 ` Lars Schneider
2016-10-08 11:25 ` [PATCH v10 05/14] pkt-line: rename packet_write() to packet_write_fmt() larsxschneider
2016-10-08 11:25 ` [PATCH v10 06/14] pkt-line: extract set_packet_header() larsxschneider
2016-10-08 11:25 ` [PATCH v10 07/14] pkt-line: add packet_write_fmt_gently() larsxschneider
2016-10-08 11:25 ` [PATCH v10 08/14] pkt-line: add packet_flush_gently() larsxschneider
2016-10-08 11:25 ` [PATCH v10 09/14] pkt-line: add packet_write_gently() larsxschneider
2016-10-08 11:25 ` [PATCH v10 10/14] pkt-line: add functions to read/write flush terminated packet streams larsxschneider
2016-10-08 11:25 ` [PATCH v10 11/14] convert: make apply_filter() adhere to standard Git error handling larsxschneider
2016-10-08 11:25 ` [PATCH v10 12/14] convert: prepare filter.<driver>.process option larsxschneider
2016-10-08 11:25 ` [PATCH v10 13/14] convert: add " larsxschneider
2016-10-08 23:06 ` Jakub Narębski
2016-10-09 5:32 ` Torsten Bögershausen
2016-10-11 15:29 ` Lars Schneider
2016-10-11 22:26 ` Lars Schneider
2016-10-12 10:54 ` Jakub Narębski
2016-10-15 14:45 ` Lars Schneider
2016-10-15 17:41 ` Jeff King
2016-10-15 19:42 ` Jakub Narębski
2016-10-10 19:58 ` Junio C Hamano
2016-10-11 8:11 ` Lars Schneider
2016-10-11 10:09 ` Torsten Bögershausen
2016-10-16 23:13 ` Lars Schneider
2016-10-17 17:05 ` Junio C Hamano
2016-10-08 11:25 ` [PATCH v10 14/14] contrib/long-running-filter: add long running filter example larsxschneider
2016-10-09 5:42 ` Torsten Bögershausen
2016-10-15 14:47 ` Lars Schneider
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161008112530.15506-1-larsxschneider@gmail.com \
--to=larsxschneider@gmail.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jnareb@gmail.com \
--cc=peff@peff.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).