git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Phillip Wood <phillip.wood123@gmail.com>
To: Calvin Wan <calvinwan@google.com>, git@vger.kernel.org
Cc: avarab@gmail.com, chooglen@google.com, newren@gmail.com,
	jonathantanmy@google.com
Subject: Re: [PATCH v7 1/7] run-command: add duplicate_output_fn to run_processes_parallel_opts
Date: Wed, 8 Feb 2023 14:19:55 +0000	[thread overview]
Message-ID: <cd528776-caa8-6372-0ca5-db0545947fba@dunelm.org.uk> (raw)
In-Reply-To: <20230207181706.363453-2-calvinwan@google.com>

Hi Calvin

On 07/02/2023 18:17, Calvin Wan wrote:
> Add duplicate_output_fn as an optionally set function in
> run_process_parallel_opts. If set, output from each child process is
> copied and passed to the callback function whenever output from the
> child process is buffered to allow for separate parsing.
> 
> Signed-off-by: Calvin Wan <calvinwan@google.com>
> ---
>   run-command.c               | 16 ++++++++++++---
>   run-command.h               | 27 +++++++++++++++++++++++++
>   t/helper/test-run-command.c | 21 ++++++++++++++++++++
>   t/t0061-run-command.sh      | 39 +++++++++++++++++++++++++++++++++++++
>   4 files changed, 100 insertions(+), 3 deletions(-)
> 
> diff --git a/run-command.c b/run-command.c
> index 756f1839aa..cad88befe0 100644
> --- a/run-command.c
> +++ b/run-command.c
> @@ -1526,6 +1526,9 @@ static void pp_init(struct parallel_processes *pp,
>   	if (!opts->get_next_task)
>   		BUG("you need to specify a get_next_task function");
>   
> +	if (opts->duplicate_output && opts->ungroup)
> +		BUG("duplicate_output and ungroup are incompatible with each other");
> +
>   	CALLOC_ARRAY(pp->children, n);
>   	if (!opts->ungroup)
>   		CALLOC_ARRAY(pp->pfd, n);
> @@ -1645,14 +1648,21 @@ static void pp_buffer_stderr(struct parallel_processes *pp,
>   	for (size_t i = 0; i < opts->processes; i++) {
>   		if (pp->children[i].state == GIT_CP_WORKING &&
>   		    pp->pfd[i].revents & (POLLIN | POLLHUP)) {
> -			int n = strbuf_read_once(&pp->children[i].err,
> -						 pp->children[i].process.err, 0);
> +			ssize_t n = strbuf_read_once(&pp->children[i].err,
> +						     pp->children[i].process.err, 0);
>   			if (n == 0) {
>   				close(pp->children[i].process.err);
>   				pp->children[i].state = GIT_CP_WAIT_CLEANUP;
> -			} else if (n < 0)
> +			} else if (n < 0) {
>   				if (errno != EAGAIN)
>   					die_errno("read");
> +			} else {
> +				if (opts->duplicate_output)
> +					opts->duplicate_output(&pp->children[i].err,
> +					       strlen(pp->children[i].err.buf) - n,

Looking at how this is used in patch 7, I think it would be better to
pass a (const char *, length) pair rather than a (struct strbuf *, offset)
pair, i.e.
	opts->duplicate_output(pp->children[i].err.buf +
			       pp->children[i].err.len - n, n, ...)

That would make it clear that we do not expect duplicate_output() to
alter the buffer, and would save duplicate_output() from having to add
the offset to the start of the buffer to find the new data.

Best Wishes

Phillip


> +					       opts->data,
> +					       pp->children[i].data);
> +			}
>   		}
>   	}
>   }
> diff --git a/run-command.h b/run-command.h
> index 072db56a4d..6dcf999f6c 100644
> --- a/run-command.h
> +++ b/run-command.h
> @@ -408,6 +408,27 @@ typedef int (*start_failure_fn)(struct strbuf *out,
>   				void *pp_cb,
>   				void *pp_task_cb);
>   
> +/**
> + * This callback is called whenever output from a child process is buffered
> + *
> + * See run_processes_parallel() below for a discussion of the "struct
> + * strbuf *out" parameter.
> + *
> + * The offset refers to the number of bytes originally in "out" before
> + * the output from the child process was buffered. Therefore, the buffer
> + * range, "out->buf + offset" to the end of "out", would contain the
> + * buffer of the child process output.
> + *
> + * pp_cb is the callback cookie as passed into run_processes_parallel,
> + * pp_task_cb is the callback cookie as passed into get_next_task_fn.
> + *
> + * This function is incompatible with "ungroup"
> + */
> +typedef void (*duplicate_output_fn)(struct strbuf *out,
> +				    size_t offset,
> +				    void *pp_cb,
> +				    void *pp_task_cb);
> +
>   /**
>    * This callback is called on every child process that finished processing.
>    *
> @@ -461,6 +482,12 @@ struct run_process_parallel_opts
>   	 */
>   	start_failure_fn start_failure;
>   
> +	/**
> +	 * duplicate_output: See duplicate_output_fn() above. This should be
> +	 * NULL unless process specific output is needed
> +	 */
> +	duplicate_output_fn duplicate_output;
> +
>   	/**
>   	 * task_finished: See task_finished_fn() above. This can be
>   	 * NULL to omit any special handling.
> diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c
> index 3ecb830f4a..ffd3cd0045 100644
> --- a/t/helper/test-run-command.c
> +++ b/t/helper/test-run-command.c
> @@ -52,6 +52,21 @@ static int no_job(struct child_process *cp,
>   	return 0;
>   }
>   
> +static void duplicate_output(struct strbuf *out,
> +			size_t offset,
> +			void *pp_cb UNUSED,
> +			void *pp_task_cb UNUSED)
> +{
> +	struct string_list list = STRING_LIST_INIT_DUP;
> +
> +	string_list_split(&list, out->buf + offset, '\n', -1);
> +	for (size_t i = 0; i < list.nr; i++) {
> +		if (strlen(list.items[i].string) > 0)
> +			fprintf(stderr, "duplicate_output: %s\n", list.items[i].string);
> +	}
> +	string_list_clear(&list, 0);
> +}
> +
>   static int task_finished(int result,
>   			 struct strbuf *err,
>   			 void *pp_cb,
> @@ -439,6 +454,12 @@ int cmd__run_command(int argc, const char **argv)
>   		opts.ungroup = 1;
>   	}
>   
> +	if (!strcmp(argv[1], "--duplicate-output")) {
> +		argv += 1;
> +		argc -= 1;
> +		opts.duplicate_output = duplicate_output;
> +	}
> +
>   	jobs = atoi(argv[2]);
>   	strvec_clear(&proc.args);
>   	strvec_pushv(&proc.args, (const char **)argv + 3);
> diff --git a/t/t0061-run-command.sh b/t/t0061-run-command.sh
> index e2411f6a9b..879e536638 100755
> --- a/t/t0061-run-command.sh
> +++ b/t/t0061-run-command.sh
> @@ -135,6 +135,15 @@ test_expect_success 'run_command runs in parallel with more jobs available than
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'run_command runs in parallel with more jobs available than tasks --duplicate-output' '
> +	test-tool run-command --duplicate-output run-command-parallel 5 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
> +	test_must_be_empty out &&
> +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
> +	test 4 = $(grep -c "duplicate_output: World" err) &&
> +	sed "/duplicate_output/d" err > err1 &&
> +	test_cmp expect err1
> +'
> +
>   test_expect_success 'run_command runs ungrouped in parallel with more jobs available than tasks' '
>   	test-tool run-command --ungroup run-command-parallel 5 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
>   	test_line_count = 8 out &&
> @@ -147,6 +156,15 @@ test_expect_success 'run_command runs in parallel with as many jobs as tasks' '
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'run_command runs in parallel with as many jobs as tasks --duplicate-output' '
> +	test-tool run-command --duplicate-output run-command-parallel 4 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
> +	test_must_be_empty out &&
> +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
> +	test 4 = $(grep -c "duplicate_output: World" err) &&
> +	sed "/duplicate_output/d" err > err1 &&
> +	test_cmp expect err1
> +'
> +
>   test_expect_success 'run_command runs ungrouped in parallel with as many jobs as tasks' '
>   	test-tool run-command --ungroup run-command-parallel 4 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
>   	test_line_count = 8 out &&
> @@ -159,6 +177,15 @@ test_expect_success 'run_command runs in parallel with more tasks than jobs avai
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'run_command runs in parallel with more tasks than jobs available --duplicate-output' '
> +	test-tool run-command --duplicate-output run-command-parallel 3 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
> +	test_must_be_empty out &&
> +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
> +	test 4 = $(grep -c "duplicate_output: World" err) &&
> +	sed "/duplicate_output/d" err > err1 &&
> +	test_cmp expect err1
> +'
> +
>   test_expect_success 'run_command runs ungrouped in parallel with more tasks than jobs available' '
>   	test-tool run-command --ungroup run-command-parallel 3 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
>   	test_line_count = 8 out &&
> @@ -180,6 +207,12 @@ test_expect_success 'run_command is asked to abort gracefully' '
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'run_command is asked to abort gracefully --duplicate-output' '
> +	test-tool run-command --duplicate-output run-command-abort 3 false >out 2>err &&
> +	test_must_be_empty out &&
> +	test_cmp expect err
> +'
> +
>   test_expect_success 'run_command is asked to abort gracefully (ungroup)' '
>   	test-tool run-command --ungroup run-command-abort 3 false >out 2>err &&
>   	test_must_be_empty out &&
> @@ -196,6 +229,12 @@ test_expect_success 'run_command outputs ' '
>   	test_cmp expect actual
>   '
>   
> +test_expect_success 'run_command outputs --duplicate-output' '
> +	test-tool run-command --duplicate-output run-command-no-jobs 3 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
> +	test_must_be_empty out &&
> +	test_cmp expect err
> +'
> +
>   test_expect_success 'run_command outputs (ungroup) ' '
>   	test-tool run-command --ungroup run-command-no-jobs 3 sh -c "printf \"%s\n%s\n\" Hello World" >out 2>err &&
>   	test_must_be_empty out &&

  parent reply	other threads:[~2023-02-08 14:21 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <https://lore.kernel.org/git/20221108184200.2813458-1-calvinwan@google.com/>
2023-01-04 21:54 ` [PATCH v5 0/6] submodule: parallelize diff Calvin Wan
2023-01-05 23:23   ` Calvin Wan
2023-01-17 19:30   ` [PATCH v6 " Calvin Wan
2023-02-07 18:16     ` [PATCH v7 0/7] " Calvin Wan
2023-02-08  0:55       ` Ævar Arnfjörð Bjarmason
2023-02-09  0:02       ` [PATCH v8 0/6] " Calvin Wan
2023-02-09  1:42         ` Ævar Arnfjörð Bjarmason
2023-02-09 19:50         ` Junio C Hamano
2023-02-09 21:52           ` Calvin Wan
2023-02-09 22:25             ` Junio C Hamano
2023-02-10 13:24             ` Ævar Arnfjörð Bjarmason
2023-02-10 17:42               ` Junio C Hamano
2023-02-09 20:50         ` Phillip Wood
2023-03-02 21:52         ` [PATCH v9 " Calvin Wan
2023-03-02 22:02           ` [PATCH v9 1/6] run-command: add on_stderr_output_fn to run_processes_parallel_opts Calvin Wan
2023-03-02 22:02           ` [PATCH v9 2/6] submodule: rename strbuf variable Calvin Wan
2023-03-03  0:25             ` Junio C Hamano
2023-03-06 17:37               ` Calvin Wan
2023-03-06 18:30                 ` Junio C Hamano
2023-03-06 19:00                   ` Calvin Wan
2023-03-02 22:02           ` [PATCH v9 3/6] submodule: move status parsing into function Calvin Wan
2023-03-17 20:42             ` Glen Choo
2023-03-02 22:02           ` [PATCH v9 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-03-02 22:02           ` [PATCH v9 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-03-02 22:02           ` [PATCH v9 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-03-07  8:41             ` Ævar Arnfjörð Bjarmason
2023-03-07 10:21             ` Ævar Arnfjörð Bjarmason
2023-03-07 17:55               ` Junio C Hamano
2023-03-17  1:09             ` Glen Choo
2023-03-17  2:51               ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-13  6:34         ` Glen Choo
2023-02-13 17:52           ` Junio C Hamano
2023-02-13 18:26             ` Calvin Wan
2023-02-09  0:02       ` [PATCH v8 2/6] submodule: strbuf variable rename Calvin Wan
2023-02-13  8:37         ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 3/6] submodule: move status parsing into function Calvin Wan
2023-02-09  0:02       ` [PATCH v8 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-13  7:06         ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-09  1:48         ` Ævar Arnfjörð Bjarmason
2023-02-13  8:42         ` Glen Choo
2023-02-13 18:29           ` Calvin Wan
2023-02-14  4:03             ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-13  8:36         ` Glen Choo
2023-02-07 18:17     ` [PATCH v7 1/7] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-07 22:16       ` Ævar Arnfjörð Bjarmason
2023-02-08 22:50         ` Calvin Wan
2023-02-08 14:19       ` Phillip Wood [this message]
2023-02-08 22:54         ` Calvin Wan
2023-02-09 20:37           ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 2/7] submodule: strbuf variable rename Calvin Wan
2023-02-07 22:47       ` Ævar Arnfjörð Bjarmason
2023-02-08 22:59         ` Calvin Wan
2023-02-07 18:17     ` [PATCH v7 3/7] submodule: move status parsing into function Calvin Wan
2023-02-07 18:17     ` [PATCH v7 4/7] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-07 22:59       ` Ævar Arnfjörð Bjarmason
2023-02-07 18:17     ` [PATCH v7 5/7] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-08 14:28       ` Phillip Wood
2023-02-08 23:12         ` Calvin Wan
2023-02-09 20:53           ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 6/7] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-02-08  8:18       ` Ævar Arnfjörð Bjarmason
2023-02-08 17:07         ` Phillip Wood
2023-02-08 23:13           ` Calvin Wan
2023-02-08 14:22       ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 7/7] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-07 23:06       ` Ævar Arnfjörð Bjarmason
2023-01-17 19:30   ` [PATCH v6 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-17 19:30   ` [PATCH v6 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-17 19:30   ` [PATCH v6 3/6] submodule: move status parsing into function Calvin Wan
2023-01-17 19:30   ` [PATCH v6 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-17 19:30   ` [PATCH v6 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-26  9:09     ` Glen Choo
2023-01-26  9:16     ` Glen Choo
2023-01-26 18:52       ` Calvin Wan
2023-01-17 19:30   ` [PATCH v6 6/6] submodule: call parallel code from serial status Calvin Wan
2023-01-26  8:09     ` Glen Choo
2023-01-26  8:45       ` Glen Choo
2023-01-04 21:54 ` [PATCH v5 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-04 21:54 ` [PATCH v5 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-04 21:54 ` [PATCH v5 3/6] submodule: move status parsing into function Calvin Wan
2023-01-04 21:54 ` [PATCH v5 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-04 21:54 ` [PATCH v5 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-04 21:54 ` [PATCH v5 6/6] submodule: call parallel code from serial status Calvin Wan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cd528776-caa8-6372-0ca5-db0545947fba@dunelm.org.uk \
    --to=phillip.wood123@gmail.com \
    --cc=avarab@gmail.com \
    --cc=calvinwan@google.com \
    --cc=chooglen@google.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    --cc=newren@gmail.com \
    --cc=phillip.wood@dunelm.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).