git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Calvin Wan <calvinwan@google.com>
To: git@vger.kernel.org
Cc: Calvin Wan <calvinwan@google.com>,
	avarab@gmail.com, chooglen@google.com, newren@gmail.com,
	jonathantanmy@google.com, phillip.wood123@gmail.com
Subject: [PATCH v8 0/6] submodule: parallelize diff
Date: Thu,  9 Feb 2023 00:02:06 +0000	[thread overview]
Message-ID: <20230209000212.1892457-1-calvinwan@google.com> (raw)
In-Reply-To: <20230207181706.363453-1-calvinwan@google.com>

Original cover letter for context:
https://lore.kernel.org/git/20221011232604.839941-1-calvinwan@google.com/

This reroll contains stylistic changes suggested by Ævar and Phillip;
a range-diff against v7 is included below.

Calvin Wan (6):
  run-command: add duplicate_output_fn to run_processes_parallel_opts
  submodule: strbuf variable rename
  submodule: move status parsing into function
  submodule: refactor is_submodule_modified()
  diff-lib: refactor out diff_change logic
  diff-lib: parallelize run_diff_files for submodules

 Documentation/config/submodule.txt |  12 ++
 diff-lib.c                         | 133 +++++++++++----
 run-command.c                      |  16 +-
 run-command.h                      |  25 +++
 submodule.c                        | 266 ++++++++++++++++++++++++-----
 submodule.h                        |   9 +
 t/helper/test-run-command.c        |  20 +++
 t/t0061-run-command.sh             |  39 +++++
 t/t4027-diff-submodule.sh          |  31 ++++
 t/t7506-status-submodule.sh        |  25 +++
 10 files changed, 497 insertions(+), 79 deletions(-)

Range-diff against v7:
1:  311b1abfbe ! 1:  5d51250c67 run-command: add duplicate_output_fn to run_processes_parallel_opts
    @@ run-command.c: static void pp_init(struct parallel_processes *pp,
      	if (!opts->get_next_task)
      		BUG("you need to specify a get_next_task function");
      
    -+	if (opts->duplicate_output && opts->ungroup)
    -+		BUG("duplicate_output and ungroup are incompatible with each other");
    ++	if (opts->ungroup) {
    ++		if (opts->duplicate_output)
    ++			BUG("duplicate_output and ungroup are incompatible with each other");
    ++	}
     +
      	CALLOC_ARRAY(pp->children, n);
      	if (!opts->ungroup)
    @@ run-command.c: static void pp_buffer_stderr(struct parallel_processes *pp,
     +			} else if (n < 0) {
      				if (errno != EAGAIN)
      					die_errno("read");
    -+			} else {
    -+				if (opts->duplicate_output)
    -+					opts->duplicate_output(&pp->children[i].err,
    -+					       strlen(pp->children[i].err.buf) - n,
    -+					       opts->data,
    -+					       pp->children[i].data);
    ++			} else if (opts->duplicate_output) {
    ++				opts->duplicate_output(&pp->children[i].err,
    ++					pp->children[i].err.len - n,
    ++					opts->data, pp->children[i].data);
     +			}
      		}
      	}
    @@ run-command.h: typedef int (*start_failure_fn)(struct strbuf *out,
     + *
     + * This function is incompatible with "ungroup"
     + */
    -+typedef void (*duplicate_output_fn)(struct strbuf *out,
    -+				    size_t offset,
    -+				    void *pp_cb,
    -+				    void *pp_task_cb);
    ++typedef void (*duplicate_output_fn)(struct strbuf *out, size_t offset,
    ++				    void *pp_cb, void *pp_task_cb);
     +
      /**
       * This callback is called on every child process that finished processing.
    @@ run-command.h: struct run_process_parallel_opts
      	start_failure_fn start_failure;
      
     +	/**
    -+	 * duplicate_output: See duplicate_output_fn() above. This should be
    -+	 * NULL unless process specific output is needed
    ++	 * duplicate_output: See duplicate_output_fn() above. Unless you need
    ++	 * to capture output from child processes, leave this as NULL.
     +	 */
     +	duplicate_output_fn duplicate_output;
     +
    @@ t/helper/test-run-command.c: static int no_job(struct child_process *cp,
     +			void *pp_task_cb UNUSED)
     +{
     +	struct string_list list = STRING_LIST_INIT_DUP;
    ++	struct string_list_item *item;
     +
     +	string_list_split(&list, out->buf + offset, '\n', -1);
    -+	for (size_t i = 0; i < list.nr; i++) {
    -+		if (strlen(list.items[i].string) > 0)
    -+			fprintf(stderr, "duplicate_output: %s\n", list.items[i].string);
    -+	}
    ++	for_each_string_list_item(item, &list)
    ++		fprintf(stderr, "duplicate_output: %s\n", item->string);
     +	string_list_clear(&list, 0);
     +}
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with a
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
    @@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
     +	test_must_be_empty out &&
     +	test 4 = $(grep -c "duplicate_output: Hello" err) &&
     +	test 4 = $(grep -c "duplicate_output: World" err) &&
    -+	sed "/duplicate_output/d" err > err1 &&
    ++	sed "/duplicate_output/d" err >err1 &&
     +	test_cmp expect err1
     +'
     +
2:  d00a18dd84 = 2:  6ded5b6788 submodule: strbuf variable rename
3:  dcda518922 = 3:  0c71cea8cd submodule: move status parsing into function
4:  c6fc5ba13b ! 4:  5c8cc93f9f submodule: refactor is_submodule_modified()
    @@ submodule.c: static int config_update_recurse_submodules = RECURSE_SUBMODULES_OF
      static int initialized_fetch_ref_tips;
      static struct oid_array ref_tips_before_fetch;
      static struct oid_array ref_tips_after_fetch;
    -+static const char *status_porcelain_start_error =
    -+	N_("could not run 'git status --porcelain=2' in submodule %s");
    -+static const char *status_porcelain_fail_error =
    -+	N_("'git status --porcelain=2' failed in submodule %s");
    ++#define STATUS_PORCELAIN_START_ERROR \
    ++	N_("could not run 'git status --porcelain=2' in submodule %s")
    ++#define STATUS_PORCELAIN_FAIL_ERROR \
    ++	N_("'git status --porcelain=2' failed in submodule %s")
      
      /*
       * Check if the .gitmodules file is unmerged. Parsing of the .gitmodules file
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +	prepare_status_porcelain(&cp, path, ignore_untracked);
      	if (start_command(&cp))
     -		die(_("Could not run 'git status --porcelain=2' in submodule %s"), path);
    -+		die(_(status_porcelain_start_error), path);
    ++		die(_(STATUS_PORCELAIN_START_ERROR), path);
      
      	fp = xfdopen(cp.out, "r");
      	while (strbuf_getwholeline(&buf, fp, '\n') != EOF) {
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
      
      	if (finish_command(&cp) && !ignore_cp_exit_code)
     -		die(_("'git status --porcelain=2' failed in submodule %s"), path);
    -+		die(_(status_porcelain_fail_error), path);
    ++		die(_(STATUS_PORCELAIN_FAIL_ERROR), path);
      
      	strbuf_release(&buf);
      	return dirty_submodule;
5:  1ea8eae9c9 = 5:  6c2b62abc8 diff-lib: refactor out diff_change logic
6:  0d35fcc38d < -:  ---------- diff-lib: refactor match_stat_with_submodule
7:  fd1eec974d ! 6:  bb25dadbe5 diff-lib: parallelize run_diff_files for submodules
    @@ diff-lib.c: static int check_removed(const struct index_state *istate, const str
     +				     unsigned *ignore_untracked)
      {
      	int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option);
    - 	struct diff_flags orig_flags;
    +-	if (S_ISGITLINK(ce->ce_mode)) {
    +-		struct diff_flags orig_flags = diffopt->flags;
    +-		if (!diffopt->flags.override_submodule_config)
    +-			set_diffopt_flags_from_submodule_config(diffopt, ce->name);
    +-		if (diffopt->flags.ignore_submodules)
    +-			changed = 0;
    +-		else if (!diffopt->flags.ignore_dirty_submodules &&
    +-			 (!changed || diffopt->flags.dirty_submodules))
    ++	struct diff_flags orig_flags;
     +	int defer = 0;
    - 
    - 	if (!S_ISGITLINK(ce->ce_mode))
    --		return changed;
    ++
    ++	if (!S_ISGITLINK(ce->ce_mode))
     +		goto ret;
    - 
    - 	orig_flags = diffopt->flags;
    - 	if (!diffopt->flags.override_submodule_config)
    -@@ diff-lib.c: static int match_stat_with_submodule(struct diff_options *diffopt,
    - 		goto cleanup;
    - 	}
    - 	if (!diffopt->flags.ignore_dirty_submodules &&
    --	    (!changed || diffopt->flags.dirty_submodules))
    --		*dirty_submodule = is_submodule_modified(ce->name,
    ++
    ++	orig_flags = diffopt->flags;
    ++	if (!diffopt->flags.override_submodule_config)
    ++		set_diffopt_flags_from_submodule_config(diffopt, ce->name);
    ++	if (diffopt->flags.ignore_submodules) {
    ++		changed = 0;
    ++		goto cleanup;
    ++	}
    ++	if (!diffopt->flags.ignore_dirty_submodules &&
     +	    (!changed || diffopt->flags.dirty_submodules)) {
     +		if (defer_submodule_status && *defer_submodule_status) {
     +			defer = 1;
     +			*ignore_untracked = diffopt->flags.ignore_untracked_in_submodules;
     +		} else {
    -+			*dirty_submodule = is_submodule_modified(ce->name,
    - 					 diffopt->flags.ignore_untracked_in_submodules);
    + 			*dirty_submodule = is_submodule_modified(ce->name,
    +-								 diffopt->flags.ignore_untracked_in_submodules);
    +-		diffopt->flags = orig_flags;
    ++					 diffopt->flags.ignore_untracked_in_submodules);
     +		}
    -+	}
    - cleanup:
    - 	diffopt->flags = orig_flags;
    + 	}
    ++cleanup:
    ++	diffopt->flags = orig_flags;
     +ret:
     +	if (defer_submodule_status)
     +		*defer_submodule_status = defer;
    @@ diff-lib.c: int run_diff_files(struct rev_info *revs, unsigned int option)
      				       changed, istate, ce))
      			continue;
      	}
    -+	if (submodules.nr > 0) {
    -+		int parallel_jobs;
    -+		if (git_config_get_int("submodule.diffjobs", &parallel_jobs))
    ++	if (submodules.nr) {
    ++		unsigned long parallel_jobs;
    ++		struct string_list_item *item;
    ++
    ++		if (git_config_get_ulong("submodule.diffjobs", &parallel_jobs))
     +			parallel_jobs = 1;
     +		else if (!parallel_jobs)
     +			parallel_jobs = online_cpus();
    -+		else if (parallel_jobs < 0)
    -+			die(_("submodule.diffjobs cannot be negative"));
     +
     +		if (get_submodules_status(&submodules, parallel_jobs))
     +			die(_("submodule status failed"));
    -+		for (size_t i = 0; i < submodules.nr; i++) {
    -+			struct submodule_status_util *util = submodules.items[i].util;
    ++		for_each_string_list_item(item, &submodules) {
    ++			struct submodule_status_util *util = item->util;
     +
     +			if (diff_change_helper(&revs->diffopt, util->newmode,
     +				       util->dirty_submodule, util->changed,
    @@ submodule.c: int submodule_touches_in_range(struct repository *r,
     +	int result;
     +
     +	struct string_list *submodule_names;
    -+
    -+	/* Pending statuses by OIDs */
    -+	struct status_task **oid_status_tasks;
    -+	int oid_status_tasks_nr, oid_status_tasks_alloc;
     +};
     +
      struct submodule_parallel_fetch {
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +	struct status_task *task = task_cb;
     +
     +	sps->result = 1;
    -+	strbuf_addf(err,
    -+	    _(status_porcelain_start_error),
    -+	    task->path);
    ++	strbuf_addf(err, _(STATUS_PORCELAIN_START_ERROR), task->path);
     +	return 0;
     +}
     +
    @@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
     +
     +	if (retvalue) {
     +		sps->result = 1;
    -+		strbuf_addf(err,
    -+		    _(status_porcelain_fail_error),
    -+		    task->path);
    ++		strbuf_addf(err, _(STATUS_PORCELAIN_FAIL_ERROR), task->path);
     +	}
     +
     +	parse_status_porcelain_strbuf(&task->out,
-- 
2.39.1.519.gcb327c4b5f-goog


  parent reply	other threads:[~2023-02-09  0:03 UTC|newest]

Thread overview: 86+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <https://lore.kernel.org/git/20221108184200.2813458-1-calvinwan@google.com/>
2023-01-04 21:54 ` [PATCH v5 0/6] submodule: parallelize diff Calvin Wan
2023-01-05 23:23   ` Calvin Wan
2023-01-17 19:30   ` [PATCH v6 " Calvin Wan
2023-02-07 18:16     ` [PATCH v7 0/7] " Calvin Wan
2023-02-08  0:55       ` Ævar Arnfjörð Bjarmason
2023-02-09  0:02       ` Calvin Wan [this message]
2023-02-09  1:42         ` [PATCH v8 0/6] " Ævar Arnfjörð Bjarmason
2023-02-09 19:50         ` Junio C Hamano
2023-02-09 21:52           ` Calvin Wan
2023-02-09 22:25             ` Junio C Hamano
2023-02-10 13:24             ` Ævar Arnfjörð Bjarmason
2023-02-10 17:42               ` Junio C Hamano
2023-02-09 20:50         ` Phillip Wood
2023-03-02 21:52         ` [PATCH v9 " Calvin Wan
2023-03-02 22:02           ` [PATCH v9 1/6] run-command: add on_stderr_output_fn to run_processes_parallel_opts Calvin Wan
2023-03-02 22:02           ` [PATCH v9 2/6] submodule: rename strbuf variable Calvin Wan
2023-03-03  0:25             ` Junio C Hamano
2023-03-06 17:37               ` Calvin Wan
2023-03-06 18:30                 ` Junio C Hamano
2023-03-06 19:00                   ` Calvin Wan
2023-03-02 22:02           ` [PATCH v9 3/6] submodule: move status parsing into function Calvin Wan
2023-03-17 20:42             ` Glen Choo
2023-03-02 22:02           ` [PATCH v9 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-03-02 22:02           ` [PATCH v9 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-03-02 22:02           ` [PATCH v9 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-03-07  8:41             ` Ævar Arnfjörð Bjarmason
2023-03-07 10:21             ` Ævar Arnfjörð Bjarmason
2023-03-07 17:55               ` Junio C Hamano
2023-03-17  1:09             ` Glen Choo
2023-03-17  2:51               ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-13  6:34         ` Glen Choo
2023-02-13 17:52           ` Junio C Hamano
2023-02-13 18:26             ` Calvin Wan
2023-02-09  0:02       ` [PATCH v8 2/6] submodule: strbuf variable rename Calvin Wan
2023-02-13  8:37         ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 3/6] submodule: move status parsing into function Calvin Wan
2023-02-09  0:02       ` [PATCH v8 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-13  7:06         ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-09  1:48         ` Ævar Arnfjörð Bjarmason
2023-02-13  8:42         ` Glen Choo
2023-02-13 18:29           ` Calvin Wan
2023-02-14  4:03             ` Glen Choo
2023-02-09  0:02       ` [PATCH v8 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-13  8:36         ` Glen Choo
2023-02-07 18:17     ` [PATCH v7 1/7] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-07 22:16       ` Ævar Arnfjörð Bjarmason
2023-02-08 22:50         ` Calvin Wan
2023-02-08 14:19       ` Phillip Wood
2023-02-08 22:54         ` Calvin Wan
2023-02-09 20:37           ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 2/7] submodule: strbuf variable rename Calvin Wan
2023-02-07 22:47       ` Ævar Arnfjörð Bjarmason
2023-02-08 22:59         ` Calvin Wan
2023-02-07 18:17     ` [PATCH v7 3/7] submodule: move status parsing into function Calvin Wan
2023-02-07 18:17     ` [PATCH v7 4/7] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-07 22:59       ` Ævar Arnfjörð Bjarmason
2023-02-07 18:17     ` [PATCH v7 5/7] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-08 14:28       ` Phillip Wood
2023-02-08 23:12         ` Calvin Wan
2023-02-09 20:53           ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 6/7] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-02-08  8:18       ` Ævar Arnfjörð Bjarmason
2023-02-08 17:07         ` Phillip Wood
2023-02-08 23:13           ` Calvin Wan
2023-02-08 14:22       ` Phillip Wood
2023-02-07 18:17     ` [PATCH v7 7/7] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-07 23:06       ` Ævar Arnfjörð Bjarmason
2023-01-17 19:30   ` [PATCH v6 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-17 19:30   ` [PATCH v6 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-17 19:30   ` [PATCH v6 3/6] submodule: move status parsing into function Calvin Wan
2023-01-17 19:30   ` [PATCH v6 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-17 19:30   ` [PATCH v6 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-26  9:09     ` Glen Choo
2023-01-26  9:16     ` Glen Choo
2023-01-26 18:52       ` Calvin Wan
2023-01-17 19:30   ` [PATCH v6 6/6] submodule: call parallel code from serial status Calvin Wan
2023-01-26  8:09     ` Glen Choo
2023-01-26  8:45       ` Glen Choo
2023-01-04 21:54 ` [PATCH v5 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-04 21:54 ` [PATCH v5 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-04 21:54 ` [PATCH v5 3/6] submodule: move status parsing into function Calvin Wan
2023-01-04 21:54 ` [PATCH v5 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-04 21:54 ` [PATCH v5 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-04 21:54 ` [PATCH v5 6/6] submodule: call parallel code from serial status Calvin Wan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230209000212.1892457-1-calvinwan@google.com \
    --to=calvinwan@google.com \
    --cc=avarab@gmail.com \
    --cc=chooglen@google.com \
    --cc=git@vger.kernel.org \
    --cc=jonathantanmy@google.com \
    --cc=newren@gmail.com \
    --cc=phillip.wood123@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).