From: Calvin Wan <calvinwan@google.com>
To: git@vger.kernel.org
Cc: Calvin Wan <calvinwan@google.com>,
avarab@gmail.com, chooglen@google.com, newren@gmail.com,
jonathantanmy@google.com, phillip.wood123@gmail.com
Subject: [PATCH v8 0/6] submodule: parallelize diff
Date: Thu, 9 Feb 2023 00:02:06 +0000 [thread overview]
Message-ID: <20230209000212.1892457-1-calvinwan@google.com> (raw)
In-Reply-To: <20230207181706.363453-1-calvinwan@google.com>
Original cover letter for context:
https://lore.kernel.org/git/20221011232604.839941-1-calvinwan@google.com/
This reroll contains stylistic changes suggested by Avar and Phillip,
and includes a range-diff below.
Calvin Wan (6):
run-command: add duplicate_output_fn to run_processes_parallel_opts
submodule: strbuf variable rename
submodule: move status parsing into function
submodule: refactor is_submodule_modified()
diff-lib: refactor out diff_change logic
diff-lib: parallelize run_diff_files for submodules
Documentation/config/submodule.txt | 12 ++
diff-lib.c | 133 +++++++++++----
run-command.c | 16 +-
run-command.h | 25 +++
submodule.c | 266 ++++++++++++++++++++++++-----
submodule.h | 9 +
t/helper/test-run-command.c | 20 +++
t/t0061-run-command.sh | 39 +++++
t/t4027-diff-submodule.sh | 31 ++++
t/t7506-status-submodule.sh | 25 +++
10 files changed, 497 insertions(+), 79 deletions(-)
Range-diff against v7:
1: 311b1abfbe ! 1: 5d51250c67 run-command: add duplicate_output_fn to run_processes_parallel_opts
@@ run-command.c: static void pp_init(struct parallel_processes *pp,
if (!opts->get_next_task)
BUG("you need to specify a get_next_task function");
-+ if (opts->duplicate_output && opts->ungroup)
-+ BUG("duplicate_output and ungroup are incompatible with each other");
++ if (opts->ungroup) {
++ if (opts->duplicate_output)
++ BUG("duplicate_output and ungroup are incompatible with each other");
++ }
+
CALLOC_ARRAY(pp->children, n);
if (!opts->ungroup)
@@ run-command.c: static void pp_buffer_stderr(struct parallel_processes *pp,
+ } else if (n < 0) {
if (errno != EAGAIN)
die_errno("read");
-+ } else {
-+ if (opts->duplicate_output)
-+ opts->duplicate_output(&pp->children[i].err,
-+ strlen(pp->children[i].err.buf) - n,
-+ opts->data,
-+ pp->children[i].data);
++ } else if (opts->duplicate_output) {
++ opts->duplicate_output(&pp->children[i].err,
++ pp->children[i].err.len - n,
++ opts->data, pp->children[i].data);
+ }
}
}
@@ run-command.h: typedef int (*start_failure_fn)(struct strbuf *out,
+ *
+ * This function is incompatible with "ungroup"
+ */
-+typedef void (*duplicate_output_fn)(struct strbuf *out,
-+ size_t offset,
-+ void *pp_cb,
-+ void *pp_task_cb);
++typedef void (*duplicate_output_fn)(struct strbuf *out, size_t offset,
++ void *pp_cb, void *pp_task_cb);
+
/**
* This callback is called on every child process that finished processing.
@@ run-command.h: struct run_process_parallel_opts
start_failure_fn start_failure;
+ /**
-+ * duplicate_output: See duplicate_output_fn() above. This should be
-+ * NULL unless process specific output is needed
++ * duplicate_output: See duplicate_output_fn() above. Unless you need
++ * to capture output from child processes, leave this as NULL.
+ */
+ duplicate_output_fn duplicate_output;
+
@@ t/helper/test-run-command.c: static int no_job(struct child_process *cp,
+ void *pp_task_cb UNUSED)
+{
+ struct string_list list = STRING_LIST_INIT_DUP;
++ struct string_list_item *item;
+
+ string_list_split(&list, out->buf + offset, '\n', -1);
-+ for (size_t i = 0; i < list.nr; i++) {
-+ if (strlen(list.items[i].string) > 0)
-+ fprintf(stderr, "duplicate_output: %s\n", list.items[i].string);
-+ }
++ for_each_string_list_item(item, &list)
++ fprintf(stderr, "duplicate_output: %s\n", item->string);
+ string_list_clear(&list, 0);
+}
+
@@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
+ test_must_be_empty out &&
+ test 4 = $(grep -c "duplicate_output: Hello" err) &&
+ test 4 = $(grep -c "duplicate_output: World" err) &&
-+ sed "/duplicate_output/d" err > err1 &&
++ sed "/duplicate_output/d" err >err1 &&
+ test_cmp expect err1
+'
+
@@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with a
+ test_must_be_empty out &&
+ test 4 = $(grep -c "duplicate_output: Hello" err) &&
+ test 4 = $(grep -c "duplicate_output: World" err) &&
-+ sed "/duplicate_output/d" err > err1 &&
++ sed "/duplicate_output/d" err >err1 &&
+ test_cmp expect err1
+'
+
@@ t/t0061-run-command.sh: test_expect_success 'run_command runs in parallel with m
+ test_must_be_empty out &&
+ test 4 = $(grep -c "duplicate_output: Hello" err) &&
+ test 4 = $(grep -c "duplicate_output: World" err) &&
-+ sed "/duplicate_output/d" err > err1 &&
++ sed "/duplicate_output/d" err >err1 &&
+ test_cmp expect err1
+'
+
2: d00a18dd84 = 2: 6ded5b6788 submodule: strbuf variable rename
3: dcda518922 = 3: 0c71cea8cd submodule: move status parsing into function
4: c6fc5ba13b ! 4: 5c8cc93f9f submodule: refactor is_submodule_modified()
@@ submodule.c: static int config_update_recurse_submodules = RECURSE_SUBMODULES_OF
static int initialized_fetch_ref_tips;
static struct oid_array ref_tips_before_fetch;
static struct oid_array ref_tips_after_fetch;
-+static const char *status_porcelain_start_error =
-+ N_("could not run 'git status --porcelain=2' in submodule %s");
-+static const char *status_porcelain_fail_error =
-+ N_("'git status --porcelain=2' failed in submodule %s");
++#define STATUS_PORCELAIN_START_ERROR \
++ N_("could not run 'git status --porcelain=2' in submodule %s")
++#define STATUS_PORCELAIN_FAIL_ERROR \
++ N_("'git status --porcelain=2' failed in submodule %s")
/*
* Check if the .gitmodules file is unmerged. Parsing of the .gitmodules file
@@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
+ prepare_status_porcelain(&cp, path, ignore_untracked);
if (start_command(&cp))
- die(_("Could not run 'git status --porcelain=2' in submodule %s"), path);
-+ die(_(status_porcelain_start_error), path);
++ die(_(STATUS_PORCELAIN_START_ERROR), path);
fp = xfdopen(cp.out, "r");
while (strbuf_getwholeline(&buf, fp, '\n') != EOF) {
@@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
if (finish_command(&cp) && !ignore_cp_exit_code)
- die(_("'git status --porcelain=2' failed in submodule %s"), path);
-+ die(_(status_porcelain_fail_error), path);
++ die(_(STATUS_PORCELAIN_FAIL_ERROR), path);
strbuf_release(&buf);
return dirty_submodule;
5: 1ea8eae9c9 = 5: 6c2b62abc8 diff-lib: refactor out diff_change logic
6: 0d35fcc38d < -: ---------- diff-lib: refactor match_stat_with_submodule
7: fd1eec974d ! 6: bb25dadbe5 diff-lib: parallelize run_diff_files for submodules
@@ diff-lib.c: static int check_removed(const struct index_state *istate, const str
+ unsigned *ignore_untracked)
{
int changed = ie_match_stat(diffopt->repo->index, ce, st, ce_option);
- struct diff_flags orig_flags;
+- if (S_ISGITLINK(ce->ce_mode)) {
+- struct diff_flags orig_flags = diffopt->flags;
+- if (!diffopt->flags.override_submodule_config)
+- set_diffopt_flags_from_submodule_config(diffopt, ce->name);
+- if (diffopt->flags.ignore_submodules)
+- changed = 0;
+- else if (!diffopt->flags.ignore_dirty_submodules &&
+- (!changed || diffopt->flags.dirty_submodules))
++ struct diff_flags orig_flags;
+ int defer = 0;
-
- if (!S_ISGITLINK(ce->ce_mode))
-- return changed;
++
++ if (!S_ISGITLINK(ce->ce_mode))
+ goto ret;
-
- orig_flags = diffopt->flags;
- if (!diffopt->flags.override_submodule_config)
-@@ diff-lib.c: static int match_stat_with_submodule(struct diff_options *diffopt,
- goto cleanup;
- }
- if (!diffopt->flags.ignore_dirty_submodules &&
-- (!changed || diffopt->flags.dirty_submodules))
-- *dirty_submodule = is_submodule_modified(ce->name,
++
++ orig_flags = diffopt->flags;
++ if (!diffopt->flags.override_submodule_config)
++ set_diffopt_flags_from_submodule_config(diffopt, ce->name);
++ if (diffopt->flags.ignore_submodules) {
++ changed = 0;
++ goto cleanup;
++ }
++ if (!diffopt->flags.ignore_dirty_submodules &&
+ (!changed || diffopt->flags.dirty_submodules)) {
+ if (defer_submodule_status && *defer_submodule_status) {
+ defer = 1;
+ *ignore_untracked = diffopt->flags.ignore_untracked_in_submodules;
+ } else {
-+ *dirty_submodule = is_submodule_modified(ce->name,
- diffopt->flags.ignore_untracked_in_submodules);
+ *dirty_submodule = is_submodule_modified(ce->name,
+- diffopt->flags.ignore_untracked_in_submodules);
+- diffopt->flags = orig_flags;
++ diffopt->flags.ignore_untracked_in_submodules);
+ }
-+ }
- cleanup:
- diffopt->flags = orig_flags;
+ }
++cleanup:
++ diffopt->flags = orig_flags;
+ret:
+ if (defer_submodule_status)
+ *defer_submodule_status = defer;
@@ diff-lib.c: int run_diff_files(struct rev_info *revs, unsigned int option)
changed, istate, ce))
continue;
}
-+ if (submodules.nr > 0) {
-+ int parallel_jobs;
-+ if (git_config_get_int("submodule.diffjobs", &parallel_jobs))
++ if (submodules.nr) {
++ unsigned long parallel_jobs;
++ struct string_list_item *item;
++
++ if (git_config_get_ulong("submodule.diffjobs", &parallel_jobs))
+ parallel_jobs = 1;
+ else if (!parallel_jobs)
+ parallel_jobs = online_cpus();
-+ else if (parallel_jobs < 0)
-+ die(_("submodule.diffjobs cannot be negative"));
+
+ if (get_submodules_status(&submodules, parallel_jobs))
+ die(_("submodule status failed"));
-+ for (size_t i = 0; i < submodules.nr; i++) {
-+ struct submodule_status_util *util = submodules.items[i].util;
++ for_each_string_list_item(item, &submodules) {
++ struct submodule_status_util *util = item->util;
+
+ if (diff_change_helper(&revs->diffopt, util->newmode,
+ util->dirty_submodule, util->changed,
@@ submodule.c: int submodule_touches_in_range(struct repository *r,
+ int result;
+
+ struct string_list *submodule_names;
-+
-+ /* Pending statuses by OIDs */
-+ struct status_task **oid_status_tasks;
-+ int oid_status_tasks_nr, oid_status_tasks_alloc;
+};
+
struct submodule_parallel_fetch {
@@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
+ struct status_task *task = task_cb;
+
+ sps->result = 1;
-+ strbuf_addf(err,
-+ _(status_porcelain_start_error),
-+ task->path);
++ strbuf_addf(err, _(STATUS_PORCELAIN_START_ERROR), task->path);
+ return 0;
+}
+
@@ submodule.c: unsigned is_submodule_modified(const char *path, int ignore_untrack
+
+ if (retvalue) {
+ sps->result = 1;
-+ strbuf_addf(err,
-+ _(status_porcelain_fail_error),
-+ task->path);
++ strbuf_addf(err, _(STATUS_PORCELAIN_FAIL_ERROR), task->path);
+ }
+
+ parse_status_porcelain_strbuf(&task->out,
--
2.39.1.519.gcb327c4b5f-goog
next prev parent reply other threads:[~2023-02-09 0:03 UTC|newest]
Thread overview: 86+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <https://lore.kernel.org/git/20221108184200.2813458-1-calvinwan@google.com/>
2023-01-04 21:54 ` [PATCH v5 0/6] submodule: parallelize diff Calvin Wan
2023-01-05 23:23 ` Calvin Wan
2023-01-17 19:30 ` [PATCH v6 " Calvin Wan
2023-02-07 18:16 ` [PATCH v7 0/7] " Calvin Wan
2023-02-08 0:55 ` Ævar Arnfjörð Bjarmason
2023-02-09 0:02 ` Calvin Wan [this message]
2023-02-09 1:42 ` [PATCH v8 0/6] " Ævar Arnfjörð Bjarmason
2023-02-09 19:50 ` Junio C Hamano
2023-02-09 21:52 ` Calvin Wan
2023-02-09 22:25 ` Junio C Hamano
2023-02-10 13:24 ` Ævar Arnfjörð Bjarmason
2023-02-10 17:42 ` Junio C Hamano
2023-02-09 20:50 ` Phillip Wood
2023-03-02 21:52 ` [PATCH v9 " Calvin Wan
2023-03-02 22:02 ` [PATCH v9 1/6] run-command: add on_stderr_output_fn to run_processes_parallel_opts Calvin Wan
2023-03-02 22:02 ` [PATCH v9 2/6] submodule: rename strbuf variable Calvin Wan
2023-03-03 0:25 ` Junio C Hamano
2023-03-06 17:37 ` Calvin Wan
2023-03-06 18:30 ` Junio C Hamano
2023-03-06 19:00 ` Calvin Wan
2023-03-02 22:02 ` [PATCH v9 3/6] submodule: move status parsing into function Calvin Wan
2023-03-17 20:42 ` Glen Choo
2023-03-02 22:02 ` [PATCH v9 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-03-02 22:02 ` [PATCH v9 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-03-02 22:02 ` [PATCH v9 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-03-07 8:41 ` Ævar Arnfjörð Bjarmason
2023-03-07 10:21 ` Ævar Arnfjörð Bjarmason
2023-03-07 17:55 ` Junio C Hamano
2023-03-17 1:09 ` Glen Choo
2023-03-17 2:51 ` Glen Choo
2023-02-09 0:02 ` [PATCH v8 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-13 6:34 ` Glen Choo
2023-02-13 17:52 ` Junio C Hamano
2023-02-13 18:26 ` Calvin Wan
2023-02-09 0:02 ` [PATCH v8 2/6] submodule: strbuf variable rename Calvin Wan
2023-02-13 8:37 ` Glen Choo
2023-02-09 0:02 ` [PATCH v8 3/6] submodule: move status parsing into function Calvin Wan
2023-02-09 0:02 ` [PATCH v8 4/6] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-13 7:06 ` Glen Choo
2023-02-09 0:02 ` [PATCH v8 5/6] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-09 1:48 ` Ævar Arnfjörð Bjarmason
2023-02-13 8:42 ` Glen Choo
2023-02-13 18:29 ` Calvin Wan
2023-02-14 4:03 ` Glen Choo
2023-02-09 0:02 ` [PATCH v8 6/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-13 8:36 ` Glen Choo
2023-02-07 18:17 ` [PATCH v7 1/7] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-02-07 22:16 ` Ævar Arnfjörð Bjarmason
2023-02-08 22:50 ` Calvin Wan
2023-02-08 14:19 ` Phillip Wood
2023-02-08 22:54 ` Calvin Wan
2023-02-09 20:37 ` Phillip Wood
2023-02-07 18:17 ` [PATCH v7 2/7] submodule: strbuf variable rename Calvin Wan
2023-02-07 22:47 ` Ævar Arnfjörð Bjarmason
2023-02-08 22:59 ` Calvin Wan
2023-02-07 18:17 ` [PATCH v7 3/7] submodule: move status parsing into function Calvin Wan
2023-02-07 18:17 ` [PATCH v7 4/7] submodule: refactor is_submodule_modified() Calvin Wan
2023-02-07 22:59 ` Ævar Arnfjörð Bjarmason
2023-02-07 18:17 ` [PATCH v7 5/7] diff-lib: refactor out diff_change logic Calvin Wan
2023-02-08 14:28 ` Phillip Wood
2023-02-08 23:12 ` Calvin Wan
2023-02-09 20:53 ` Phillip Wood
2023-02-07 18:17 ` [PATCH v7 6/7] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-02-08 8:18 ` Ævar Arnfjörð Bjarmason
2023-02-08 17:07 ` Phillip Wood
2023-02-08 23:13 ` Calvin Wan
2023-02-08 14:22 ` Phillip Wood
2023-02-07 18:17 ` [PATCH v7 7/7] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-02-07 23:06 ` Ævar Arnfjörð Bjarmason
2023-01-17 19:30 ` [PATCH v6 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-17 19:30 ` [PATCH v6 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-17 19:30 ` [PATCH v6 3/6] submodule: move status parsing into function Calvin Wan
2023-01-17 19:30 ` [PATCH v6 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-17 19:30 ` [PATCH v6 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-26 9:09 ` Glen Choo
2023-01-26 9:16 ` Glen Choo
2023-01-26 18:52 ` Calvin Wan
2023-01-17 19:30 ` [PATCH v6 6/6] submodule: call parallel code from serial status Calvin Wan
2023-01-26 8:09 ` Glen Choo
2023-01-26 8:45 ` Glen Choo
2023-01-04 21:54 ` [PATCH v5 1/6] run-command: add duplicate_output_fn to run_processes_parallel_opts Calvin Wan
2023-01-04 21:54 ` [PATCH v5 2/6] submodule: strbuf variable rename Calvin Wan
2023-01-04 21:54 ` [PATCH v5 3/6] submodule: move status parsing into function Calvin Wan
2023-01-04 21:54 ` [PATCH v5 4/6] diff-lib: refactor match_stat_with_submodule Calvin Wan
2023-01-04 21:54 ` [PATCH v5 5/6] diff-lib: parallelize run_diff_files for submodules Calvin Wan
2023-01-04 21:54 ` [PATCH v5 6/6] submodule: call parallel code from serial status Calvin Wan
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230209000212.1892457-1-calvinwan@google.com \
--to=calvinwan@google.com \
--cc=avarab@gmail.com \
--cc=chooglen@google.com \
--cc=git@vger.kernel.org \
--cc=jonathantanmy@google.com \
--cc=newren@gmail.com \
--cc=phillip.wood123@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).