From: Taylor Blau <me@ttaylorr.com>
To: git@vger.kernel.org
Cc: peff@peff.net, dstolee@microsoft.com, szeder.dev@gmail.com
Subject: [PATCH v2 14/14] builtin/commit-graph.c: introduce '--max-new-filters=<n>'
Date: Wed, 5 Aug 2020 13:03:05 -0400 [thread overview]
Message-ID: <3b66ae4a9c2aa97dd64f88904ad2bf2756ccd9ef.1596646576.git.me@ttaylorr.com> (raw)
In-Reply-To: <cover.1596646576.git.me@ttaylorr.com>
Introduce a command-line flag and configuration variable to fill in the
'max_new_filters' variable introduced by the previous patch.
The command-line option '--max-new-filters' takes precedence over the
'commitGraph.maxNewFilters' configuration variable, which supplies the default value.
'--no-max-new-filters' can also be provided; it resets the value to
'-1', indicating that an unlimited number of new Bloom filters may be
generated. (OPT_INTEGER can only reset the value to '0' when its '--no-'
variant is given, hence a custom callback is used instead.)
Signed-off-by: Taylor Blau <me@ttaylorr.com>
---
Documentation/config/commitgraph.txt | 4 +++
Documentation/git-commit-graph.txt | 4 +++
bloom.c | 15 +++++++++++
builtin/commit-graph.c | 39 +++++++++++++++++++++++++---
commit-graph.c | 16 +++++++++---
commit-graph.h | 1 +
t/t4216-log-bloom.sh | 19 ++++++++++++++
7 files changed, 91 insertions(+), 7 deletions(-)
diff --git a/Documentation/config/commitgraph.txt b/Documentation/config/commitgraph.txt
index cff0797b54..4582c39fc4 100644
--- a/Documentation/config/commitgraph.txt
+++ b/Documentation/config/commitgraph.txt
@@ -1,3 +1,7 @@
+commitGraph.maxNewFilters::
+ Specifies the default value for the `--max-new-filters` option of `git
+ commit-graph write` (see linkgit:git-commit-graph[1]).
+
commitGraph.readChangedPaths::
If true, then git will use the changed-path Bloom filters in the
commit-graph file (if it exists, and they are present). Defaults to
diff --git a/Documentation/git-commit-graph.txt b/Documentation/git-commit-graph.txt
index 17405c73a9..9c887d5d79 100644
--- a/Documentation/git-commit-graph.txt
+++ b/Documentation/git-commit-graph.txt
@@ -67,6 +67,10 @@ this option is given, future commit-graph writes will automatically assume
that this option was intended. Use `--no-changed-paths` to stop storing this
data.
+
+With the `--max-new-filters=<n>` option, generate at most `n` new Bloom
+filters (if `--changed-paths` is specified). If `n` is `-1`, no limit is
+enforced. Overrides the `commitGraph.maxNewFilters` configuration.
++
With the `--split[=<strategy>]` option, write the commit-graph as a
chain of multiple commit-graph files stored in
`<dir>/info/commit-graphs`. Commit-graph layers are merged based on the
diff --git a/bloom.c b/bloom.c
index ed54e96e57..d0c0fd049d 100644
--- a/bloom.c
+++ b/bloom.c
@@ -51,6 +51,21 @@ static int load_bloom_filter_from_graph(struct commit_graph *g,
else
start_index = 0;
+ if ((start_index == end_index) &&
+ (g->bloom_large.word_alloc && !bitmap_get(&g->bloom_large, lex_pos))) {
+ /*
+ * If the filter is zero-length, either (1) the filter has no
+ * changes, (2) the filter has too many changes, or (3) it
+ * wasn't computed (e.g., due to '--max-new-filters').
+ *
+ * If either (1) or (2) is the case, the 'large' bit will be set
+ * for this Bloom filter. If it is unset, then it wasn't
+ * computed. In that case, return nothing, since we don't have
+ * that filter in the graph.
+ */
+ return 0;
+ }
+
filter->len = end_index - start_index;
filter->data = (unsigned char *)(g->chunk_bloom_data +
sizeof(unsigned char) * start_index +
diff --git a/builtin/commit-graph.c b/builtin/commit-graph.c
index 38f5f57d15..3500a6e1f1 100644
--- a/builtin/commit-graph.c
+++ b/builtin/commit-graph.c
@@ -13,7 +13,8 @@ static char const * const builtin_commit_graph_usage[] = {
N_("git commit-graph verify [--object-dir <objdir>] [--shallow] [--[no-]progress]"),
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
- "[--changed-paths] [--[no-]progress] <split options>"),
+ "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+ "<split options>"),
NULL
};
@@ -25,7 +26,8 @@ static const char * const builtin_commit_graph_verify_usage[] = {
static const char * const builtin_commit_graph_write_usage[] = {
N_("git commit-graph write [--object-dir <objdir>] [--append] "
"[--split[=<strategy>]] [--reachable|--stdin-packs|--stdin-commits] "
- "[--changed-paths] [--[no-]progress] <split options>"),
+ "[--changed-paths] [--[no-]max-new-filters <n>] [--[no-]progress] "
+ "<split options>"),
NULL
};
@@ -162,6 +164,23 @@ static int read_one_commit(struct oidset *commits, struct progress *progress,
return 0;
}
+static int write_option_max_new_filters(const struct option *opt,
+ const char *arg,
+ int unset)
+{
+ int *to = opt->value;
+ if (unset)
+ *to = -1;
+ else {
+ const char *s;
+ *to = strtol(arg, (char **)&s, 10);
+ if (*s)
+ return error(_("%s expects a numerical value"),
+ optname(opt, opt->flags));
+ }
+ return 0;
+}
+
static int graph_write(int argc, const char **argv)
{
struct string_list pack_indexes = STRING_LIST_INIT_NODUP;
@@ -197,6 +216,9 @@ static int graph_write(int argc, const char **argv)
N_("maximum ratio between two levels of a split commit-graph")),
OPT_EXPIRY_DATE(0, "expire-time", &write_opts.expire_time,
N_("only expire files older than a given date-time")),
+ OPT_CALLBACK_F(0, "max-new-filters", &write_opts.max_new_filters,
+ NULL, N_("maximum number of changed-path Bloom filters to compute"),
+ 0, write_option_max_new_filters),
OPT_END(),
};
@@ -205,6 +227,7 @@ static int graph_write(int argc, const char **argv)
write_opts.size_multiple = 2;
write_opts.max_commits = 0;
write_opts.expire_time = 0;
+ write_opts.max_new_filters = -1;
trace2_cmd_mode("write");
@@ -270,6 +293,16 @@ static int graph_write(int argc, const char **argv)
return result;
}
+static int git_commit_graph_config(const char *var, const char *value, void *cb)
+{
+ if (!strcmp(var, "commitgraph.maxnewfilters")) {
+ write_opts.max_new_filters = git_config_int(var, value);
+ return 0;
+ }
+
+ return git_default_config(var, value, cb);
+}
+
int cmd_commit_graph(int argc, const char **argv, const char *prefix)
{
static struct option builtin_commit_graph_options[] = {
@@ -283,7 +316,7 @@ int cmd_commit_graph(int argc, const char **argv, const char *prefix)
usage_with_options(builtin_commit_graph_usage,
builtin_commit_graph_options);
- git_config(git_default_config, NULL);
+ git_config(git_commit_graph_config, &opts);
argc = parse_options(argc, argv, prefix,
builtin_commit_graph_options,
builtin_commit_graph_usage,
diff --git a/commit-graph.c b/commit-graph.c
index 82fca07579..76b1238262 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -948,7 +948,8 @@ struct tree *get_commit_tree_in_graph(struct repository *r, const struct commit
}
static int get_bloom_filter_large_in_graph(struct commit_graph *g,
- const struct commit *c)
+ const struct commit *c,
+ uint32_t max_changed_paths)
{
uint32_t graph_pos = commit_graph_position(c);
if (graph_pos == COMMIT_NOT_FROM_GRAPH)
@@ -1475,6 +1476,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
int i;
struct progress *progress = NULL;
int *sorted_commits;
+ int max_new_filters;
init_bloom_filters();
ctx->bloom_large = bitmap_word_alloc(ctx->commits.nr / BITS_IN_EWORD + 1);
@@ -1491,10 +1493,15 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->order_by_pack ? commit_pos_cmp : commit_gen_cmp,
&ctx->commits);
+ max_new_filters = ctx->opts->max_new_filters >= 0 ?
+ ctx->opts->max_new_filters : ctx->commits.nr;
+
for (i = 0; i < ctx->commits.nr; i++) {
int pos = sorted_commits[i];
struct commit *c = ctx->commits.list[pos];
- if (get_bloom_filter_large_in_graph(ctx->r->objects->commit_graph, c)) {
+ if (get_bloom_filter_large_in_graph(ctx->r->objects->commit_graph,
+ c,
+ ctx->bloom_settings->max_changed_paths)) {
bitmap_set(ctx->bloom_large, pos);
ctx->count_bloom_filter_known_large++;
} else {
@@ -1502,7 +1509,7 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
struct bloom_filter *filter = get_or_compute_bloom_filter(
ctx->r,
c,
- 1,
+ ctx->count_bloom_filter_computed < max_new_filters,
ctx->bloom_settings,
&computed);
if (computed) {
@@ -1512,7 +1519,8 @@ static void compute_bloom_filters(struct write_commit_graph_context *ctx)
ctx->count_bloom_filter_found_large++;
}
}
- ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
+ if (filter)
+ ctx->total_bloom_filter_data_size += sizeof(unsigned char) * filter->len;
}
display_progress(progress, i + 1);
}
diff --git a/commit-graph.h b/commit-graph.h
index 1d147b7b76..47d99ea4bf 100644
--- a/commit-graph.h
+++ b/commit-graph.h
@@ -115,6 +115,7 @@ struct commit_graph_opts {
int max_commits;
timestamp_t expire_time;
enum commit_graph_split_flags flags;
+ int max_new_filters;
};
/*
diff --git a/t/t4216-log-bloom.sh b/t/t4216-log-bloom.sh
index 6859d85369..3aab8ffbe3 100755
--- a/t/t4216-log-bloom.sh
+++ b/t/t4216-log-bloom.sh
@@ -286,4 +286,23 @@ test_expect_success 'Bloom generation does not recompute too-large filters' '
)
'
+test_expect_success 'Bloom generation is limited by --max-new-filters' '
+ (
+ cd limits &&
+ test_commit c2 filter &&
+ test_commit c3 filter &&
+ test_commit c4 no-filter &&
+ test_bloom_filters_computed "--reachable --changed-paths --split=replace --max-new-filters=2" \
+ 2 0 2
+ )
+'
+
+test_expect_success 'Bloom generation backfills previously-skipped filters' '
+ (
+ cd limits &&
+ test_bloom_filters_computed "--reachable --changed-paths --split=replace --max-new-filters=1" \
+ 2 0 1
+ )
+'
+
test_done
--
2.28.0.rc1.13.ge78abce653
next prev parent reply other threads:[~2020-08-05 17:22 UTC|newest]
Thread overview: 117+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-03 18:57 [PATCH 00/10] more miscellaneous Bloom filter improvements Taylor Blau
2020-08-03 18:57 ` [PATCH 01/10] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-08-04 7:24 ` Jeff King
2020-08-04 20:08 ` Taylor Blau
2020-08-03 18:57 ` [PATCH 02/10] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-08-03 18:57 ` [PATCH 03/10] t4216: use an '&&'-chain Taylor Blau
2020-08-03 18:57 ` [PATCH 04/10] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-08-03 18:57 ` [PATCH 05/10] commit-graph: respect 'commitgraph.readChangedPaths' Taylor Blau
2020-08-03 18:57 ` [PATCH 06/10] commit-graph.c: sort index into commits list Taylor Blau
2020-08-04 12:31 ` Derrick Stolee
2020-08-04 20:10 ` Taylor Blau
2020-08-03 18:57 ` [PATCH 07/10] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-08-03 18:59 ` Taylor Blau
2020-08-04 12:57 ` Derrick Stolee
2020-08-03 18:57 ` [PATCH 08/10] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-08-04 13:00 ` Derrick Stolee
2020-08-04 20:12 ` Taylor Blau
2020-08-03 18:57 ` [PATCH 09/10] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-08-03 18:57 ` [PATCH 10/10] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-08-04 13:03 ` Derrick Stolee
2020-08-04 20:14 ` Taylor Blau
2020-08-05 17:01 ` [PATCH v2 00/14] more miscellaneous Bloom filter improvements Taylor Blau
2020-08-05 17:01 ` [PATCH v2 01/14] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-08-05 17:02 ` [PATCH v2 02/14] t4216: use an '&&'-chain Taylor Blau
2020-08-05 17:02 ` [PATCH v2 03/14] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-08-05 17:02 ` [PATCH v2 04/14] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-08-05 17:02 ` [PATCH v2 05/14] commit-graph: respect 'commitGraph.readChangedPaths' Taylor Blau
2020-08-05 17:02 ` [PATCH v2 06/14] commit-graph.c: store maximum changed paths Taylor Blau
2020-08-05 17:02 ` [PATCH v2 07/14] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-08-05 17:02 ` [PATCH v2 08/14] bloom: use provided 'struct bloom_filter_settings' Taylor Blau
2020-08-05 17:02 ` [PATCH v2 09/14] bloom/diff: properly short-circuit on max_changes Taylor Blau
2020-08-05 17:02 ` [PATCH v2 10/14] commit-graph.c: sort index into commits list Taylor Blau
2020-08-05 17:02 ` [PATCH v2 11/14] csum-file.h: introduce 'hashwrite_be64()' Taylor Blau
2020-08-05 17:02 ` [PATCH v2 12/14] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-08-05 21:01 ` Junio C Hamano
2020-08-05 21:17 ` Taylor Blau
2020-08-05 22:21 ` Junio C Hamano
2020-08-05 22:25 ` Taylor Blau
2020-08-11 13:48 ` Taylor Blau
2020-08-11 18:59 ` Junio C Hamano
2020-08-05 17:03 ` [PATCH v2 13/14] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-08-05 17:03 ` Taylor Blau [this message]
2020-08-11 20:51 ` [PATCH v3 00/14] more miscellaneous Bloom filter improvements Taylor Blau
2020-08-11 20:51 ` [PATCH v3 01/14] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-08-11 21:18 ` SZEDER Gábor
2020-08-11 21:21 ` Taylor Blau
2020-08-11 21:27 ` SZEDER Gábor
2020-08-11 21:34 ` Taylor Blau
2020-08-11 23:55 ` SZEDER Gábor
2020-08-12 11:48 ` Derrick Stolee
2020-08-14 20:17 ` Taylor Blau
2020-08-11 20:51 ` [PATCH v3 02/14] t4216: use an '&&'-chain Taylor Blau
2020-08-11 20:51 ` [PATCH v3 03/14] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-08-11 20:51 ` [PATCH v3 04/14] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-08-11 20:51 ` [PATCH v3 05/14] commit-graph: respect 'commitGraph.readChangedPaths' Taylor Blau
2020-08-11 20:51 ` [PATCH v3 06/14] commit-graph.c: store maximum changed paths Taylor Blau
2020-08-11 20:51 ` [PATCH v3 07/14] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-08-11 20:51 ` [PATCH v3 11/14] csum-file.h: introduce 'hashwrite_be64()' Taylor Blau
2020-08-11 20:51 ` [PATCH v3 08/14] bloom: use provided 'struct bloom_filter_settings' Taylor Blau
2020-08-11 20:51 ` [PATCH v3 09/14] bloom/diff: properly short-circuit on max_changes Taylor Blau
2020-08-11 20:52 ` [PATCH v3 10/14] commit-graph.c: sort index into commits list Taylor Blau
2020-08-11 20:52 ` [PATCH v3 12/14] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-08-11 21:11 ` Derrick Stolee
2020-08-11 21:18 ` Taylor Blau
2020-08-11 22:05 ` Taylor Blau
2020-08-19 13:35 ` SZEDER Gábor
2020-09-02 20:23 ` Taylor Blau
2020-09-01 14:35 ` SZEDER Gábor
2020-09-02 20:40 ` Taylor Blau
2020-08-11 20:52 ` [PATCH v3 13/14] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-08-19 9:56 ` SZEDER Gábor
2020-09-02 21:02 ` Taylor Blau
2020-08-11 20:52 ` [PATCH v3 14/14] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-08-12 11:49 ` SZEDER Gábor
2020-08-14 20:20 ` Taylor Blau
2020-08-17 22:50 ` SZEDER Gábor
2020-09-02 21:03 ` Taylor Blau
2020-08-12 12:29 ` Derrick Stolee
2020-08-14 20:10 ` Taylor Blau
2020-08-18 22:23 ` SZEDER Gábor
2020-09-03 16:35 ` Taylor Blau
2020-08-19 8:20 ` SZEDER Gábor
2020-09-03 16:42 ` Taylor Blau
2020-09-04 8:50 ` SZEDER Gábor
2020-09-01 14:36 ` SZEDER Gábor
2020-09-03 18:49 ` Taylor Blau
2020-09-03 21:45 ` [PATCH v3 00/14] more miscellaneous Bloom filter improvements Junio C Hamano
2020-09-03 22:33 ` Taylor Blau
2020-09-03 22:45 ` [PATCH v4 " Taylor Blau
2020-09-03 22:46 ` [PATCH v4 01/14] commit-graph: introduce 'get_bloom_filter_settings()' Taylor Blau
2020-09-03 22:46 ` [PATCH v4 02/14] t4216: use an '&&'-chain Taylor Blau
2020-09-03 22:46 ` [PATCH v4 03/14] commit-graph: pass a 'struct repository *' in more places Taylor Blau
2020-09-03 22:46 ` [PATCH v4 04/14] t/helper/test-read-graph.c: prepare repo settings Taylor Blau
2020-09-03 22:46 ` [PATCH v4 05/14] commit-graph: respect 'commitGraph.readChangedPaths' Taylor Blau
2020-09-03 22:46 ` [PATCH v4 06/14] commit-graph.c: store maximum changed paths Taylor Blau
2020-09-03 22:46 ` [PATCH v4 07/14] bloom: split 'get_bloom_filter()' in two Taylor Blau
2020-09-05 17:22 ` Jakub Narębski
2020-09-05 17:38 ` Taylor Blau
2020-09-05 17:50 ` Jakub Narębski
2020-09-05 18:01 ` Taylor Blau
2020-09-05 18:18 ` Jakub Narębski
2020-09-05 18:38 ` Taylor Blau
2020-09-05 18:55 ` Taylor Blau
2020-09-05 19:04 ` SZEDER Gábor
2020-09-05 19:49 ` Taylor Blau
2020-09-06 21:52 ` Junio C Hamano
2020-09-03 22:46 ` [PATCH v4 08/14] bloom: use provided 'struct bloom_filter_settings' Taylor Blau
2020-09-03 22:46 ` [PATCH v4 09/14] bloom/diff: properly short-circuit on max_changes Taylor Blau
2020-09-03 22:46 ` [PATCH v4 10/14] commit-graph.c: sort index into commits list Taylor Blau
2020-09-03 22:46 ` [PATCH v4 11/14] csum-file.h: introduce 'hashwrite_be64()' Taylor Blau
2020-09-04 20:18 ` René Scharfe
2020-09-04 20:22 ` Taylor Blau
2020-09-03 22:46 ` [PATCH v4 12/14] commit-graph: add large-filters bitmap chunk Taylor Blau
2020-09-03 22:46 ` [PATCH v4 13/14] commit-graph: rename 'split_commit_graph_opts' Taylor Blau
2020-09-04 15:20 ` Taylor Blau
2020-09-03 22:47 ` [PATCH v4 14/14] builtin/commit-graph.c: introduce '--max-new-filters=<n>' Taylor Blau
2020-09-04 14:39 ` [PATCH v4 00/14] more miscellaneous Bloom filter improvements Derrick Stolee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3b66ae4a9c2aa97dd64f88904ad2bf2756ccd9ef.1596646576.git.me@ttaylorr.com \
--to=me@ttaylorr.com \
--cc=dstolee@microsoft.com \
--cc=git@vger.kernel.org \
--cc=peff@peff.net \
--cc=szeder.dev@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).