From: "SZEDER Gábor" <szeder.dev@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
"Garima Singh" <garima.singh@microsoft.com>,
"Derrick Stolee" <stolee@gmail.com>,
"Jakub Narebski" <jnareb@gmail.com>, "Jeff King" <peff@peff.net>,
"Taylor Blau" <me@ttaylorr.com>,
"SZEDER Gábor" <szeder.dev@gmail.com>
Subject: [PATCH 22/34] commit-graph: write the Modified Path Bloom Filters chunk
Date: Fri, 29 May 2020 10:50:26 +0200 [thread overview]
Message-ID: <20200529085038.26008-23-szeder.dev@gmail.com> (raw)
In-Reply-To: <20200529085038.26008-1-szeder.dev@gmail.com>
Write the Modified Path Bloom Filters chunk first, keeping track of
the offsets where each (non-embedded) Bloom filter has been written,
and then write the Modified Path Bloom Filter Index chunk using those
recorded offsets.
---
commit-graph.c | 78 ++++++++++++++++++++++++++++++++++++++++++++------
1 file changed, 69 insertions(+), 9 deletions(-)
diff --git a/commit-graph.c b/commit-graph.c
index fb24600bb3..3210ec2f93 100644
--- a/commit-graph.c
+++ b/commit-graph.c
@@ -26,6 +26,7 @@
#define GRAPH_CHUNKID_EXTRAEDGES 0x45444745 /* "EDGE" */
#define GRAPH_CHUNKID_BASE 0x42415345 /* "BASE" */
#define GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTER_INDEX 0x4d504249 /* "MPBI" */
+#define GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTERS 0x4d504246 /* "MPBF" */
#define GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTER_EXCLUDES 0x4d504258 /* "MPBX" */
#define GRAPH_DATA_WIDTH (the_hash_algo->rawsz + 16)
@@ -56,6 +57,12 @@
struct modified_path_bloom_filter_info {
struct bloom_filter filter;
+ /*
+ * The offset relative to the start of the Modified Path Bloom
+ * Filters chunk where this Bloom filter has been written,
+ * -1 before that.
+ */
+ uint64_t offset;
};
static void free_modified_path_bloom_filter_info_in_slab(
@@ -1019,6 +1026,9 @@ struct write_commit_graph_context {
*/
uint32_t *hashes;
int hashes_nr, hashes_alloc;
+
+ /* Excluding embedded modified path Bloom filters */
+ uint64_t total_filter_size;
} mpbfctx;
};
@@ -1219,6 +1229,43 @@ static int write_graph_chunk_extra_edges(struct hashfile *f,
return 0;
}
+static int write_graph_chunk_modified_path_bloom_filters(struct hashfile *f,
+ struct write_commit_graph_context *ctx)
+{
+ uint64_t offset = 0;
+ int i;
+
+ for (i = 0; i < ctx->commits.nr; i++) {
+ struct commit *commit = ctx->commits.list[i];
+ struct modified_path_bloom_filter_info *bfi;
+ unsigned int filter_size;
+
+ display_progress(ctx->progress, ++ctx->progress_cnt);
+
+ bfi = modified_path_bloom_filters_peek(
+ &modified_path_bloom_filters, commit);
+
+ if (!bfi || !bfi->filter.nr_bits)
+ continue;
+ if (bfi->filter.nr_bits == GRAPH_MODIFIED_PATH_BLOOM_FILTER_EMBEDDED_NR_BITS)
+ continue;
+
+ if (offset >> 62)
+ BUG("offset %lu is too large for the Modified Path Bloom Filter Index chunk",
+ offset);
+
+ bfi->offset = offset;
+
+ filter_size = bloom_filter_bytes(&bfi->filter);
+
+ hashwrite_be32(f, bfi->filter.nr_bits);
+ hashwrite(f, bfi->filter.bits, filter_size);
+
+ offset += sizeof(uint32_t) + filter_size;
+ }
+ return 0;
+}
+
static int write_graph_chunk_modified_path_bloom_index(struct hashfile *f,
struct write_commit_graph_context *ctx)
{
@@ -1247,8 +1294,11 @@ static int write_graph_chunk_modified_path_bloom_index(struct hashfile *f,
*/
filterdata[0] |= 1 << 7;
hashwrite(f, filterdata, sizeof(filterdata));
+ } else if (bfi->offset != -1) {
+ uint64_t offset = htonll(bfi->offset);
+ hashwrite(f, &offset, sizeof(offset));
} else
- BUG("writing non-embedded Bloom filters is not implemented yet");
+ BUG("modified path Bloom filter offset is still -1?!");
}
return 0;
}
@@ -1424,17 +1474,20 @@ static void create_modified_path_bloom_filter(
diff_tree_oid(parent_oid, &commit->object.oid, "", &mpbfctx->diffopt);
path_component_count = mpbfctx->hashes_nr / mpbfctx->num_hashes;
+ bfi = modified_path_bloom_filters_at(&modified_path_bloom_filters,
+ commit);
+ bfi->offset = -1;
if (path_component_count > mpbfctx->embedded_limit) {
- /* Not implemented yet. */
- } else {
- bfi = modified_path_bloom_filters_at(
- &modified_path_bloom_filters, commit);
-
+ bloom_filter_init(&bfi->filter, mpbfctx->num_hashes,
+ path_component_count);
+ mpbfctx->total_filter_size += sizeof(uint32_t) +
+ bloom_filter_bytes(&bfi->filter);
+ } else
bloom_filter_init_with_size(&bfi->filter,
GRAPH_MODIFIED_PATH_BLOOM_FILTER_EMBEDDED_NR_BITS);
- bloom_filter_set_bits(&bfi->filter, mpbfctx->hashes,
- mpbfctx->hashes_nr);
- }
+
+ bloom_filter_set_bits(&bfi->filter, mpbfctx->hashes,
+ mpbfctx->hashes_nr);
}
static void add_missing_parents(struct write_commit_graph_context *ctx, struct commit *commit)
@@ -1845,6 +1898,13 @@ static int write_commit_graph_file(struct write_commit_graph_context *ctx)
chunks_nr++;
}
if (ctx->mpbfctx.use_modified_path_bloom_filters) {
+ if (ctx->mpbfctx.total_filter_size) {
+ ALLOC_GROW(chunks, chunks_nr + 1, chunks_alloc);
+ chunks[chunks_nr].id = GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTERS;
+ chunks[chunks_nr].size = ctx->mpbfctx.total_filter_size;
+ chunks[chunks_nr].write_fn = write_graph_chunk_modified_path_bloom_filters;
+ chunks_nr++;
+ }
ALLOC_GROW(chunks, chunks_nr + 1, chunks_alloc);
chunks[chunks_nr].id = GRAPH_CHUNKID_MODIFIED_PATH_BLOOM_FILTER_INDEX;
chunks[chunks_nr].size = sizeof(uint8_t) +
--
2.27.0.rc1.431.g5c813f95dc
next prev parent reply other threads:[~2020-05-29 8:52 UTC|newest]
Thread overview: 43+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-05-29 8:50 [PoC PATCH 00/34] An alternative modified path Bloom filters implementation SZEDER Gábor
2020-05-28 22:09 ` Johannes Schindelin
2020-05-29 8:50 ` [PATCH 01/34] tree-walk.c: don't match submodule entries for 'submod/anything' SZEDER Gábor
2020-05-29 8:50 ` [PATCH 02/34] commit-graph: fix parsing the Chunk Lookup table SZEDER Gábor
2020-05-29 8:50 ` [PATCH 03/34] commit-graph-format.txt: all multi-byte numbers are in network byte order SZEDER Gábor
2020-05-29 8:50 ` [PATCH 04/34] commit-slab: add a function to deep free entries on the slab SZEDER Gábor
2020-06-04 16:43 ` Derrick Stolee
2020-06-27 15:56 ` SZEDER Gábor
2020-06-29 11:30 ` Derrick Stolee
2020-05-29 8:50 ` [PATCH 05/34] diff.h: drop diff_tree_oid() & friends' return value SZEDER Gábor
2020-05-29 8:50 ` [PATCH 06/34] commit-graph: clean up #includes SZEDER Gábor
2020-05-29 8:50 ` [PATCH 07/34] commit-graph: simplify parse_commit_graph() #1 SZEDER Gábor
2020-05-29 8:50 ` [PATCH 08/34] commit-graph: simplify parse_commit_graph() #2 SZEDER Gábor
2020-05-29 8:50 ` [PATCH 09/34] commit-graph: simplify write_commit_graph_file() #1 SZEDER Gábor
2020-05-29 8:50 ` [PATCH 10/34] commit-graph: simplify write_commit_graph_file() #2 SZEDER Gábor
2020-05-29 8:50 ` [PATCH 11/34] commit-graph: allocate the 'struct chunk_info' array dinamically SZEDER Gábor
2020-05-29 8:50 ` [PATCH 12/34] commit-graph: unify the signatures of all write_graph_chunk_*() functions SZEDER Gábor
2020-05-29 8:50 ` [PATCH 13/34] commit-graph: simplify write_commit_graph_file() #3 SZEDER Gábor
2020-05-29 8:50 ` [PATCH 14/34] commit-graph: check chunk sizes after writing SZEDER Gábor
2020-05-29 8:50 ` [PATCH 15/34] commit-graph-format.txt: document the modified path Bloom filter chunks SZEDER Gábor
2020-06-02 17:59 ` SZEDER Gábor
2020-05-29 8:50 ` [PATCH 16/34] Add a generic and minimal Bloom filter implementation SZEDER Gábor
2020-05-29 8:50 ` [PATCH 17/34] Import a streaming-capable Murmur3 hash function implementation SZEDER Gábor
2020-05-29 8:50 ` [PATCH 18/34] commit-graph: write "empty" Modified Path Bloom Filter Index chunk SZEDER Gábor
2020-05-29 8:50 ` [PATCH 19/34] commit-graph: add commit slab for modified path Bloom filters SZEDER Gábor
2020-05-29 8:50 ` [PATCH 20/34] commit-graph: fill the Modified Path Bloom Filter Index chunk SZEDER Gábor
2020-05-29 8:50 ` [PATCH 21/34] commit-graph: load and use " SZEDER Gábor
2020-05-29 8:50 ` SZEDER Gábor [this message]
2020-05-29 8:50 ` [PATCH 23/34] commit-graph: load and use the Modified Path Bloom Filters chunk SZEDER Gábor
2020-05-29 8:50 ` [PATCH 24/34] commit-graph: check all leading directories in modified path Bloom filters SZEDER Gábor
2020-05-29 8:50 ` [PATCH 25/34] commit-graph: check embedded modified path Bloom filters with a mask SZEDER Gábor
2020-05-29 8:50 ` [PATCH 26/34] commit-graph: deduplicate modified path Bloom filters SZEDER Gábor
2020-05-29 8:50 ` [PATCH 27/34] commit-graph: load modified path Bloom filters for merge commits SZEDER Gábor
2020-05-29 8:50 ` [PATCH 28/34] commit-graph: write Modified Path Bloom Filter Merge Index chunk SZEDER Gábor
2020-05-29 8:50 ` [PATCH 29/34] commit-graph: extract init and free write_commit_graph_context SZEDER Gábor
2020-05-29 8:50 ` [PATCH 30/34] commit-graph: move write_commit_graph_reachable below write_commit_graph SZEDER Gábor
2020-05-29 8:50 ` [PATCH 31/34] t7007-show: make the first test compatible with the next patch SZEDER Gábor
2020-05-29 8:50 ` [PATCH 32/34] PoC commit-graph: use revision walk machinery for '--reachable' SZEDER Gábor
2020-05-29 8:50 ` [PATCH 33/34] commit-graph: write modified path Bloom filters in "history order" SZEDER Gábor
2020-05-29 8:50 ` [PATCH 34/34] commit-graph: use modified path Bloom filters with wildcards, if possible SZEDER Gábor
2020-05-29 13:59 ` [PoC PATCH 00/34] An alternative modified path Bloom filters implementation Derrick Stolee
2020-06-01 23:25 ` Taylor Blau
2020-06-02 17:08 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200529085038.26008-23-szeder.dev@gmail.com \
--to=szeder.dev@gmail.com \
--cc=garima.singh@microsoft.com \
--cc=git@vger.kernel.org \
--cc=gitster@pobox.com \
--cc=jnareb@gmail.com \
--cc=me@ttaylorr.com \
--cc=peff@peff.net \
--cc=stolee@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).