From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: jrnieder@gmail.com, Derrick Stolee <derrickstolee@github.com>,
Derrick Stolee <derrickstolee@github.com>
Subject: [PATCH 20/30] packed-refs: read optional prefix chunks
Date: Mon, 07 Nov 2022 18:35:54 +0000 [thread overview]
Message-ID: <9b3bd93e51e5ed4358c76263e96c4b4e218987b7.1667846165.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1408.git.1667846164.gitgitgadget@gmail.com>
From: Derrick Stolee <derrickstolee@github.com>
Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
refs/packed-backend.c | 2 +
refs/packed-backend.h | 9 +++
refs/packed-format-v2.c | 159 ++++++++++++++++++++++++++++++++++++++++
3 files changed, 170 insertions(+)
diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 549cce1f84a..ae904de9014 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -475,6 +475,8 @@ static struct ref_iterator *packed_ref_iterator_begin(
iter->version = snapshot->version;
iter->row = v2_row;
+ init_iterator_prefix_info(prefix, iter);
+
iter->pos = start;
iter->eof = snapshot->eof;
strbuf_init(&iter->refname_buf, 0);
diff --git a/refs/packed-backend.h b/refs/packed-backend.h
index 3a8649857f1..1936bb5c76c 100644
--- a/refs/packed-backend.h
+++ b/refs/packed-backend.h
@@ -103,9 +103,12 @@ struct snapshot {
* packed-refs v2 values *
*************************/
size_t nr;
+ size_t prefixes_nr;
size_t buflen;
const unsigned char *offset_chunk;
const char *refs_chunk;
+ const unsigned char *prefix_offsets_chunk;
+ const char *prefix_chunk;
/*
* Count of references to this instance, including the pointer
@@ -212,6 +215,9 @@ struct packed_ref_iterator {
***********************************/
size_t nr;
size_t row;
+ size_t prefix_row_end;
+ size_t prefix_i;
+ const char *cur_prefix;
};
typedef int (*write_ref_fn)(const char *refname,
@@ -308,4 +314,7 @@ struct write_packed_refs_v2_context *create_v2_context(struct packed_ref_store *
int write_packed_refs_v2(struct write_packed_refs_v2_context *ctx);
void free_v2_context(struct write_packed_refs_v2_context *ctx);
+void init_iterator_prefix_info(const char *prefix,
+ struct packed_ref_iterator *iter);
+
#endif /* REFS_PACKED_BACKEND_H */
diff --git a/refs/packed-format-v2.c b/refs/packed-format-v2.c
index d75df9545ec..0ab277f7ad4 100644
--- a/refs/packed-format-v2.c
+++ b/refs/packed-format-v2.c
@@ -14,6 +14,79 @@
#define PACKED_REFS_SIGNATURE 0x50524546 /* "PREF" */
#define CHREFS_CHUNKID_OFFSETS 0x524F4646 /* "ROFF" */
#define CHREFS_CHUNKID_REFS 0x52454653 /* "REFS" */
+#define CHREFS_CHUNKID_PREFIX_DATA 0x50465844 /* "PFXD" */
+#define CHREFS_CHUNKID_PREFIX_OFFSETS 0x5046584F /* "PFXO" */
+
+static const char *get_nth_prefix(struct snapshot *snapshot,
+ size_t n, size_t *len)
+{
+ uint64_t offset, next_offset;
+
+ if (n >= snapshot->prefixes_nr)
+ BUG("asking for prefix %"PRIu64" outside of bounds (%"PRIu64")",
+ (uint64_t)n, (uint64_t)snapshot->prefixes_nr);
+
+ if (n)
+ offset = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * (n - 1));
+ else
+ offset = 0;
+
+ if (len) {
+ next_offset = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * n);
+
+ /* Prefix includes null terminator. */
+ *len = next_offset - offset - 1;
+ }
+
+ return snapshot->prefix_chunk + offset;
+}
+
+/*
+ * Find the place in `snapshot->buf` where the start of the record for
+ * `refname` starts. If `mustexist` is true and the reference doesn't
+ * exist, then return NULL. If `mustexist` is false and the reference
+ * doesn't exist, then return the point where that reference would be
+ * inserted, or `snapshot->eof` (which might be NULL) if it would be
+ * inserted at the end of the file. In the latter mode, `refname`
+ * doesn't have to be a proper reference name; for example, one could
+ * search for "refs/replace/" to find the start of any replace
+ * references.
+ *
+ * The record is sought using a binary search, so `snapshot->buf` must
+ * be sorted.
+ */
+static const char *find_prefix_location(struct snapshot *snapshot,
+ const char *refname, size_t *pos)
+{
+ size_t lo = 0, hi = snapshot->prefixes_nr;
+
+ while (lo != hi) {
+ const char *rec;
+ int cmp;
+ size_t len;
+ size_t mid = lo + (hi - lo) / 2;
+
+ rec = get_nth_prefix(snapshot, mid, &len);
+ cmp = strncmp(rec, refname, len);
+ if (cmp < 0) {
+ lo = mid + 1;
+ } else if (cmp > 0) {
+ hi = mid;
+ } else {
+ /* we have a prefix match! */
+ *pos = mid;
+ return rec;
+ }
+ }
+
+ *pos = lo;
+ if (lo < snapshot->prefixes_nr)
+ return get_nth_prefix(snapshot, lo, NULL);
+ else
+ return NULL;
+}
int detect_packed_format_v2_header(struct packed_ref_store *refs,
struct snapshot *snapshot)
@@ -63,6 +136,46 @@ const char *find_reference_location_v2(struct snapshot *snapshot,
{
size_t lo = 0, hi = snapshot->nr;
+ if (snapshot->prefix_chunk) {
+ size_t prefix_row;
+ const char *prefix;
+ int found = 1;
+
+ prefix = find_prefix_location(snapshot, refname, &prefix_row);
+
+ if (!prefix || !starts_with(refname, prefix)) {
+ if (mustexist)
+ return NULL;
+ found = 0;
+ }
+
+ /* The second 4-byte column of the prefix offsets */
+ if (prefix_row) {
+ /* if prefix_row == 0, then lo = 0, which is already true. */
+ lo = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * (prefix_row - 1) + sizeof(uint32_t));
+ }
+
+ if (!found) {
+ const char *ret;
+ /* Terminate early with this lo position as the insertion point. */
+ if (pos)
+ *pos = lo;
+
+ if (lo >= snapshot->nr)
+ return NULL;
+
+ ret = get_nth_ref(snapshot, lo);
+ return ret;
+ }
+
+ hi = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * prefix_row + sizeof(uint32_t));
+
+ if (prefix)
+ refname += strlen(prefix);
+ }
+
while (lo != hi) {
const char *rec;
int cmp;
@@ -132,6 +245,16 @@ static int packed_refs_read_offsets(const unsigned char *chunk_start,
return 0;
}
+static int packed_refs_read_prefix_offsets(const unsigned char *chunk_start,
+ size_t chunk_size, void *data)
+{
+ struct snapshot *snapshot = data;
+
+ snapshot->prefix_offsets_chunk = chunk_start;
+ snapshot->prefixes_nr = chunk_size / sizeof(uint64_t);
+ return 0;
+}
+
void fill_snapshot_v2(struct snapshot *snapshot)
{
uint32_t file_signature, file_version, hash_version;
@@ -163,6 +286,9 @@ void fill_snapshot_v2(struct snapshot *snapshot)
read_chunk(cf, CHREFS_CHUNKID_OFFSETS, packed_refs_read_offsets, snapshot);
pair_chunk(cf, CHREFS_CHUNKID_REFS, (const unsigned char**)&snapshot->refs_chunk);
+ read_chunk(cf, CHREFS_CHUNKID_PREFIX_OFFSETS, packed_refs_read_prefix_offsets, snapshot);
+ pair_chunk(cf, CHREFS_CHUNKID_PREFIX_DATA, (const unsigned char**)&snapshot->prefix_chunk);
+
/* TODO: add error checks for invalid chunk combinations. */
cleanup:
@@ -187,6 +313,8 @@ int next_record_v2(struct packed_ref_iterator *iter)
iter->base.flags = REF_ISPACKED;
+ if (iter->cur_prefix)
+ strbuf_addstr(&iter->refname_buf, iter->cur_prefix);
strbuf_addstr(&iter->refname_buf, pos);
iter->base.refname = iter->refname_buf.buf;
pos += strlen(pos) + 1;
@@ -221,9 +349,40 @@ int next_record_v2(struct packed_ref_iterator *iter)
iter->row++;
+ if (iter->row == iter->prefix_row_end && iter->snapshot->prefix_chunk) {
+ size_t prefix_pos = get_be32(iter->snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * iter->prefix_i);
+ iter->cur_prefix = iter->snapshot->prefix_chunk + prefix_pos;
+ iter->prefix_i++;
+ iter->prefix_row_end = get_be32(iter->snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * iter->prefix_i + sizeof(uint32_t));
+ }
+
return ITER_OK;
}
+void init_iterator_prefix_info(const char *prefix,
+ struct packed_ref_iterator *iter)
+{
+ struct snapshot *snapshot = iter->snapshot;
+
+ if (snapshot->version != 2 || !snapshot->prefix_chunk) {
+ iter->prefix_row_end = snapshot->nr;
+ return;
+ }
+
+ if (prefix)
+ iter->cur_prefix = find_prefix_location(snapshot, prefix, &iter->prefix_i);
+ else {
+ iter->cur_prefix = snapshot->prefix_chunk;
+ iter->prefix_i = 0;
+ }
+
+ iter->prefix_row_end = get_be32(snapshot->prefix_offsets_chunk +
+ 2 * sizeof(uint32_t) * iter->prefix_i +
+ sizeof(uint32_t));
+}
+
struct write_packed_refs_v2_context {
struct packed_ref_store *refs;
struct string_list *updates;
--
gitgitgadget
next prev parent reply other threads:[~2022-11-07 18:38 UTC|newest]
Thread overview: 56+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-07 18:35 [PATCH 00/30] [RFC] extensions.refFormat and packed-refs v2 file format Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 01/30] hashfile: allow skipping the hash function Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 02/30] read-cache: add index.computeHash config option Derrick Stolee via GitGitGadget
2022-11-11 23:31 ` Elijah Newren
2022-11-14 16:30 ` Derrick Stolee
2022-11-17 16:13 ` Ævar Arnfjörð Bjarmason
2022-11-07 18:35 ` [PATCH 03/30] extensions: add refFormat extension Derrick Stolee via GitGitGadget
2022-11-11 23:39 ` Elijah Newren
2022-11-16 14:37 ` Derrick Stolee
2022-11-07 18:35 ` [PATCH 04/30] config: fix multi-level bulleted list Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 05/30] repository: wire ref extensions to ref backends Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 06/30] refs: allow loose files without packed-refs Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 07/30] chunk-format: number of chunks is optional Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 08/30] chunk-format: document trailing table of contents Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 09/30] chunk-format: store chunk offset during write Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 10/30] chunk-format: allow trailing table of contents Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 11/30] chunk-format: parse " Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 12/30] refs: extract packfile format to new file Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 13/30] packed-backend: extract add_write_error() Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 14/30] packed-backend: extract iterator/updates merge Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 15/30] packed-backend: create abstraction for writing refs Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 16/30] config: add config values for packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 17/30] packed-backend: create shell of v2 writes Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 18/30] packed-refs: write file format version 2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 19/30] packed-refs: read file format v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` Derrick Stolee via GitGitGadget [this message]
2022-11-07 18:35 ` [PATCH 21/30] packed-refs: write prefix chunks Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 22/30] packed-backend: create GIT_TEST_PACKED_REFS_VERSION Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 23/30] t1409: test with packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 24/30] t5312: allow packed-refs v2 format Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 25/30] t5502: add PACKED_REFS_V1 prerequisite Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 26/30] t3210: require packed-refs v1 for some tests Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 27/30] t*: skip packed-refs v2 over http tests Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 28/30] ci: run GIT_TEST_PACKED_REFS_VERSION=2 in some builds Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 29/30] p1401: create performance test for ref operations Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 30/30] refs: skip hashing when writing packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-09 15:15 ` [PATCH 00/30] [RFC] extensions.refFormat and packed-refs v2 file format Derrick Stolee
2022-11-11 23:28 ` Elijah Newren
2022-11-14 0:07 ` Derrick Stolee
2022-11-15 2:47 ` Elijah Newren
2022-11-16 14:45 ` Derrick Stolee
2022-11-17 4:28 ` Elijah Newren
2022-11-18 23:31 ` Junio C Hamano
2022-11-19 0:41 ` Elijah Newren
2022-11-19 3:00 ` Taylor Blau
2022-11-30 15:31 ` Derrick Stolee
2022-11-28 18:56 ` Han-Wen Nienhuys
2022-11-30 15:16 ` Derrick Stolee
2022-11-30 15:38 ` Phillip Wood
2022-11-30 16:37 ` Taylor Blau
2022-11-30 18:30 ` Han-Wen Nienhuys
2022-11-30 18:37 ` Sean Allred
2022-12-01 20:18 ` Derrick Stolee
2022-12-02 16:46 ` Han-Wen Nienhuys
2022-12-02 18:24 ` Ævar Arnfjörð Bjarmason
2022-11-30 22:55 ` Junio C Hamano
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: http://vger.kernel.org/majordomo-info.html
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=9b3bd93e51e5ed4358c76263e96c4b4e218987b7.1667846165.git.gitgitgadget@gmail.com \
--to=gitgitgadget@gmail.com \
--cc=derrickstolee@github.com \
--cc=git@vger.kernel.org \
--cc=jrnieder@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).