git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: jrnieder@gmail.com, Derrick Stolee <derrickstolee@github.com>,
	Derrick Stolee <derrickstolee@github.com>
Subject: [PATCH 20/30] packed-refs: read optional prefix chunks
Date: Mon, 07 Nov 2022 18:35:54 +0000	[thread overview]
Message-ID: <9b3bd93e51e5ed4358c76263e96c4b4e218987b7.1667846165.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1408.git.1667846164.gitgitgadget@gmail.com>

From: Derrick Stolee <derrickstolee@github.com>

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 refs/packed-backend.c   |   2 +
 refs/packed-backend.h   |   9 +++
 refs/packed-format-v2.c | 159 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 170 insertions(+)

diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index 549cce1f84a..ae904de9014 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -475,6 +475,8 @@ static struct ref_iterator *packed_ref_iterator_begin(
 	iter->version = snapshot->version;
 	iter->row = v2_row;
 
+	init_iterator_prefix_info(prefix, iter);
+
 	iter->pos = start;
 	iter->eof = snapshot->eof;
 	strbuf_init(&iter->refname_buf, 0);
diff --git a/refs/packed-backend.h b/refs/packed-backend.h
index 3a8649857f1..1936bb5c76c 100644
--- a/refs/packed-backend.h
+++ b/refs/packed-backend.h
@@ -103,9 +103,12 @@ struct snapshot {
 	 * packed-refs v2 values *
 	 *************************/
 	size_t nr;
+	size_t prefixes_nr;
 	size_t buflen;
 	const unsigned char *offset_chunk;
 	const char *refs_chunk;
+	const unsigned char *prefix_offsets_chunk;
+	const char *prefix_chunk;
 
 	/*
 	 * Count of references to this instance, including the pointer
@@ -212,6 +215,9 @@ struct packed_ref_iterator {
 	 ***********************************/
 	size_t nr;
 	size_t row;
+	size_t prefix_row_end;
+	size_t prefix_i;
+	const char *cur_prefix;
 };
 
 typedef int (*write_ref_fn)(const char *refname,
@@ -308,4 +314,7 @@ struct write_packed_refs_v2_context *create_v2_context(struct packed_ref_store *
 int write_packed_refs_v2(struct write_packed_refs_v2_context *ctx);
 void free_v2_context(struct write_packed_refs_v2_context *ctx);
 
+void init_iterator_prefix_info(const char *prefix,
+			       struct packed_ref_iterator *iter);
+
 #endif /* REFS_PACKED_BACKEND_H */
diff --git a/refs/packed-format-v2.c b/refs/packed-format-v2.c
index d75df9545ec..0ab277f7ad4 100644
--- a/refs/packed-format-v2.c
+++ b/refs/packed-format-v2.c
@@ -14,6 +14,79 @@
 #define PACKED_REFS_SIGNATURE          0x50524546 /* "PREF" */
 #define CHREFS_CHUNKID_OFFSETS         0x524F4646 /* "ROFF" */
 #define CHREFS_CHUNKID_REFS            0x52454653 /* "REFS" */
+#define CHREFS_CHUNKID_PREFIX_DATA     0x50465844 /* "PFXD" */
+#define CHREFS_CHUNKID_PREFIX_OFFSETS  0x5046584F /* "PFXO" */
+
+static const char *get_nth_prefix(struct snapshot *snapshot,
+				  size_t n, size_t *len)
+{
+	uint64_t offset, next_offset;
+
+	if (n >= snapshot->prefixes_nr)
+		BUG("asking for prefix %"PRIu64" outside of bounds (%"PRIu64")",
+		    (uint64_t)n, (uint64_t)snapshot->prefixes_nr);
+
+	if (n)
+		offset = get_be32(snapshot->prefix_offsets_chunk +
+				  2 * sizeof(uint32_t) * (n - 1));
+	else
+		offset = 0;
+
+	if (len) {
+		next_offset = get_be32(snapshot->prefix_offsets_chunk +
+				       2 * sizeof(uint32_t) * n);
+
+		/* Prefix includes null terminator. */
+		*len = next_offset - offset - 1;
+	}
+
+	return snapshot->prefix_chunk + offset;
+}
+
+/*
+ * Find the place in `snapshot->buf` where the start of the record for
+ * `refname` starts. If `mustexist` is true and the reference doesn't
+ * exist, then return NULL. If `mustexist` is false and the reference
+ * doesn't exist, then return the point where that reference would be
+ * inserted, or `snapshot->eof` (which might be NULL) if it would be
+ * inserted at the end of the file. In the latter mode, `refname`
+ * doesn't have to be a proper reference name; for example, one could
+ * search for "refs/replace/" to find the start of any replace
+ * references.
+ *
+ * The record is sought using a binary search, so `snapshot->buf` must
+ * be sorted.
+ */
+static const char *find_prefix_location(struct snapshot *snapshot,
+					const char *refname, size_t *pos)
+{
+	size_t lo = 0, hi = snapshot->prefixes_nr;
+
+	while (lo != hi) {
+		const char *rec;
+		int cmp;
+		size_t len;
+		size_t mid = lo + (hi - lo) / 2;
+
+		rec = get_nth_prefix(snapshot, mid, &len);
+		cmp = strncmp(rec, refname, len);
+		if (cmp < 0) {
+			lo = mid + 1;
+		} else if (cmp > 0) {
+			hi = mid;
+		} else {
+			/* we have a prefix match! */
+			*pos = mid;
+			return rec;
+		}
+	}
+
+	*pos = lo;
+	if (lo < snapshot->prefixes_nr)
+		return get_nth_prefix(snapshot, lo, NULL);
+	else
+		return NULL;
+}
 
 int detect_packed_format_v2_header(struct packed_ref_store *refs,
 				   struct snapshot *snapshot)
@@ -63,6 +136,46 @@ const char *find_reference_location_v2(struct snapshot *snapshot,
 {
 	size_t lo = 0, hi = snapshot->nr;
 
+	if (snapshot->prefix_chunk) {
+		size_t prefix_row;
+		const char *prefix;
+		int found = 1;
+
+		prefix = find_prefix_location(snapshot, refname, &prefix_row);
+
+		if (!prefix || !starts_with(refname, prefix)) {
+			if (mustexist)
+				return NULL;
+			found = 0;
+		}
+
+		/* The second 4-byte column of the prefix offsets */
+		if (prefix_row) {
+			/* if prefix_row == 0, then lo = 0, which is already true. */
+			lo = get_be32(snapshot->prefix_offsets_chunk +
+				2 * sizeof(uint32_t) * (prefix_row - 1) + sizeof(uint32_t));
+		}
+
+		if (!found) {
+			const char *ret;
+			/* Terminate early with this lo position as the insertion point. */
+			if (pos)
+				*pos = lo;
+
+			if (lo >= snapshot->nr)
+				return NULL;
+
+			ret = get_nth_ref(snapshot, lo);
+			return ret;
+		}
+
+		hi = get_be32(snapshot->prefix_offsets_chunk +
+			      2 * sizeof(uint32_t) * prefix_row + sizeof(uint32_t));
+
+		if (prefix)
+			refname += strlen(prefix);
+	}
+
 	while (lo != hi) {
 		const char *rec;
 		int cmp;
@@ -132,6 +245,16 @@ static int packed_refs_read_offsets(const unsigned char *chunk_start,
 	return 0;
 }
 
+static int packed_refs_read_prefix_offsets(const unsigned char *chunk_start,
+					    size_t chunk_size, void *data)
+{
+	struct snapshot *snapshot = data;
+
+	snapshot->prefix_offsets_chunk = chunk_start;
+	snapshot->prefixes_nr = chunk_size / sizeof(uint64_t);
+	return 0;
+}
+
 void fill_snapshot_v2(struct snapshot *snapshot)
 {
 	uint32_t file_signature, file_version, hash_version;
@@ -163,6 +286,9 @@ void fill_snapshot_v2(struct snapshot *snapshot)
 	read_chunk(cf, CHREFS_CHUNKID_OFFSETS, packed_refs_read_offsets, snapshot);
 	pair_chunk(cf, CHREFS_CHUNKID_REFS, (const unsigned char**)&snapshot->refs_chunk);
 
+	read_chunk(cf, CHREFS_CHUNKID_PREFIX_OFFSETS, packed_refs_read_prefix_offsets, snapshot);
+	pair_chunk(cf, CHREFS_CHUNKID_PREFIX_DATA, (const unsigned char**)&snapshot->prefix_chunk);
+
 	/* TODO: add error checks for invalid chunk combinations. */
 
 cleanup:
@@ -187,6 +313,8 @@ int next_record_v2(struct packed_ref_iterator *iter)
 
 	iter->base.flags = REF_ISPACKED;
 
+	if (iter->cur_prefix)
+		strbuf_addstr(&iter->refname_buf, iter->cur_prefix);
 	strbuf_addstr(&iter->refname_buf, pos);
 	iter->base.refname = iter->refname_buf.buf;
 	pos += strlen(pos) + 1;
@@ -221,9 +349,40 @@ int next_record_v2(struct packed_ref_iterator *iter)
 
 	iter->row++;
 
+	if (iter->row == iter->prefix_row_end && iter->snapshot->prefix_chunk) {
+		size_t prefix_pos = get_be32(iter->snapshot->prefix_offsets_chunk +
+					     2 * sizeof(uint32_t) * iter->prefix_i);
+		iter->cur_prefix = iter->snapshot->prefix_chunk + prefix_pos;
+		iter->prefix_i++;
+		iter->prefix_row_end = get_be32(iter->snapshot->prefix_offsets_chunk +
+						2 * sizeof(uint32_t) * iter->prefix_i + sizeof(uint32_t));
+	}
+
 	return ITER_OK;
 }
 
+void init_iterator_prefix_info(const char *prefix,
+			       struct packed_ref_iterator *iter)
+{
+	struct snapshot *snapshot = iter->snapshot;
+
+	if (snapshot->version != 2 || !snapshot->prefix_chunk) {
+		iter->prefix_row_end = snapshot->nr;
+		return;
+	}
+
+	if (prefix)
+		iter->cur_prefix = find_prefix_location(snapshot, prefix, &iter->prefix_i);
+	else {
+		iter->cur_prefix = snapshot->prefix_chunk;
+		iter->prefix_i = 0;
+	}
+
+	iter->prefix_row_end = get_be32(snapshot->prefix_offsets_chunk +
+					2 * sizeof(uint32_t) * iter->prefix_i +
+					sizeof(uint32_t));
+}
+
 struct write_packed_refs_v2_context {
 	struct packed_ref_store *refs;
 	struct string_list *updates;
-- 
gitgitgadget


  parent reply	other threads:[~2022-11-07 18:38 UTC|newest]

Thread overview: 56+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-07 18:35 [PATCH 00/30] [RFC] extensions.refFormat and packed-refs v2 file format Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 01/30] hashfile: allow skipping the hash function Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 02/30] read-cache: add index.computeHash config option Derrick Stolee via GitGitGadget
2022-11-11 23:31   ` Elijah Newren
2022-11-14 16:30     ` Derrick Stolee
2022-11-17 16:13   ` Ævar Arnfjörð Bjarmason
2022-11-07 18:35 ` [PATCH 03/30] extensions: add refFormat extension Derrick Stolee via GitGitGadget
2022-11-11 23:39   ` Elijah Newren
2022-11-16 14:37     ` Derrick Stolee
2022-11-07 18:35 ` [PATCH 04/30] config: fix multi-level bulleted list Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 05/30] repository: wire ref extensions to ref backends Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 06/30] refs: allow loose files without packed-refs Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 07/30] chunk-format: number of chunks is optional Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 08/30] chunk-format: document trailing table of contents Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 09/30] chunk-format: store chunk offset during write Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 10/30] chunk-format: allow trailing table of contents Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 11/30] chunk-format: parse " Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 12/30] refs: extract packfile format to new file Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 13/30] packed-backend: extract add_write_error() Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 14/30] packed-backend: extract iterator/updates merge Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 15/30] packed-backend: create abstraction for writing refs Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 16/30] config: add config values for packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 17/30] packed-backend: create shell of v2 writes Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 18/30] packed-refs: write file format version 2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 19/30] packed-refs: read file format v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` Derrick Stolee via GitGitGadget [this message]
2022-11-07 18:35 ` [PATCH 21/30] packed-refs: write prefix chunks Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 22/30] packed-backend: create GIT_TEST_PACKED_REFS_VERSION Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 23/30] t1409: test with packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 24/30] t5312: allow packed-refs v2 format Derrick Stolee via GitGitGadget
2022-11-07 18:35 ` [PATCH 25/30] t5502: add PACKED_REFS_V1 prerequisite Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 26/30] t3210: require packed-refs v1 for some tests Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 27/30] t*: skip packed-refs v2 over http tests Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 28/30] ci: run GIT_TEST_PACKED_REFS_VERSION=2 in some builds Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 29/30] p1401: create performance test for ref operations Derrick Stolee via GitGitGadget
2022-11-07 18:36 ` [PATCH 30/30] refs: skip hashing when writing packed-refs v2 Derrick Stolee via GitGitGadget
2022-11-09 15:15 ` [PATCH 00/30] [RFC] extensions.refFormat and packed-refs v2 file format Derrick Stolee
2022-11-11 23:28 ` Elijah Newren
2022-11-14  0:07   ` Derrick Stolee
2022-11-15  2:47     ` Elijah Newren
2022-11-16 14:45       ` Derrick Stolee
2022-11-17  4:28         ` Elijah Newren
2022-11-18 23:31     ` Junio C Hamano
2022-11-19  0:41       ` Elijah Newren
2022-11-19  3:00         ` Taylor Blau
2022-11-30 15:31       ` Derrick Stolee
2022-11-28 18:56 ` Han-Wen Nienhuys
2022-11-30 15:16   ` Derrick Stolee
2022-11-30 15:38     ` Phillip Wood
2022-11-30 16:37     ` Taylor Blau
2022-11-30 18:30     ` Han-Wen Nienhuys
2022-11-30 18:37       ` Sean Allred
2022-12-01 20:18       ` Derrick Stolee
2022-12-02 16:46         ` Han-Wen Nienhuys
2022-12-02 18:24           ` Ævar Arnfjörð Bjarmason
2022-11-30 22:55     ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=9b3bd93e51e5ed4358c76263e96c4b4e218987b7.1667846165.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=derrickstolee@github.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).