git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Michael Haggerty <mhagger@alum.mit.edu>
To: Junio C Hamano <gitster@pobox.com>
Cc: "Stefan Beller" <sbeller@google.com>,
	"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Jeff King" <peff@peff.net>,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Brandon Williams" <bmwill@google.com>,
	git@vger.kernel.org, "Michael Haggerty" <mhagger@alum.mit.edu>
Subject: [PATCH v2 10/21] mmapped_ref_iterator: add iterator over a packed-refs file
Date: Tue, 19 Sep 2017 08:22:18 +0200	[thread overview]
Message-ID: <aab75b5a61b5b71248267d29c04254b60f4ae385.1505799700.git.mhagger@alum.mit.edu> (raw)
In-Reply-To: <cover.1505799700.git.mhagger@alum.mit.edu>

Add a new `mmapped_ref_iterator`, which can iterate over the
references in an mmapped `packed-refs` file directly. Use this
iterator from `read_packed_refs()` to fill the packed refs cache.

Note that we are not yet willing to promise that the new iterator
generates its output in order. That doesn't matter for now, because
the packed refs cache doesn't care in what order it is filled.

This change adds a lot of boilerplate without providing any obvious
benefits. The benefits will come soon, when we get rid of the
`ref_cache` for packed references altogether.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
---
 refs/packed-backend.c | 207 ++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 152 insertions(+), 55 deletions(-)

diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index ae276f3445..312116a99d 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -163,6 +163,141 @@ static NORETURN void die_invalid_line(const char *path,
 
 }
 
+/*
+ * An iterator over a packed-refs file that is currently mmapped.
+ */
+struct mmapped_ref_iterator {
+	struct ref_iterator base;
+
+	struct packed_ref_cache *packed_refs;
+
+	/* The current position in the mmapped file: */
+	const char *pos;
+
+	/* The end of the mmapped file: */
+	const char *eof;
+
+	struct object_id oid, peeled;
+
+	struct strbuf refname_buf;
+};
+
+static int mmapped_ref_iterator_advance(struct ref_iterator *ref_iterator)
+{
+	struct mmapped_ref_iterator *iter =
+		(struct mmapped_ref_iterator *)ref_iterator;
+	const char *p = iter->pos, *eol;
+
+	strbuf_reset(&iter->refname_buf);
+
+	if (iter->pos == iter->eof)
+		return ref_iterator_abort(ref_iterator);
+
+	iter->base.flags = REF_ISPACKED;
+
+	if (iter->eof - p < GIT_SHA1_HEXSZ + 2 ||
+	    parse_oid_hex(p, &iter->oid, &p) ||
+	    !isspace(*p++))
+		die_invalid_line(iter->packed_refs->refs->path,
+				 iter->pos, iter->eof - iter->pos);
+
+	eol = memchr(p, '\n', iter->eof - p);
+	if (!eol)
+		die_unterminated_line(iter->packed_refs->refs->path,
+				      iter->pos, iter->eof - iter->pos);
+
+	strbuf_add(&iter->refname_buf, p, eol - p);
+	iter->base.refname = iter->refname_buf.buf;
+
+	if (check_refname_format(iter->base.refname, REFNAME_ALLOW_ONELEVEL)) {
+		if (!refname_is_safe(iter->base.refname))
+			die("packed refname is dangerous: %s",
+			    iter->base.refname);
+		oidclr(&iter->oid);
+		iter->base.flags |= REF_BAD_NAME | REF_ISBROKEN;
+	}
+	if (iter->packed_refs->peeled == PEELED_FULLY ||
+	    (iter->packed_refs->peeled == PEELED_TAGS &&
+	     starts_with(iter->base.refname, "refs/tags/")))
+		iter->base.flags |= REF_KNOWS_PEELED;
+
+	iter->pos = eol + 1;
+
+	if (iter->pos < iter->eof && *iter->pos == '^') {
+		p = iter->pos + 1;
+		if (iter->eof - p < GIT_SHA1_HEXSZ + 1 ||
+		    parse_oid_hex(p, &iter->peeled, &p) ||
+		    *p++ != '\n')
+			die_invalid_line(iter->packed_refs->refs->path,
+					 iter->pos, iter->eof - iter->pos);
+		iter->pos = p;
+
+		/*
+		 * Regardless of what the file header said, we
+		 * definitely know the value of *this* reference:
+		 */
+		iter->base.flags |= REF_KNOWS_PEELED;
+	} else {
+		oidclr(&iter->peeled);
+	}
+
+	return ITER_OK;
+}
+
+static int mmapped_ref_iterator_peel(struct ref_iterator *ref_iterator,
+				    struct object_id *peeled)
+{
+	struct mmapped_ref_iterator *iter =
+		(struct mmapped_ref_iterator *)ref_iterator;
+
+	if ((iter->base.flags & REF_KNOWS_PEELED)) {
+		oidcpy(peeled, &iter->peeled);
+		return is_null_oid(&iter->peeled) ? -1 : 0;
+	} else if ((iter->base.flags & (REF_ISBROKEN | REF_ISSYMREF))) {
+		return -1;
+	} else {
+		return !!peel_object(iter->oid.hash, peeled->hash);
+	}
+}
+
+static int mmapped_ref_iterator_abort(struct ref_iterator *ref_iterator)
+{
+	struct mmapped_ref_iterator *iter =
+		(struct mmapped_ref_iterator *)ref_iterator;
+
+	release_packed_ref_cache(iter->packed_refs);
+	strbuf_release(&iter->refname_buf);
+	base_ref_iterator_free(ref_iterator);
+	return ITER_DONE;
+}
+
+static struct ref_iterator_vtable mmapped_ref_iterator_vtable = {
+	mmapped_ref_iterator_advance,
+	mmapped_ref_iterator_peel,
+	mmapped_ref_iterator_abort
+};
+
+struct ref_iterator *mmapped_ref_iterator_begin(
+		const char *packed_refs_file,
+		struct packed_ref_cache *packed_refs,
+		const char *pos, const char *eof)
+{
+	struct mmapped_ref_iterator *iter = xcalloc(1, sizeof(*iter));
+	struct ref_iterator *ref_iterator = &iter->base;
+
+	base_ref_iterator_init(ref_iterator, &mmapped_ref_iterator_vtable, 0);
+
+	iter->packed_refs = packed_refs;
+	acquire_packed_ref_cache(iter->packed_refs);
+	iter->pos = pos;
+	iter->eof = eof;
+	strbuf_init(&iter->refname_buf, 0);
+
+	iter->base.oid = &iter->oid;
+
+	return ref_iterator;
+}
+
 /*
  * Read from the `packed-refs` file into a newly-allocated
  * `packed_ref_cache` and return it. The return value will already
@@ -199,9 +334,10 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
 	struct stat st;
 	size_t size;
 	char *buf;
-	const char *pos, *eol, *eof;
-	struct strbuf tmp = STRBUF_INIT;
+	const char *pos, *eof;
 	struct ref_dir *dir;
+	struct ref_iterator *iter;
+	int ok;
 
 	packed_refs->refs = refs;
 	acquire_packed_ref_cache(packed_refs);
@@ -235,7 +371,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
 
 	/* If the file has a header line, process it: */
 	if (pos < eof && *pos == '#') {
+		struct strbuf tmp = STRBUF_INIT;
 		char *p;
+		const char *eol;
 		struct string_list traits = STRING_LIST_INIT_NODUP;
 
 		eol = memchr(pos, '\n', eof - pos);
@@ -259,69 +397,28 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
 		pos = eol + 1;
 
 		string_list_clear(&traits, 0);
-		strbuf_reset(&tmp);
+		strbuf_release(&tmp);
 	}
 
 	dir = get_ref_dir(packed_refs->cache->root);
-	while (pos < eof) {
-		const char *p = pos;
-		struct object_id oid;
-		const char *refname;
-		int flag = REF_ISPACKED;
-		struct ref_entry *entry = NULL;
-
-		if (eof - pos < GIT_SHA1_HEXSZ + 2 ||
-		    parse_oid_hex(p, &oid, &p) ||
-		    !isspace(*p++))
-			die_invalid_line(refs->path, pos, eof - pos);
+	iter = mmapped_ref_iterator_begin(refs->path, packed_refs, pos, eof);
+	while ((ok = ref_iterator_advance(iter)) == ITER_OK) {
+		struct ref_entry *entry =
+			create_ref_entry(iter->refname, iter->oid, iter->flags);
 
-		eol = memchr(p, '\n', eof - p);
-		if (!eol)
-			die_unterminated_line(refs->path, pos, eof - pos);
-
-		strbuf_add(&tmp, p, eol - p);
-		refname = tmp.buf;
-
-		if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
-			if (!refname_is_safe(refname))
-				die("packed refname is dangerous: %s", refname);
-			oidclr(&oid);
-			flag |= REF_BAD_NAME | REF_ISBROKEN;
-		}
-		if (packed_refs->peeled == PEELED_FULLY ||
-		    (packed_refs->peeled == PEELED_TAGS &&
-		     starts_with(refname, "refs/tags/")))
-			flag |= REF_KNOWS_PEELED;
-		entry = create_ref_entry(refname, &oid, flag);
+		if ((iter->flags & REF_KNOWS_PEELED))
+			ref_iterator_peel(iter, &entry->u.value.peeled);
 		add_ref_entry(dir, entry);
-
-		pos = eol + 1;
-
-		if (pos < eof && *pos == '^') {
-			p = pos + 1;
-			if (eof - p < GIT_SHA1_HEXSZ + 1 ||
-			    parse_oid_hex(p, &entry->u.value.peeled, &p) ||
-			    *p++ != '\n')
-				die_invalid_line(refs->path, pos, eof - pos);
-
-			/*
-			 * Regardless of what the file header said,
-			 * we definitely know the value of *this*
-			 * reference:
-			 */
-			entry->flag |= REF_KNOWS_PEELED;
-
-			pos = p;
-		}
-
-		strbuf_reset(&tmp);
 	}
 
+	if (ok != ITER_DONE)
+		die("error reading packed-refs file %s", refs->path);
+
 	if (munmap(buf, size))
-		die_errno("error ummapping packed-refs file");
+		die_errno("error ummapping packed-refs file %s", refs->path);
+
 	close(fd);
 
-	strbuf_release(&tmp);
 	return packed_refs;
 }
 
-- 
2.14.1


  parent reply	other threads:[~2017-09-19  6:23 UTC|newest]

Thread overview: 40+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-19  6:22 [PATCH v2 00/21] Read `packed-refs` using mmap() Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 01/21] ref_iterator: keep track of whether the iterator output is ordered Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 02/21] prefix_ref_iterator: break when we leave the prefix Michael Haggerty
2017-09-20 20:25   ` Stefan Beller
2017-09-21  4:59     ` Jeff King
2017-09-21 17:29       ` Stefan Beller
2017-09-21  7:42     ` Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 03/21] packed_ref_cache: add a backlink to the associated `packed_ref_store` Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 04/21] die_unterminated_line(), die_invalid_line(): new functions Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 05/21] read_packed_refs(): use mmap to read the `packed-refs` file Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 06/21] read_packed_refs(): only check for a header at the top of the file Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 07/21] read_packed_refs(): make parsing of the header line more robust Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 08/21] read_packed_refs(): read references with minimal copying Michael Haggerty
2017-09-20 18:27   ` Jeff King
2017-09-21  7:34     ` Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 09/21] packed_ref_cache: remember the file-wide peeling state Michael Haggerty
2017-09-19  6:22 ` Michael Haggerty [this message]
2017-09-19  6:22 ` [PATCH v2 11/21] mmapped_ref_iterator_advance(): no peeled value for broken refs Michael Haggerty
2017-09-20 18:29   ` Jeff King
2017-09-19  6:22 ` [PATCH v2 12/21] packed-backend.c: reorder some definitions Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 13/21] packed_ref_cache: keep the `packed-refs` file mmapped if possible Michael Haggerty
2017-09-19 12:44   ` Michael Haggerty
2017-09-24  6:56     ` Junio C Hamano
2017-09-20 18:40   ` Jeff King
2017-09-20 18:51     ` Jeff King
2017-09-21  8:04       ` Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 14/21] read_packed_refs(): ensure that references are ordered when read Michael Haggerty
2017-09-20 18:50   ` Jeff King
2017-09-21  8:27     ` Michael Haggerty
2017-09-25 15:44       ` Johannes Schindelin
2017-09-19  6:22 ` [PATCH v2 15/21] packed_ref_iterator_begin(): iterate using `mmapped_ref_iterator` Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 16/21] packed_read_raw_ref(): read the reference from the mmapped buffer Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 17/21] ref_store: implement `refs_peel_ref()` generically Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 18/21] packed_ref_store: get rid of the `ref_cache` entirely Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 19/21] ref_cache: remove support for storing peeled values Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 20/21] mmapped_ref_iterator: inline into `packed_ref_iterator` Michael Haggerty
2017-09-19  6:22 ` [PATCH v2 21/21] packed-backend.c: rename a bunch of things and update comments Michael Haggerty
2017-09-19 19:53 ` [PATCH v2 00/21] Read `packed-refs` using mmap() Johannes Schindelin
2017-09-20 18:57 ` Jeff King
2017-09-25 15:55   ` Johannes Schindelin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aab75b5a61b5b71248267d29c04254b60f4ae385.1505799700.git.mhagger@alum.mit.edu \
    --to=mhagger@alum.mit.edu \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=avarab@gmail.com \
    --cc=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).