git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Michael Haggerty <mhagger@alum.mit.edu>
To: Junio C Hamano <gitster@pobox.com>
Cc: "Stefan Beller" <sbeller@google.com>,
	"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>,
	"Jeff King" <peff@peff.net>,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Brandon Williams" <bmwill@google.com>,
	git@vger.kernel.org, "Michael Haggerty" <mhagger@alum.mit.edu>
Subject: [PATCH v3 08/21] read_packed_refs(): read references with minimal copying
Date: Mon, 25 Sep 2017 10:00:05 +0200	[thread overview]
Message-ID: <6fc5f32b6c526c9f6a0aa983b977b946723b61e0.1506325610.git.mhagger@alum.mit.edu> (raw)
In-Reply-To: <cover.1506325610.git.mhagger@alum.mit.edu>

Instead of copying data from the `packed-refs` file one line at time
and then processing it, process the data in place as much as possible.

Also, instead of processing one line per iteration of the main loop,
process a reference line plus its corresponding peeled line (if
present) together.

Note that this change slightly tightens up the parsing of the
`packed-refs` file. Previously, the parser would have accepted
multiple "peeled" lines for a single reference (ignoring all but the
last one). Now it would reject that.

Signed-off-by: Michael Haggerty <mhagger@alum.mit.edu>
---
 refs/packed-backend.c | 101 ++++++++++++++++++++------------------------------
 1 file changed, 40 insertions(+), 61 deletions(-)

diff --git a/refs/packed-backend.c b/refs/packed-backend.c
index a45e3ff92f..2b80f244c8 100644
--- a/refs/packed-backend.c
+++ b/refs/packed-backend.c
@@ -134,33 +134,6 @@ static void clear_packed_ref_cache(struct packed_ref_store *refs)
 	}
 }
 
-/* The length of a peeled reference line in packed-refs, including EOL: */
-#define PEELED_LINE_LENGTH 42
-
-/*
- * Parse one line from a packed-refs file.  Write the SHA1 to sha1.
- * Return a pointer to the refname within the line (null-terminated),
- * or NULL if there was a problem.
- */
-static const char *parse_ref_line(struct strbuf *line, struct object_id *oid)
-{
-	const char *ref;
-
-	if (parse_oid_hex(line->buf, oid, &ref) < 0)
-		return NULL;
-	if (!isspace(*ref++))
-		return NULL;
-
-	if (isspace(*ref))
-		return NULL;
-
-	if (line->buf[line->len - 1] != '\n')
-		return NULL;
-	line->buf[--line->len] = 0;
-
-	return ref;
-}
-
 static NORETURN void die_unterminated_line(const char *path,
 					   const char *p, size_t len)
 {
@@ -221,8 +194,7 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
 	size_t size;
 	char *buf;
 	const char *pos, *eol, *eof;
-	struct ref_entry *last = NULL;
-	struct strbuf line = STRBUF_INIT;
+	struct strbuf tmp = STRBUF_INIT;
 	enum { PEELED_NONE, PEELED_TAGS, PEELED_FULLY } peeled = PEELED_NONE;
 	struct ref_dir *dir;
 
@@ -264,9 +236,9 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
 		if (!eol)
 			die_unterminated_line(refs->path, pos, eof - pos);
 
-		strbuf_add(&line, pos, eol - pos);
+		strbuf_add(&tmp, pos, eol - pos);
 
-		if (!skip_prefix(line.buf, "# pack-refs with:", (const char **)&p))
+		if (!skip_prefix(tmp.buf, "# pack-refs with:", (const char **)&p))
 			die_invalid_line(refs->path, pos, eof - pos);
 
 		string_list_split_in_place(&traits, p, ' ', -1);
@@ -281,61 +253,68 @@ static struct packed_ref_cache *read_packed_refs(struct packed_ref_store *refs)
 		pos = eol + 1;
 
 		string_list_clear(&traits, 0);
-		strbuf_reset(&line);
+		strbuf_reset(&tmp);
 	}
 
 	dir = get_ref_dir(packed_refs->cache->root);
 	while (pos < eof) {
+		const char *p = pos;
 		struct object_id oid;
 		const char *refname;
+		int flag = REF_ISPACKED;
+		struct ref_entry *entry = NULL;
 
-		eol = memchr(pos, '\n', eof - pos);
+		if (eof - pos < GIT_SHA1_HEXSZ + 2 ||
+		    parse_oid_hex(p, &oid, &p) ||
+		    !isspace(*p++))
+			die_invalid_line(refs->path, pos, eof - pos);
+
+		eol = memchr(p, '\n', eof - p);
 		if (!eol)
 			die_unterminated_line(refs->path, pos, eof - pos);
 
-		strbuf_add(&line, pos, eol + 1 - pos);
+		strbuf_add(&tmp, p, eol - p);
+		refname = tmp.buf;
+
+		if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
+			if (!refname_is_safe(refname))
+				die("packed refname is dangerous: %s", refname);
+			oidclr(&oid);
+			flag |= REF_BAD_NAME | REF_ISBROKEN;
+		}
+		if (peeled == PEELED_FULLY ||
+		    (peeled == PEELED_TAGS && starts_with(refname, "refs/tags/")))
+			flag |= REF_KNOWS_PEELED;
+		entry = create_ref_entry(refname, &oid, flag);
+		add_ref_entry(dir, entry);
 
-		refname = parse_ref_line(&line, &oid);
-		if (refname) {
-			int flag = REF_ISPACKED;
+		pos = eol + 1;
+
+		if (pos < eof && *pos == '^') {
+			p = pos + 1;
+			if (eof - p < GIT_SHA1_HEXSZ + 1 ||
+			    parse_oid_hex(p, &entry->u.value.peeled, &p) ||
+			    *p++ != '\n')
+				die_invalid_line(refs->path, pos, eof - pos);
 
-			if (check_refname_format(refname, REFNAME_ALLOW_ONELEVEL)) {
-				if (!refname_is_safe(refname))
-					die("packed refname is dangerous: %s", refname);
-				oidclr(&oid);
-				flag |= REF_BAD_NAME | REF_ISBROKEN;
-			}
-			last = create_ref_entry(refname, &oid, flag);
-			if (peeled == PEELED_FULLY ||
-			    (peeled == PEELED_TAGS && starts_with(refname, "refs/tags/")))
-				last->flag |= REF_KNOWS_PEELED;
-			add_ref_entry(dir, last);
-		} else if (last &&
-		    line.buf[0] == '^' &&
-		    line.len == PEELED_LINE_LENGTH &&
-		    line.buf[PEELED_LINE_LENGTH - 1] == '\n' &&
-		    !get_oid_hex(line.buf + 1, &oid)) {
-			oidcpy(&last->u.value.peeled, &oid);
 			/*
 			 * Regardless of what the file header said,
 			 * we definitely know the value of *this*
 			 * reference:
 			 */
-			last->flag |= REF_KNOWS_PEELED;
-		} else {
-			die_invalid_line(refs->path, line.buf, line.len);
+			entry->flag |= REF_KNOWS_PEELED;
+
+			pos = p;
 		}
 
-		/* The "+ 1" is for the LF character. */
-		pos = eol + 1;
-		strbuf_reset(&line);
+		strbuf_reset(&tmp);
 	}
 
 	if (munmap(buf, size))
 		die_errno("error ummapping packed-refs file");
 	close(fd);
 
-	strbuf_release(&line);
+	strbuf_release(&tmp);
 	return packed_refs;
 }
 
-- 
2.14.1


  parent reply	other threads:[~2017-09-25  8:00 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-09-25  7:59 [PATCH v3 00/21] Read `packed-refs` using mmap() Michael Haggerty
2017-09-25  7:59 ` [PATCH v3 01/21] ref_iterator: keep track of whether the iterator output is ordered Michael Haggerty
2017-09-25  7:59 ` [PATCH v3 02/21] prefix_ref_iterator: break when we leave the prefix Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 03/21] packed_ref_cache: add a backlink to the associated `packed_ref_store` Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 04/21] die_unterminated_line(), die_invalid_line(): new functions Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 05/21] read_packed_refs(): use mmap to read the `packed-refs` file Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 06/21] read_packed_refs(): only check for a header at the top of the file Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 07/21] read_packed_refs(): make parsing of the header line more robust Michael Haggerty
2017-09-25  8:00 ` Michael Haggerty [this message]
2017-09-25  8:00 ` [PATCH v3 09/21] packed_ref_cache: remember the file-wide peeling state Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 10/21] mmapped_ref_iterator: add iterator over a packed-refs file Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 11/21] mmapped_ref_iterator_advance(): no peeled value for broken refs Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 12/21] packed-backend.c: reorder some definitions Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 13/21] packed_ref_cache: keep the `packed-refs` file mmapped if possible Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 14/21] read_packed_refs(): ensure that references are ordered when read Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 15/21] packed_ref_iterator_begin(): iterate using `mmapped_ref_iterator` Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 16/21] packed_read_raw_ref(): read the reference from the mmapped buffer Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 17/21] ref_store: implement `refs_peel_ref()` generically Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 18/21] packed_ref_store: get rid of the `ref_cache` entirely Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 19/21] ref_cache: remove support for storing peeled values Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 20/21] mmapped_ref_iterator: inline into `packed_ref_iterator` Michael Haggerty
2017-09-25  8:00 ` [PATCH v3 21/21] packed-backend.c: rename a bunch of things and update comments Michael Haggerty
2017-09-25 12:22 ` [PATCH v3 00/21] Read `packed-refs` using mmap() Jeff King
2017-09-29  2:13   ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=6fc5f32b6c526c9f6a0aa983b977b946723b61e0.1506325610.git.mhagger@alum.mit.edu \
    --to=mhagger@alum.mit.edu \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=avarab@gmail.com \
    --cc=bmwill@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=pclouds@gmail.com \
    --cc=peff@peff.net \
    --cc=sbeller@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).