git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Johan Herland <johan@herland.net>
To: gitster@pobox.com
Cc: git@vger.kernel.org, johan@herland.net,
	Johannes.Schindelin@gmx.de, trast@student.ethz.ch,
	tavestbo@trolltech.com, git@drmicha.warpmail.net,
	chriscool@tuxfamily.org, spearce@spearce.org,
	Johannes Schindelin <johannes.schindelin@gmx.de>
Subject: [PATCHv5 03/14] Speed up git notes lookup
Date: Tue, 08 Sep 2009 04:26:51 +0200	[thread overview]
Message-ID: <1252376822-6138-4-git-send-email-johan@herland.net> (raw)
In-Reply-To: <1252376822-6138-1-git-send-email-johan@herland.net>

From: Johannes Schindelin <Johannes.Schindelin@gmx.de>

To avoid looking up each and every commit in the notes ref's tree
object, which is very expensive, speed things up by slurping the tree
object's contents into a hash_map.

The idea for the hashmap singleton is from David Reiss, initial
benchmarking by Jeff King.

Note: the implementation allows for arbitrary entries in the notes
tree object, ignoring those that do not reference a valid object.  This
allows you to annotate arbitrary branches, or objects.

This patch has been improved by the following contributions:
- Junio C Hamano: fixed an obvious error in initialize_hash_map()

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
Signed-off-by: Johan Herland <johan@herland.net>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 notes.c |  112 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/notes.c b/notes.c
index 401966d..9172154 100644
--- a/notes.c
+++ b/notes.c
@@ -4,15 +4,112 @@
 #include "refs.h"
 #include "utf8.h"
 #include "strbuf.h"
+#include "tree-walk.h"
+
+struct entry {
+	unsigned char commit_sha1[20];
+	unsigned char notes_sha1[20];
+};
+
+struct hash_map {
+	struct entry *entries;
+	off_t count, size;
+};
 
 static int initialized;
+static struct hash_map hash_map;
+
+static int hash_index(struct hash_map *map, const unsigned char *sha1)
+{
+	int i = ((*(unsigned int *)sha1) % map->size);
+
+	for (;;) {
+		unsigned char *current = map->entries[i].commit_sha1;
+
+		if (!hashcmp(sha1, current))
+			return i;
+
+		if (is_null_sha1(current))
+			return -1 - i;
+
+		if (++i == map->size)
+			i = 0;
+	}
+}
+
+static void add_entry(const unsigned char *commit_sha1,
+		const unsigned char *notes_sha1)
+{
+	int index;
+
+	if (hash_map.count + 1 > hash_map.size >> 1) {
+		int i, old_size = hash_map.size;
+		struct entry *old = hash_map.entries;
+
+		hash_map.size = old_size ? old_size << 1 : 64;
+		hash_map.entries = (struct entry *)
+			xcalloc(sizeof(struct entry), hash_map.size);
+
+		for (i = 0; i < old_size; i++)
+			if (!is_null_sha1(old[i].commit_sha1)) {
+				index = -1 - hash_index(&hash_map,
+						old[i].commit_sha1);
+				memcpy(hash_map.entries + index, old + i,
+					sizeof(struct entry));
+			}
+		free(old);
+	}
+
+	index = hash_index(&hash_map, commit_sha1);
+	if (index < 0) {
+		index = -1 - index;
+		hash_map.count++;
+	}
+
+	hashcpy(hash_map.entries[index].commit_sha1, commit_sha1);
+	hashcpy(hash_map.entries[index].notes_sha1, notes_sha1);
+}
+
+static void initialize_hash_map(const char *notes_ref_name)
+{
+	unsigned char sha1[20], commit_sha1[20];
+	unsigned mode;
+	struct tree_desc desc;
+	struct name_entry entry;
+	void *buf;
+
+	if (!notes_ref_name || read_ref(notes_ref_name, commit_sha1) ||
+	    get_tree_entry(commit_sha1, "", sha1, &mode))
+		return;
+
+	buf = fill_tree_descriptor(&desc, sha1);
+	if (!buf)
+		die("Could not read %s for notes-index", sha1_to_hex(sha1));
+
+	while (tree_entry(&desc, &entry))
+		if (!get_sha1(entry.path, commit_sha1))
+			add_entry(commit_sha1, entry.sha1);
+	free(buf);
+}
+
+static unsigned char *lookup_notes(const unsigned char *commit_sha1)
+{
+	int index;
+
+	if (!hash_map.size)
+		return NULL;
+
+	index = hash_index(&hash_map, commit_sha1);
+	if (index < 0)
+		return NULL;
+	return hash_map.entries[index].notes_sha1;
+}
 
 void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 		const char *output_encoding)
 {
 	static const char utf8[] = "utf-8";
-	struct strbuf name = STRBUF_INIT;
-	unsigned char sha1[20];
+	unsigned char *sha1;
 	char *msg, *msg_p;
 	unsigned long linelen, msglen;
 	enum object_type type;
@@ -23,17 +120,12 @@ void get_commit_notes(const struct commit *commit, struct strbuf *sb,
 			notes_ref_name = getenv(GIT_NOTES_REF_ENVIRONMENT);
 		else if (!notes_ref_name)
 			notes_ref_name = GIT_NOTES_DEFAULT_REF;
-		if (notes_ref_name && read_ref(notes_ref_name, sha1))
-			notes_ref_name = NULL;
+		initialize_hash_map(notes_ref_name);
 		initialized = 1;
 	}
 
-	if (!notes_ref_name)
-		return;
-
-	strbuf_addf(&name, "%s:%s", notes_ref_name,
-			sha1_to_hex(commit->object.sha1));
-	if (get_sha1(name.buf, sha1))
+	sha1 = lookup_notes(commit->object.sha1);
+	if (!sha1)
 		return;
 
 	if (!(msg = read_sha1_file(sha1, &type, &msglen)) || !msglen ||
-- 
1.6.4.304.g1365c.dirty

  parent reply	other threads:[~2009-09-08  2:28 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-09-08  2:26 [PATCHv5 00/14] git notes Johan Herland
2009-09-08  2:26 ` [PATCHv5 01/14] Introduce commit notes Johan Herland
2009-09-08  2:26 ` [PATCHv5 02/14] Add a script to edit/inspect notes Johan Herland
2009-09-08  2:26 ` Johan Herland [this message]
2009-09-08  2:26 ` [PATCHv5 04/14] Add an expensive test for git-notes Johan Herland
2009-09-08  2:26 ` [PATCHv5 05/14] Teach "-m <msg>" and "-F <file>" to "git notes edit" Johan Herland
2009-09-08  2:26 ` [PATCHv5 06/14] fast-import: Add support for importing commit notes Johan Herland
2009-09-08  2:26 ` [PATCHv5 07/14] t3302-notes-index-expensive: Speed up create_repo() Johan Herland
2009-09-08  2:26 ` [PATCHv5 08/14] Add flags to get_commit_notes() to control the format of the note string Johan Herland
2009-09-08  2:26 ` [PATCHv5 09/14] Add '%N'-format for pretty-printing commit notes Johan Herland
2009-09-08  2:26 ` [PATCHv5 10/14] Teach notes code to free its internal data structures on request Johan Herland
2009-09-08  2:26 ` [PATCHv5 11/14] Teach the notes lookup code to parse notes trees with various fanout schemes Johan Herland
2009-09-08  2:27 ` [PATCHv5 12/14] Selftests verifying semantics when loading notes trees with various fanouts Johan Herland
2009-09-08  2:27 ` [PATCHv5 13/14] Allow flexible organization of notes trees, using both commit date and SHA1 Johan Herland
2009-09-08  2:27 ` [PATCHv5 14/14] Add test cases for date-based fanouts Johan Herland
2009-09-08  3:12 ` [PATCHv5 00/14] git notes Johan Herland
2009-09-08  4:16   ` Junio C Hamano
2009-09-08  8:54     ` Johan Herland
2009-09-08  9:32       ` Johannes Schindelin
2009-09-08 12:36         ` Johan Herland
2009-09-08 15:53           ` Johannes Schindelin
2009-09-08 22:46             ` Johan Herland
2009-09-10  6:23               ` Stephen R. van den Berg
2009-09-10  9:25           ` Johan Herland
2009-09-08 20:31         ` Junio C Hamano
2009-09-08 21:10           ` Shawn O. Pearce
2009-09-08 21:36             ` Sverre Rabbelier
2009-09-08 21:39               ` Shawn O. Pearce
2009-09-08 21:57                 ` Sverre Rabbelier
2009-09-08 21:40           ` Johan Herland
2009-09-12 15:50   ` Johan Herland
2009-09-12 18:11     ` Shawn O. Pearce
2009-09-12 18:35       ` Johan Herland
2009-09-10 14:00 ` Geert Bosch
2009-09-10 14:09   ` Michael J Gruber
2009-09-10 14:12     ` Geert Bosch
2009-09-12  0:11 ` Junio C Hamano
2009-09-12 15:52   ` Johan Herland
2009-09-12 16:08     ` [PATCHv6 " Johan Herland
2009-09-12 16:08     ` [PATCHv6 01/14] Introduce commit notes Johan Herland
2009-09-12 16:08     ` [PATCHv6 02/14] Add a script to edit/inspect notes Johan Herland
2009-09-12 16:08     ` [PATCHv6 03/14] Speed up git notes lookup Johan Herland
2009-09-12 16:08     ` [PATCHv6 04/14] Add an expensive test for git-notes Johan Herland
2009-09-12 16:08     ` [PATCHv6 05/14] Teach "-m <msg>" and "-F <file>" to "git notes edit" Johan Herland
2009-09-12 16:08     ` [PATCHv6 06/14] fast-import: Add support for importing commit notes Johan Herland
2009-09-12 16:08     ` [PATCHv6 07/14] t3302-notes-index-expensive: Speed up create_repo() Johan Herland
2009-09-12 16:08     ` [PATCHv6 08/14] Add flags to get_commit_notes() to control the format of the note string Johan Herland
2009-09-12 16:08     ` [PATCHv6 09/14] Add '%N'-format for pretty-printing commit notes Johan Herland
2009-09-12 16:08     ` [PATCHv6 10/14] Teach notes code to free its internal data structures on request Johan Herland
2009-09-12 18:40       ` Junio C Hamano
2009-09-12 22:21         ` Johan Herland
2009-09-12 16:08     ` [PATCHv6 11/14] Teach the notes lookup code to parse notes trees with various fanout schemes Johan Herland
2009-09-12 16:08     ` [PATCHv6 12/14] Selftests verifying semantics when loading notes trees with various fanouts Johan Herland
2009-09-12 16:08     ` [PATCHv6 13/14] Allow flexible organization of notes trees, using both commit date and SHA1 Johan Herland
2009-09-12 18:41       ` Junio C Hamano
2009-09-12 22:33         ` Johan Herland
2009-09-12 23:37           ` Junio C Hamano
2009-09-12 16:08     ` [PATCHv6 14/14] Add test cases for various date-based fanouts Johan Herland

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1252376822-6138-4-git-send-email-johan@herland.net \
    --to=johan@herland.net \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=chriscool@tuxfamily.org \
    --cc=git@drmicha.warpmail.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=spearce@spearce.org \
    --cc=tavestbo@trolltech.com \
    --cc=trast@student.ethz.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).