git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / Atom feed
From: Thomas Gummerer <t.gummerer@gmail.com>
To: git@vger.kernel.org
Cc: trast@inf.ethz.ch, mhagger@alum.mit.edu, gitster@pobox.com,
	pclouds@gmail.com, robin.rosenberg@dewire.com,
	t.gummerer@gmail.com
Subject: [PATCH 17/22] read-cache: read cache-tree in index-v5
Date: Sun,  7 Jul 2013 10:11:55 +0200
Message-ID: <1373184720-29767-18-git-send-email-t.gummerer@gmail.com> (raw)
In-Reply-To: <1373184720-29767-1-git-send-email-t.gummerer@gmail.com>

Since the cache-tree data is saved as part of the directory data,
we already read it at the beginning of the index. The cache-tree
is only converted from this directory data.

The cache-tree data is arranged in a tree, with the children sorted by
pathlen at each node, while the ondisk format is sorted lexically.
So we have to rebuild this format from the on-disk directory list.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache-tree.c    |   2 +-
 cache-tree.h    |   6 ++++
 read-cache-v5.c | 100 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+), 1 deletion(-)

diff --git a/cache-tree.c b/cache-tree.c
index 37e4d00..f4b0917 100644
--- a/cache-tree.c
+++ b/cache-tree.c
@@ -31,7 +31,7 @@ void cache_tree_free(struct cache_tree **it_p)
 	*it_p = NULL;
 }
 
-static int subtree_name_cmp(const char *one, int onelen,
+int subtree_name_cmp(const char *one, int onelen,
 			    const char *two, int twolen)
 {
 	if (onelen < twolen)
diff --git a/cache-tree.h b/cache-tree.h
index 55d0f59..9aac493 100644
--- a/cache-tree.h
+++ b/cache-tree.h
@@ -21,10 +21,16 @@ struct cache_tree {
 	struct cache_tree_sub **down;
 };
 
+struct directory_queue {
+	struct directory_queue *down;
+	struct directory_entry *de;
+};
+
 struct cache_tree *cache_tree(void);
 void cache_tree_free(struct cache_tree **);
 void cache_tree_invalidate_path(struct cache_tree *, const char *);
 struct cache_tree_sub *cache_tree_sub(struct cache_tree *, const char *);
+int subtree_name_cmp(const char *, int, const char *, int);
 
 void cache_tree_write(struct strbuf *, struct cache_tree *root);
 struct cache_tree *cache_tree_read(const char *buffer, unsigned long size);
diff --git a/read-cache-v5.c b/read-cache-v5.c
index 193970a..f1ad132 100644
--- a/read-cache-v5.c
+++ b/read-cache-v5.c
@@ -448,6 +448,103 @@ static int read_conflicts(struct conflict_entry **head,
 	return 0;
 }
 
+static struct cache_tree *convert_one(struct directory_queue *queue, int dirnr)
+{
+	int i, subtree_nr;
+	struct cache_tree *it;
+	struct directory_queue *down;
+
+	it = cache_tree();
+	it->entry_count = queue[dirnr].de->de_nentries;
+	subtree_nr = queue[dirnr].de->de_nsubtrees;
+	if (0 <= it->entry_count)
+		hashcpy(it->sha1, queue[dirnr].de->sha1);
+
+	/*
+	 * Just a heuristic -- we do not add directories that often but
+	 * we do not want to have to extend it immediately when we do,
+	 * hence +2.
+	 */
+	it->subtree_alloc = subtree_nr + 2;
+	it->down = xcalloc(it->subtree_alloc, sizeof(struct cache_tree_sub *));
+	down = queue[dirnr].down;
+	for (i = 0; i < subtree_nr; i++) {
+		struct cache_tree *sub;
+		struct cache_tree_sub *subtree;
+		char *buf, *name;
+
+		name = "";
+		buf = strtok(down[i].de->pathname, "/");
+		while (buf) {
+			name = buf;
+			buf = strtok(NULL, "/");
+		}
+		sub = convert_one(down, i);
+		if(!sub)
+			goto free_return;
+		subtree = cache_tree_sub(it, name);
+		subtree->cache_tree = sub;
+	}
+	if (subtree_nr != it->subtree_nr)
+		die("cache-tree: internal error");
+	return it;
+ free_return:
+	cache_tree_free(&it);
+	return NULL;
+}
+
+static int compare_cache_tree_elements(const void *a, const void *b)
+{
+	const struct directory_entry *de1, *de2;
+
+	de1 = ((const struct directory_queue *)a)->de;
+	de2 = ((const struct directory_queue *)b)->de;
+	return subtree_name_cmp(de1->pathname, de1->de_pathlen,
+				de2->pathname, de2->de_pathlen);
+}
+
+static struct directory_entry *sort_directories(struct directory_entry *de,
+						struct directory_queue *queue)
+{
+	int i, nsubtrees;
+
+	nsubtrees = de->de_nsubtrees;
+	for (i = 0; i < nsubtrees; i++) {
+		struct directory_entry *new_de;
+		de = de->next;
+		new_de = xmalloc(directory_entry_size(de->de_pathlen));
+		memcpy(new_de, de, directory_entry_size(de->de_pathlen));
+		queue[i].de = new_de;
+		if (de->de_nsubtrees) {
+			queue[i].down = xcalloc(de->de_nsubtrees,
+					sizeof(struct directory_queue));
+			de = sort_directories(de,
+					queue[i].down);
+		}
+	}
+	qsort(queue, nsubtrees, sizeof(struct directory_queue),
+			compare_cache_tree_elements);
+	return de;
+}
+
+/*
+ * This function modifys the directory argument that is given to it.
+ * Don't use it if the directory entries are still needed after.
+ */
+static struct cache_tree *cache_tree_convert_v5(struct directory_entry *de)
+{
+	struct directory_queue *queue;
+
+	if (!de->de_nentries)
+		return NULL;
+	queue = xcalloc(1, sizeof(struct directory_queue));
+	queue[0].de = de;
+	queue[0].down = xcalloc(de->de_nsubtrees, sizeof(struct directory_queue));
+
+	sort_directories(de, queue[0].down);
+	return convert_one(queue, 0);
+}
+
 static void resolve_undo_convert_v5(struct index_state *istate,
 				    struct conflict_entry *conflict)
 {
@@ -650,6 +747,7 @@ static int read_index_filtered_v5(struct index_state *istate, void *mmap,
 		} else
 			de = de->next;
 	}
+	istate->cache_tree = cache_tree_convert_v5(root_directory);
 	istate->cache_nr = nr;
 	istate->partially_read = 1;
 	return 0;
@@ -673,6 +771,8 @@ static int read_index_v5(struct index_state *istate, void *mmap,
 		if (read_entries(istate, &de, &entry_offset, &mmap,
 				 mmap_size, &nr, &foffsetblock, &prev) < 0)
 			return -1;
+
+	istate->cache_tree = cache_tree_convert_v5(root_directory);
 	istate->cache_nr = nr;
 	istate->partially_read = 0;
 	return 0;
-- 
1.8.3.453.g1dfc63d

  parent reply	other threads:[~2013-07-07  8:13 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-07  8:11 [PATCH 00/22] Index v5 Thomas Gummerer
2013-07-07  8:11 ` [PATCH 01/22] t2104: Don't fail for index versions other than [23] Thomas Gummerer
2013-07-07  8:11 ` [PATCH 02/22] read-cache: split index file version specific functionality Thomas Gummerer
2013-07-07  8:11 ` [PATCH 03/22] read-cache: move index v2 specific functions to their own file Thomas Gummerer
2013-07-07  8:11 ` [PATCH 04/22] read-cache: Re-read index if index file changed Thomas Gummerer
2013-07-07  8:11 ` [PATCH 05/22] read-cache: add index reading api Thomas Gummerer
2013-07-08  2:01   ` Duy Nguyen
2013-07-08 11:40     ` Thomas Gummerer
2013-07-08  2:19   ` Duy Nguyen
2013-07-08 11:20     ` Thomas Gummerer
2013-07-08 12:45       ` Duy Nguyen
2013-07-08 13:37         ` Thomas Gummerer
2013-07-08 20:54         ` [PATCH 5.5/22] Add documentation for the index api Thomas Gummerer
2013-07-09 15:42           ` Duy Nguyen
2013-07-09 20:10             ` Thomas Gummerer
2013-07-10  5:28               ` Duy Nguyen
2013-07-11 11:30                 ` Thomas Gummerer
2013-07-11 11:42                   ` Duy Nguyen
2013-07-11 12:27                     ` Duy Nguyen
2013-07-08 16:36   ` [PATCH 05/22] read-cache: add index reading api Junio C Hamano
2013-07-08 20:10     ` Thomas Gummerer
2013-07-08 23:09       ` Junio C Hamano
2013-07-09 20:13         ` Thomas Gummerer
2013-07-07  8:11 ` [PATCH 06/22] make sure partially read index is not changed Thomas Gummerer
2013-07-08 16:31   ` Junio C Hamano
2013-07-08 18:33     ` Thomas Gummerer
2013-07-07  8:11 ` [PATCH 07/22] dir.c: use index api Thomas Gummerer
2013-07-07  8:11 ` [PATCH 08/22] tree.c: " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 09/22] name-hash.c: " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 10/22] grep.c: Use " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 11/22] ls-files.c: use the " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 12/22] read-cache: make read_blob_data_from_index use " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 13/22] documentation: add documentation of the index-v5 file format Thomas Gummerer
2013-07-11 10:39   ` Duy Nguyen
2013-07-11 11:39     ` Thomas Gummerer
2013-07-11 11:47       ` Duy Nguyen
2013-07-11 12:26         ` Thomas Gummerer
2013-07-11 12:50           ` Duy Nguyen
2013-07-07  8:11 ` [PATCH 14/22] read-cache: make in-memory format aware of stat_crc Thomas Gummerer
2013-07-07  8:11 ` [PATCH 15/22] read-cache: read index-v5 Thomas Gummerer
2013-07-07 20:18   ` Eric Sunshine
2013-07-08 11:40     ` Thomas Gummerer
2013-07-07  8:11 ` [PATCH 16/22] read-cache: read resolve-undo data Thomas Gummerer
2013-07-07  8:11 ` Thomas Gummerer [this message]
2013-07-07 20:41   ` [PATCH 17/22] read-cache: read cache-tree in index-v5 Eric Sunshine
2013-07-07  8:11 ` [PATCH 18/22] read-cache: write index-v5 Thomas Gummerer
2013-07-07 20:43   ` Eric Sunshine
2013-07-07  8:11 ` [PATCH 19/22] read-cache: write index-v5 cache-tree data Thomas Gummerer
2013-07-07  8:11 ` [PATCH 20/22] read-cache: write resolve-undo data for index-v5 Thomas Gummerer
2013-07-07  8:11 ` [PATCH 21/22] update-index.c: rewrite index when index-version is given Thomas Gummerer
2013-07-07  8:12 ` [PATCH 22/22] p0003-index.sh: add perf test for the index formats Thomas Gummerer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1373184720-29767-18-git-send-email-t.gummerer@gmail.com \
    --to=t.gummerer@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mhagger@alum.mit.edu \
    --cc=pclouds@gmail.com \
    --cc=robin.rosenberg@dewire.com \
    --cc=trast@inf.ethz.ch \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org list mirror (unofficial, one of many)

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 git git/ https://public-inbox.org/git \
		git@vger.kernel.org
	public-inbox-index git

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.io/gmane.comp.version-control.git
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for the project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git