git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
From: Thomas Gummerer <t.gummerer@gmail.com>
To: git@vger.kernel.org
Cc: trast@inf.ethz.ch, mhagger@alum.mit.edu, gitster@pobox.com,
	pclouds@gmail.com, robin.rosenberg@dewire.com,
	t.gummerer@gmail.com
Subject: [PATCH 05/22] read-cache: add index reading api
Date: Sun,  7 Jul 2013 10:11:43 +0200	[thread overview]
Message-ID: <1373184720-29767-6-git-send-email-t.gummerer@gmail.com> (raw)
In-Reply-To: <1373184720-29767-1-git-send-email-t.gummerer@gmail.com>

Add an api for access to the index file.  Currently there is only a very
basic api for accessing the index file, which only allows a full read of
the index, and lets the users of the data filter it.  The new index api
gives the users the possibility to use only part of the index and
provides functions for iterating over and accessing cache entries.

This simplifies future improvements to the in-memory format, as changes
will be concentrated on one file, instead of the whole git source code.

Signed-off-by: Thomas Gummerer <t.gummerer@gmail.com>
---
 cache.h         |  57 +++++++++++++++++++++++++++++-
 read-cache-v2.c |  96 +++++++++++++++++++++++++++++++++++++++++++++++--
 read-cache.c    | 108 ++++++++++++++++++++++++++++++++++++++++++++++++++++----
 read-cache.h    |  12 ++++++-
 4 files changed, 263 insertions(+), 10 deletions(-)

diff --git a/cache.h b/cache.h
index 5082b34..d38dfbd 100644
--- a/cache.h
+++ b/cache.h
@@ -127,7 +127,8 @@ struct cache_entry {
 	unsigned int ce_flags;
 	unsigned int ce_namelen;
 	unsigned char sha1[20];
-	struct cache_entry *next;
+	struct cache_entry *next; /* used by name_hash */
+	struct cache_entry *next_ce; /* used to keep a list of cache entries */
 	char name[FLEX_ARRAY]; /* more */
 };
 
@@ -258,6 +259,32 @@ static inline unsigned int canon_mode(unsigned int mode)
 
 #define cache_entry_size(len) (offsetof(struct cache_entry,name) + (len) + 1)
 
+/*
+ * Options by which the index should be filtered when read partially.
+ *
+ * pathspec: The pathspec which the index entries have to match
+ * seen: Used to return the seen parameter from match_pathspec()
+ * max_prefix, max_prefix_len: These variables are set to the longest
+ *     common prefix, the length of the longest common prefix of the
+ *     given pathspec
+ *
+ * read_staged: used to indicate if the conflicted entries (entries
+ *     with a stage) should be included
+ * read_cache_tree: used to indicate if the cache-tree should be read
+ * read_resolve_undo: used to indicate if the resolve undo data should
+ *     be read
+ */
+struct filter_opts {
+	const char **pathspec;
+	char *seen;
+	char *max_prefix;
+	int max_prefix_len;
+
+	int read_staged;
+	int read_cache_tree;
+	int read_resolve_undo;
+};
+
 struct index_state {
 	struct cache_entry **cache;
 	unsigned int version;
@@ -270,6 +297,8 @@ struct index_state {
 	struct hash_table name_hash;
 	struct hash_table dir_hash;
 	struct index_ops *ops;
+	struct internal_ops *internal_ops;
+	struct filter_opts *filter_opts;
 };
 
 extern struct index_state the_index;
@@ -311,6 +340,17 @@ extern void free_name_hash(struct index_state *istate);
 #define unmerge_cache_entry_at(at) unmerge_index_entry_at(&the_index, at)
 #define unmerge_cache(pathspec) unmerge_index(&the_index, pathspec)
 #define read_blob_data_from_cache(path, sz) read_blob_data_from_index(&the_index, (path), (sz))
+
+/* index api */
+#define read_cache_filtered(opts) read_index_filtered(&the_index, (opts))
+#define read_cache_filtered_from(path, opts) read_index_filtered_from(&the_index, (path), (opts))
+#define get_cache_entry_by_name(name, namelen, ce) \
+	get_index_entry_by_name(&the_index, (name), (namelen), (ce))
+#define for_each_cache_entry(fn, cb_data) \
+	for_each_index_entry(&the_index, (fn), (cb_data))
+#define next_cache_entry(ce) next_index_entry(ce)
+#define cache_change_filter_opts(opts) index_change_filter_opts(&the_index, (opts))
+#define sort_cache() sort_index(&the_index)
 #endif
 
 enum object_type {
@@ -438,6 +478,21 @@ extern int init_db(const char *template_dir, unsigned int flags);
 		} \
 	} while (0)
 
+/* index api */
+extern int read_index_filtered(struct index_state *, struct filter_opts *opts);
+extern int read_index_filtered_from(struct index_state *, const char *path, struct filter_opts *opts);
+extern int get_index_entry_by_name(struct index_state *, const char *name, int namelen,
+				   struct cache_entry **ce);
+extern struct cache_entry *next_index_entry(struct cache_entry *ce);
+void index_change_filter_opts(struct index_state *istate, struct filter_opts *opts);
+void sort_index(struct index_state *istate);
+
+typedef int each_cache_entry_fn(struct cache_entry *ce, void *);
+
+extern int for_each_index_entry(struct index_state *istate,
+				each_cache_entry_fn, void *);
+
+
 /* Initialize and use the cache information */
 extern int read_index(struct index_state *);
 extern int read_index_preload(struct index_state *, const char **pathspec);
diff --git a/read-cache-v2.c b/read-cache-v2.c
index a6883c3..1ed640d 100644
--- a/read-cache-v2.c
+++ b/read-cache-v2.c
@@ -3,6 +3,7 @@
 #include "resolve-undo.h"
 #include "cache-tree.h"
 #include "varint.h"
+#include "dir.h"
 
 /* Mask for the name length in ce_flags in the on-disk index */
 #define CE_NAMEMASK  (0x0fff)
@@ -117,6 +118,7 @@ static struct cache_entry *cache_entry_from_ondisk(struct ondisk_cache_entry *on
 	hashcpy(ce->sha1, ondisk->sha1);
 	memcpy(ce->name, name, len);
 	ce->name[len] = '\0';
+	ce->next_ce = NULL;
 	return ce;
 }
 
@@ -207,14 +209,21 @@ static int read_index_extension(struct index_state *istate,
 	return 0;
 }
 
+/*
+ * The performance is the same if we read the whole index or only
+ * part of it, therefore we always read the whole index to avoid
+ * having to re-read it later.  The filter_opts will determine
+ * what part of the index is used when retrieving the cache-entries.
+ */
 static int read_index_v2(struct index_state *istate, void *mmap,
-			 unsigned long mmap_size)
+			 unsigned long mmap_size, struct filter_opts *opts)
 {
 	int i;
 	unsigned long src_offset;
 	struct cache_version_header *hdr;
 	struct cache_header *hdr_v2;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
+	struct cache_entry *prev = NULL;
 
 	hdr = mmap;
 	hdr_v2 = (struct cache_header *)((char *)mmap + sizeof(*hdr));
@@ -237,9 +246,12 @@ static int read_index_v2(struct index_state *istate, void *mmap,
 
 		disk_ce = (struct ondisk_cache_entry *)((char *)mmap + src_offset);
 		ce = create_from_disk(disk_ce, &consumed, previous_name);
+		if (prev)
+			prev->next_ce = ce;
 		set_index_entry(istate, i, ce);
 
 		src_offset += consumed;
+		prev = ce;
 	}
 	strbuf_release(&previous_name_buf);
 
@@ -267,6 +279,16 @@ unmap:
 	die("index file corrupt");
 }
 
+static void index_change_filter_opts_v2(struct index_state *istate, struct filter_opts *opts)
+{
+	/*
+	 * We don't need to re-read anything, because in index v2 we
+	 * read the whole index up front.  Just change the options by
+	 * which the index is filtered when accessing it.
+	 */
+	istate->filter_opts = opts;
+}
+
 #define WRITE_BUFFER_SIZE 8192
 static unsigned char write_buffer[WRITE_BUFFER_SIZE];
 static unsigned long write_buffer_len;
@@ -548,9 +570,79 @@ static int write_index_v2(struct index_state *istate, int newfd)
 	return 0;
 }
 
+int for_each_index_entry_v2(struct index_state *istate, each_cache_entry_fn fn, void *cb_data)
+{
+	int i, ret = 0;
+	struct filter_opts *opts= istate->filter_opts;
+
+	for (i = 0; i < istate->cache_nr; i++) {
+		struct cache_entry *ce = istate->cache[i];
+
+		if (opts && !opts->read_staged && ce_stage(ce))
+			continue;
+
+		if (opts && !match_pathspec(opts->pathspec, ce->name, ce_namelen(ce),
+					    opts->max_prefix_len, opts->seen))
+			continue;
+
+		if ((ret = fn(istate->cache[i], cb_data)))
+			break;
+	}
+	if (opts && !opts->max_prefix) {
+		opts->max_prefix = common_prefix(opts->pathspec);
+		opts->max_prefix_len = opts->max_prefix ? strlen(opts->max_prefix) : 0;
+	}
+
+	return ret;
+}
+
+int get_index_entry_by_name_v2(struct index_state *istate, const char *name, int namelen,
+			       struct cache_entry **ce)
+{
+	int pos = index_name_pos(istate, name, namelen);
+
+	*ce = NULL;
+	if (0 <= pos) {
+		*ce = istate->cache[pos];
+		return 1;
+	}
+	pos = -pos - 1;
+
+	if (pos < istate->cache_nr)
+		*ce = istate->cache[pos];
+	return 0;
+}
+
+static int cmp_cache_name_compare(const void *a_, const void *b_)
+{
+	const struct cache_entry *ce1, *ce2;
+
+	ce1 = *((const struct cache_entry **)a_);
+	ce2 = *((const struct cache_entry **)b_);
+	return cache_name_stage_compare(ce1->name, ce1->ce_namelen, ce_stage(ce1),
+					ce2->name, ce2->ce_namelen, ce_stage(ce2));
+}
+
+void sort_index_v2(struct index_state *istate)
+{
+	/*
+	 * Nuke the cache-tree first, as it will no longer be up to date
+	 */
+	cache_tree_free(&istate->cache_tree);
+	qsort(istate->cache, istate->cache_nr, sizeof(istate->cache[0]),
+	      cmp_cache_name_compare);
+}
+
 struct index_ops v2_ops = {
 	match_stat_basic,
 	verify_hdr,
 	read_index_v2,
-	write_index_v2
+	write_index_v2,
+	index_change_filter_opts_v2
+};
+
+struct internal_ops v2_internal_ops = {
+	for_each_index_entry_v2,
+	get_index_entry_by_name_v2,
+	sort_index_v2
 };
diff --git a/read-cache.c b/read-cache.c
index 3e3a0e2..b30ee75 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -471,6 +471,8 @@ int remove_index_entry_at(struct index_state *istate, int pos)
 	remove_name_hash(istate, ce);
 	istate->cache_changed = 1;
 	istate->cache_nr--;
+	if (pos > 0)
+		istate->cache[pos - 1]->next_ce = ce->next_ce;
 	if (pos >= istate->cache_nr)
 		return 0;
 	memmove(istate->cache + pos,
@@ -490,10 +492,13 @@ void remove_marked_cache_entries(struct index_state *istate)
 	unsigned int i, j;
 
 	for (i = j = 0; i < istate->cache_nr; i++) {
-		if (ce_array[i]->ce_flags & CE_REMOVE)
+		if (ce_array[i]->ce_flags & CE_REMOVE) {
 			remove_name_hash(istate, ce_array[i]);
-		else
+		} else {
+			if (j > 0)
+				ce_array[j - 1]->next_ce = ce_array[i]->next_ce;
 			ce_array[j++] = ce_array[i];
+		}
 	}
 	istate->cache_changed = 1;
 	istate->cache_nr = j;
@@ -996,6 +1001,13 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti
 		memmove(istate->cache + pos + 1,
 			istate->cache + pos,
 			(istate->cache_nr - pos - 1) * sizeof(ce));
+
+	if (pos + 1 >= istate->cache_nr)
+		ce->next_ce = NULL;
+	else
+		ce->next_ce = istate->cache[pos]->next_ce;
+	if (pos > 0)
+		istate->cache[pos - 1]->next_ce = ce;
 	set_index_entry(istate, pos, ce);
 	istate->cache_changed = 1;
 	return 0;
@@ -1272,7 +1284,82 @@ static int verify_hdr_version(struct index_state *istate,
 
 int read_index(struct index_state *istate)
 {
-	return read_index_from(istate, get_index_file());
+	return read_index_filtered_from(istate, get_index_file(), NULL);
+}
+
+int read_index_filtered(struct index_state *istate, struct filter_opts *opts)
+{
+	return read_index_filtered_from(istate, get_index_file(), opts);
+}
+
+int set_internal_ops(struct index_state *istate)
+{
+	if (!istate->internal_ops && istate->cache)
+		istate->internal_ops = &v2_internal_ops;
+	if (!istate->internal_ops)
+		return 0;
+	return 1;
+}
+
+/*
+ * Execute fn for each index entry which is currently in istate.  Data
+ * can be given to the function using the cb_data parameter.
+ */
+int for_each_index_entry(struct index_state *istate, each_cache_entry_fn fn, void *cb_data)
+{
+	if (!set_internal_ops(istate))
+		return 0;
+	return istate->internal_ops->for_each_index_entry(istate, fn, cb_data);
+}
+
+/*
+ * Search for an index entry by its name.
+ * The cache entry is returned using the the ce parameter.
+ * Returns: 1 if a cache-entry was an exact match
+ *          0 if the name of a cache-entry was partially matched.  The
+ *            first cache-entry that matches is returned using the ce
+ *            parameter.  Finding the cache-entry that is needed is left
+ *            to the caller.
+ */
+int get_index_entry_by_name(struct index_state *istate, const char *name, int namelen,
+			    struct cache_entry **ce)
+{
+	if (!set_internal_ops(istate)) {
+		*ce = NULL;
+		return 0;
+	}
+	return istate->internal_ops->get_index_entry_by_name(istate, name, namelen, ce);
+}
+
+/*
+ * Return the next index entry, using the given index entry.  Use this
+ * if there is the need to iterate from a given cache-entry.
+ */
+struct cache_entry *next_index_entry(struct cache_entry *ce)
+{
+	return ce->next_ce;
+}
+
+/*
+ * Sorts the index from an unordered list
+ */
+void sort_index(struct index_state *istate)
+{
+	if (!set_internal_ops(istate))
+		return;
+	istate->internal_ops->sort_index(istate);
+}
+/*
+ * Change the filter_opts, and re-read the index if necessary
+ */
+void index_change_filter_opts(struct index_state *istate, struct filter_opts *opts)
+{
+	if (!istate->ops) {
+		/* Just re-read the index, we haven't read it before */
+		read_index_filtered(istate, opts);
+		return;
+	}
+	istate->ops->index_change_filter_opts(istate, opts);
 }
 
 static int index_changed(struct stat *st_old, struct stat *st_new)
@@ -1295,8 +1382,9 @@ static int index_changed(struct stat *st_old, struct stat *st_new)
 	return 0;
 }
 
-/* remember to discard_cache() before reading a different cache! */
-int read_index_from(struct index_state *istate, const char *path)
+
+int read_index_filtered_from(struct index_state *istate, const char *path,
+			     struct filter_opts *opts)
 {
 	int fd, err, i;
 	struct stat st_old, st_new;
@@ -1337,7 +1425,7 @@ int read_index_from(struct index_state *istate, const char *path)
 		if (istate->ops->verify_hdr(mmap, mmap_size) < 0)
 			err = 1;
 
-		if (istate->ops->read_index(istate, mmap, mmap_size) < 0)
+		if (istate->ops->read_index(istate, mmap, mmap_size, opts) < 0)
 			err = 1;
 		istate->timestamp.sec = st_old.st_mtime;
 		istate->timestamp.nsec = ST_MTIME_NSEC(st_old);
@@ -1345,6 +1433,7 @@ int read_index_from(struct index_state *istate, const char *path)
 			die_errno("cannot stat the open index");
 
 		munmap(mmap, mmap_size);
+		istate->filter_opts = opts;
 		if (!index_changed(&st_old, &st_new) && !err)
 			return istate->cache_nr;
 	}
@@ -1352,6 +1441,13 @@ int read_index_from(struct index_state *istate, const char *path)
 	die("index file corrupt");
 }
 
+
+/* remember to discard_cache() before reading a different cache! */
+int read_index_from(struct index_state *istate, const char *path)
+{
+	return read_index_filtered_from(istate, path, NULL);
+}
+
 int is_index_unborn(struct index_state *istate)
 {
 	return (!istate->cache_nr && !istate->timestamp.sec);
diff --git a/read-cache.h b/read-cache.h
index f31a38b..ce9b79c 100644
--- a/read-cache.h
+++ b/read-cache.h
@@ -24,11 +24,21 @@ struct cache_version_header {
 struct index_ops {
 	int (*match_stat_basic)(const struct cache_entry *ce, struct stat *st, int changed);
 	int (*verify_hdr)(void *mmap, unsigned long size);
-	int (*read_index)(struct index_state *istate, void *mmap, unsigned long mmap_size);
+	int (*read_index)(struct index_state *istate, void *mmap, unsigned long mmap_size,
+			  struct filter_opts *opts);
 	int (*write_index)(struct index_state *istate, int newfd);
+	void (*index_change_filter_opts)(struct index_state *istate, struct filter_opts *opts);
+};
+
+struct internal_ops {
+	int (*for_each_index_entry)(struct index_state *istate, each_cache_entry_fn fn, void *cb_data);
+	int (*get_index_entry_by_name)(struct index_state *istate, const char *name, int namelen,
+				       struct cache_entry **ce);
+	void (*sort_index)(struct index_state *istate);
 };
 
 extern struct index_ops v2_ops;
+extern struct internal_ops v2_internal_ops;
 
 #ifndef NEEDS_ALIGNED_ACCESS
 #define ntoh_s(var) ntohs(var)
-- 
1.8.3.453.g1dfc63d

  parent reply	other threads:[~2013-07-07  8:13 UTC|newest]

Thread overview: 51+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-07-07  8:11 [PATCH 00/22] Index v5 Thomas Gummerer
2013-07-07  8:11 ` [PATCH 01/22] t2104: Don't fail for index versions other than [23] Thomas Gummerer
2013-07-07  8:11 ` [PATCH 02/22] read-cache: split index file version specific functionality Thomas Gummerer
2013-07-07  8:11 ` [PATCH 03/22] read-cache: move index v2 specific functions to their own file Thomas Gummerer
2013-07-07  8:11 ` [PATCH 04/22] read-cache: Re-read index if index file changed Thomas Gummerer
2013-07-07  8:11 ` Thomas Gummerer [this message]
2013-07-08  2:01   ` [PATCH 05/22] read-cache: add index reading api Duy Nguyen
2013-07-08 11:40     ` Thomas Gummerer
2013-07-08  2:19   ` Duy Nguyen
2013-07-08 11:20     ` Thomas Gummerer
2013-07-08 12:45       ` Duy Nguyen
2013-07-08 13:37         ` Thomas Gummerer
2013-07-08 20:54         ` [PATCH 5.5/22] Add documentation for the index api Thomas Gummerer
2013-07-09 15:42           ` Duy Nguyen
2013-07-09 20:10             ` Thomas Gummerer
2013-07-10  5:28               ` Duy Nguyen
2013-07-11 11:30                 ` Thomas Gummerer
2013-07-11 11:42                   ` Duy Nguyen
2013-07-11 12:27                     ` Duy Nguyen
2013-07-08 16:36   ` [PATCH 05/22] read-cache: add index reading api Junio C Hamano
2013-07-08 20:10     ` Thomas Gummerer
2013-07-08 23:09       ` Junio C Hamano
2013-07-09 20:13         ` Thomas Gummerer
2013-07-07  8:11 ` [PATCH 06/22] make sure partially read index is not changed Thomas Gummerer
2013-07-08 16:31   ` Junio C Hamano
2013-07-08 18:33     ` Thomas Gummerer
2013-07-07  8:11 ` [PATCH 07/22] dir.c: use index api Thomas Gummerer
2013-07-07  8:11 ` [PATCH 08/22] tree.c: " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 09/22] name-hash.c: " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 10/22] grep.c: Use " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 11/22] ls-files.c: use the " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 12/22] read-cache: make read_blob_data_from_index use " Thomas Gummerer
2013-07-07  8:11 ` [PATCH 13/22] documentation: add documentation of the index-v5 file format Thomas Gummerer
2013-07-11 10:39   ` Duy Nguyen
2013-07-11 11:39     ` Thomas Gummerer
2013-07-11 11:47       ` Duy Nguyen
2013-07-11 12:26         ` Thomas Gummerer
2013-07-11 12:50           ` Duy Nguyen
2013-07-07  8:11 ` [PATCH 14/22] read-cache: make in-memory format aware of stat_crc Thomas Gummerer
2013-07-07  8:11 ` [PATCH 15/22] read-cache: read index-v5 Thomas Gummerer
2013-07-07 20:18   ` Eric Sunshine
2013-07-08 11:40     ` Thomas Gummerer
2013-07-07  8:11 ` [PATCH 16/22] read-cache: read resolve-undo data Thomas Gummerer
2013-07-07  8:11 ` [PATCH 17/22] read-cache: read cache-tree in index-v5 Thomas Gummerer
2013-07-07 20:41   ` Eric Sunshine
2013-07-07  8:11 ` [PATCH 18/22] read-cache: write index-v5 Thomas Gummerer
2013-07-07 20:43   ` Eric Sunshine
2013-07-07  8:11 ` [PATCH 19/22] read-cache: write index-v5 cache-tree data Thomas Gummerer
2013-07-07  8:11 ` [PATCH 20/22] read-cache: write resolve-undo data for index-v5 Thomas Gummerer
2013-07-07  8:11 ` [PATCH 21/22] update-index.c: rewrite index when index-version is given Thomas Gummerer
2013-07-07  8:12 ` [PATCH 22/22] p0003-index.sh: add perf test for the index formats Thomas Gummerer

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1373184720-29767-6-git-send-email-t.gummerer@gmail.com \
    --to=t.gummerer@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=mhagger@alum.mit.edu \
    --cc=pclouds@gmail.com \
    --cc=robin.rosenberg@dewire.com \
    --cc=trast@inf.ethz.ch \
    --subject='Re: [PATCH 05/22] read-cache: add index reading api' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).