git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jeff King <peff@peff.net>
To: Junio C Hamano <gitster@pobox.com>
Cc: git@vger.kernel.org
Subject: [PATCH 2/2] cache patch ids on disk
Date: Thu, 4 Aug 2011 16:49:47 -0600	[thread overview]
Message-ID: <20110804224947.GB28215@sigill.intra.peff.net> (raw)
In-Reply-To: <20110804224848.GA27545@sigill.intra.peff.net>

Some workflows may involve running "git cherry" a lot to
look for identical patches. Git ends up calculating the
patch-id of some commits many times, which can be slow.

This patch provides an option, core.cachepatchid, to cache
the calculated patch ids persistently on disk. This trades
more disk space (and more RAM used for disk cache) for less
CPU time. Whether this is a good idea depends on your
workflow and how much disk and RAM you have (the cache uses
40 bytes per stored commit: a 20-byte key plus a 20-byte
patch id).

Here's one cherry-heavy workflow (checking which topic
branches have been accepted upstream), and some timings:

  have_commits() {
	  test -z "`git cherry "$@" | grep -v ^-`"
  }
  for i in $topic_branches; do
    if have_commits origin/master $i $i@{u}; then
      echo $i: merged to origin/master
    elif have_commits origin/next $i $i@{u}; then
      echo $i: merged to origin/next
    else
      echo $i: not merged
    fi
  done

  # without patch
  real    0m9.709s
  user    0m8.693s
  sys     0m0.676s

  # with patch, first run
  real    0m1.946s
  user    0m1.244s
  sys     0m0.428s

  # with patch, subsequent run
  real    0m1.379s
  user    0m0.844s
  sys     0m0.268s

and the disk used:

  $ du -h .git/cache/patch_id/*
  8.0K .git/cache/patch_id/0000000000000000000000000000000000000000

Signed-off-by: Jeff King <peff@peff.net>
---
 cache.h          |    1 +
 config.c         |    5 +++++
 map.c            |   16 ++++++++++++++++
 map.h            |    6 ++++++
 metadata-cache.c |    2 ++
 metadata-cache.h |    2 ++
 patch-ids.c      |   22 +++++++++++++++++++++-
 7 files changed, 53 insertions(+), 1 deletions(-)

diff --git a/cache.h b/cache.h
index 9e12d55..060f0f9 100644
--- a/cache.h
+++ b/cache.h
@@ -596,6 +596,7 @@ extern int read_replace_refs;
 extern int fsync_object_files;
 extern int core_preload_index;
 extern int core_apply_sparse_checkout;
+extern int core_cache_patch_id;
 
 enum branch_track {
 	BRANCH_TRACK_UNSPECIFIED = -1,
diff --git a/config.c b/config.c
index e42c59b..09e84c3 100644
--- a/config.c
+++ b/config.c
@@ -659,6 +659,11 @@ static int git_default_core_config(const char *var, const char *value)
 		return 0;
 	}
 
+	if (!strcmp(var, "core.cachepatchid")) {
+		core_cache_patch_id = git_config_bool(var, value);
+		return 0;
+	}
+
 	/* Add other config variables here and to Documentation/config.txt. */
 	return 0;
 }
diff --git a/map.c b/map.c
index bb0d60a..9d8d5ab 100644
--- a/map.c
+++ b/map.c
@@ -33,6 +33,16 @@ static void disk_to_uint32(const unsigned char *disk, uint32_t *out)
 	*out = ntohl(*out);
 }
 
+static void sha1_to_disk(struct sha1 v, unsigned char *out)
+{
+	hashcpy(out, v.v);
+}
+
+static void disk_to_sha1(const unsigned char *disk, struct sha1 *out)
+{
+	hashcpy(out->v, disk);
+}
+
 static const unsigned char *disk_lookup_sha1(const unsigned char *buf,
 					     unsigned nr,
 					     unsigned ksize, unsigned vsize,
@@ -244,3 +254,9 @@ int map_persist_flush_##name(struct map_persist_##name *m, int fd) \
 
 IMPLEMENT_MAP(object_uint32, obj_equal, hash_obj)
 IMPLEMENT_MAP(object_void, obj_equal, hash_obj)
+
+IMPLEMENT_MAP(object_sha1, obj_equal, hash_obj)
+IMPLEMENT_MAP_PERSIST(object_sha1,
+		      20, obj_to_disk,
+		      20, sha1_to_disk, disk_to_sha1,
+		      disk_lookup_sha1)
diff --git a/map.h b/map.h
index ceddc14..18eb939 100644
--- a/map.h
+++ b/map.h
@@ -40,7 +40,13 @@ extern void map_persist_attach_##name(struct map_persist_##name *, \
 				      unsigned int len); \
 extern int map_persist_flush_##name(struct map_persist_##name *, int fd);
 
+struct sha1 {
+	unsigned char v[20];
+};
+
 DECLARE_MAP(object_uint32, const struct object *, uint32_t)
 DECLARE_MAP(object_void, const struct object *, void *)
+DECLARE_MAP(object_sha1, const struct object *, struct sha1)
+DECLARE_MAP_PERSIST(object_sha1)
 
 #endif /* MAP_H */
diff --git a/metadata-cache.c b/metadata-cache.c
index e217db1..0ce0e90 100644
--- a/metadata-cache.c
+++ b/metadata-cache.c
@@ -124,3 +124,5 @@ int name##_cache_set(map_ktype_##map key, map_vtype_##map value) \
 	init_##name##_cache(); \
 	return map_persist_set_##map(&name##_map, key, value); \
 }
+
+IMPLEMENT_METADATA_CACHE(patch_id, object_sha1, NULL)
diff --git a/metadata-cache.h b/metadata-cache.h
index 851a4eb..ff2f6d3 100644
--- a/metadata-cache.h
+++ b/metadata-cache.h
@@ -7,4 +7,6 @@
 extern int name##_cache_get(map_ktype_##map key, map_vtype_##map *value); \
 extern int name##_cache_set(map_ktype_##map key, map_vtype_##map value);
 
+DECLARE_METADATA_CACHE(patch_id, object_sha1)
+
 #endif /* METADATA_CACHE_H */
diff --git a/patch-ids.c b/patch-ids.c
index 5717257..d1818eb 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -3,17 +3,37 @@
 #include "commit.h"
 #include "sha1-lookup.h"
 #include "patch-ids.h"
+#include "metadata-cache.h"
+
+int core_cache_patch_id;
 
 static int commit_patch_id(struct commit *commit, struct diff_options *options,
 		    unsigned char *sha1)
 {
+	if (core_cache_patch_id) {
+		struct sha1 v;
+		if (patch_id_cache_get(&commit->object, &v)) {
+			hashcpy(sha1, v.v);
+			return 0;
+		}
+	}
+
 	if (commit->parents)
 		diff_tree_sha1(commit->parents->item->object.sha1,
 		               commit->object.sha1, "", options);
 	else
 		diff_root_tree_sha1(commit->object.sha1, "", options);
 	diffcore_std(options);
-	return diff_flush_patch_id(options, sha1);
+	if (diff_flush_patch_id(options, sha1) < 0)
+		return -1;
+
+	if (core_cache_patch_id) {
+		struct sha1 v;
+		hashcpy(v.v, sha1);
+		patch_id_cache_set(&commit->object, v);
+	}
+
+	return 0;
 }
 
 static const unsigned char *patch_id_access(size_t index, void *table)
-- 
1.7.6.34.g86521e

  parent reply	other threads:[~2011-08-04 22:49 UTC|newest]

Thread overview: 57+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-07-13  6:47 [RFC/PATCHv2 0/6] generation numbers for faster traversals Jeff King
2011-07-13  6:57 ` [RFC/PATCHv2 1/6] decorate: allow storing values instead of pointers Jeff King
2011-07-13 17:52   ` Jonathan Nieder
2011-07-13 20:08     ` Jeff King
2011-07-14 17:34       ` Jeff King
2011-07-14 17:51         ` [PATCH 1/3] implement generic key/value map Jeff King
2011-07-14 18:52           ` Bert Wesarg
2011-07-14 18:54             ` Bert Wesarg
2011-07-14 18:55               ` Jeff King
2011-07-14 19:07                 ` Bert Wesarg
2011-07-14 19:14                   ` Jeff King
2011-07-14 19:18                     ` Bert Wesarg
2011-07-14 17:52         ` [PATCH 2/3] fast-export: use object to uint32 map instead of "decorate" Jeff King
2011-07-15  9:40           ` Sverre Rabbelier
2011-07-15 20:00             ` Jeff King
2011-07-14 17:53         ` [PATCH 3/3] decorate: use "map" for the underlying implementation Jeff King
2011-07-14 21:06         ` [RFC/PATCHv2 1/6] decorate: allow storing values instead of pointers Junio C Hamano
2011-08-04 22:43           ` [RFC/PATCH 0/5] macro-based key/value maps Jeff King
2011-08-04 22:45             ` [PATCH 1/5] implement generic key/value map Jeff King
2011-08-04 22:46             ` [PATCH 2/5] fast-export: use object to uint32 map instead of "decorate" Jeff King
2011-08-04 22:46             ` [PATCH 3/5] decorate: use "map" for the underlying implementation Jeff King
2011-08-04 22:46             ` [PATCH 4/5] map: implement persistent maps Jeff King
2011-08-04 22:46             ` [PATCH 5/5] implement metadata cache subsystem Jeff King
2011-08-04 22:48             ` [RFC/PATCH 0/2] patch-id caching Jeff King
2011-08-04 22:49               ` [PATCH 1/2] cherry: read default config Jeff King
2011-08-04 22:49               ` Jeff King [this message]
2011-08-04 22:52                 ` [PATCH 2/2] cache patch ids on disk Jeff King
2011-08-05 11:03             ` [RFC/PATCH 0/5] macro-based key/value maps Jeff King
2011-08-05 15:31               ` René Scharfe
2011-08-06  6:30                 ` Jeff King
2011-07-13  7:04 ` [RFC/PATCHv2 2/6] add metadata-cache infrastructure Jeff King
2011-07-13  8:18   ` Bert Wesarg
2011-07-13  8:31     ` Jeff King
2011-07-13  8:45       ` Bert Wesarg
2011-07-13 19:18         ` Jeff King
2011-07-13 19:40       ` Junio C Hamano
2011-07-13 19:33   ` Junio C Hamano
2011-07-13 20:25     ` Jeff King
2011-07-13  7:05 ` [RFC/PATCHv2 3/6] commit: add commit_generation function Jeff King
2011-07-13 14:26   ` Eric Sunshine
2011-07-13  7:05 ` [RFC/PATCHv2 4/6] pretty: support %G to show the generation number of a commit Jeff King
2011-07-13  7:06 ` [RFC/PATCHv2 5/6] check commit generation cache validity against grafts Jeff King
2011-07-13 14:26   ` Eric Sunshine
2011-07-13 19:35     ` Jeff King
2011-07-13  7:06 ` [RFC/PATCHv2 6/6] limit "contains" traversals based on commit generation Jeff King
2011-07-13  7:23   ` Jeff King
2011-07-13 20:33     ` Junio C Hamano
2011-07-13 20:58       ` Jeff King
2011-07-13 21:12         ` Junio C Hamano
2011-07-13 21:18           ` Jeff King
2011-07-15 18:22   ` Junio C Hamano
2011-07-15 20:40     ` Jeff King
2011-07-15 21:04       ` Junio C Hamano
2011-07-15 21:14         ` Jeff King
2011-07-15 21:01 ` Generation numbers and replacement objects Jakub Narebski
2011-07-15 21:10   ` Jeff King
2011-07-16 21:10     ` Jakub Narebski

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110804224947.GB28215@sigill.intra.peff.net \
    --to=peff@peff.net \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).