git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: David Michael Barr <b@rr-dav.id.au>
To: Git Mailing List <git@vger.kernel.org>
Cc: David Michael Barr <b@rr-dav.id.au>
Subject: [RFC] pack-objects: compression level for non-blobs
Date: Mon, 26 Nov 2012 17:25:54 +1100	[thread overview]
Message-ID: <1353911154-23495-1-git-send-email-b@rr-dav.id.au> (raw)

Add a config option, pack.graphcompression, similar to pack.compression.
It applies to non-blob objects and, if unspecified, falls back to pack.compression.

Objects stored with compression level 0 can be identified by their leading bytes.
Use this to force recompression when the source and target levels mismatch
(that is, when one is level 0 and the other is not).
Do this only when the config pack.graphcompression is set.

Signed-off-by: David Michael Barr <b@rr-dav.id.au>
---
 builtin/pack-objects.c | 49 +++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 45 insertions(+), 4 deletions(-)

 I started working on this just before taking a vacation,
 so it's been a little while coming.

 The intent is to allow selective recompression of pack data.
 For small objects/deltas the overhead of deflate is significant.
 This may improve read performance for the object graph.

 I ran some unscientific experiments with the chromium repository.
 With pack.graphcompression = 0, there was a 2.7% increase in pack size.
 On git log --raw, I saw a 35% improvement with cold caches and 43% with warm caches.

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index f069462..9518daf 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -40,6 +40,7 @@ struct object_entry {
 	unsigned long z_delta_size;	/* delta data size (compressed) */
 	unsigned int hash;	/* name hint hash */
 	enum object_type type;
+	enum object_type actual_type;
 	enum object_type in_pack_type;	/* could be delta */
 	unsigned char in_pack_header_size;
 	unsigned char preferred_base; /* we do not pack this, but is available
@@ -81,6 +82,8 @@ static int num_preferred_base;
 static struct progress *progress_state;
 static int pack_compression_level = Z_DEFAULT_COMPRESSION;
 static int pack_compression_seen;
+static int pack_graph_compression_level = Z_DEFAULT_COMPRESSION;
+static int pack_graph_compression_seen;
 
 static unsigned long delta_cache_size = 0;
 static unsigned long max_delta_cache_size = 256 * 1024 * 1024;
@@ -125,14 +128,14 @@ static void *get_delta(struct object_entry *entry)
 	return delta_buf;
 }
 
-static unsigned long do_compress(void **pptr, unsigned long size)
+static unsigned long do_compress(void **pptr, unsigned long size, int level)
 {
 	git_zstream stream;
 	void *in, *out;
 	unsigned long maxsize;
 
 	memset(&stream, 0, sizeof(stream));
-	git_deflate_init(&stream, pack_compression_level);
+	git_deflate_init(&stream, level);
 	maxsize = git_deflate_bound(&stream, size);
 
 	in = *pptr;
@@ -191,6 +194,18 @@ static unsigned long write_large_blob_data(struct git_istream *st, struct sha1fi
 	return olen;
 }
 
+static int check_pack_compressed(struct packed_git *p,
+		struct pack_window **w_curs,
+		off_t offset)
+{
+	unsigned long avail;
+	int compressed = 0;
+	unsigned char *in = use_pack(p, w_curs, offset, &avail);
+	if (avail >= 3)
+		compressed = !!(in[2] & 0x6);
+	return compressed;
+}
+
 /*
  * we are going to reuse the existing object data as is.  make
  * sure it is not corrupt.
@@ -240,6 +255,8 @@ static void copy_pack_data(struct sha1file *f,
 	}
 }
 
+#define compression_level(type) ((type) && (type) != OBJ_BLOB ? pack_graph_compression_level : pack_compression_level)
+
 /* Return 0 if we will bust the pack-size limit */
 static unsigned long write_no_reuse_object(struct sha1file *f, struct object_entry *entry,
 					   unsigned long limit, int usable_delta)
@@ -286,7 +303,7 @@ static unsigned long write_no_reuse_object(struct sha1file *f, struct object_ent
 	else if (entry->z_delta_size)
 		datalen = entry->z_delta_size;
 	else
-		datalen = do_compress(&buf, size);
+		datalen = do_compress(&buf, size, compression_level(entry->actual_type));
 
 	/*
 	 * The object header is a byte of 'type' followed by zero or
@@ -379,6 +396,13 @@ static unsigned long write_reuse_object(struct sha1file *f, struct object_entry
 	offset += entry->in_pack_header_size;
 	datalen -= entry->in_pack_header_size;
 
+	if (!pack_to_stdout &&
+	    pack_graph_compression_seen &&
+	    check_pack_compressed(p, &w_curs, offset) != !!compression_level(entry->actual_type)) {
+		unuse_pack(&w_curs);
+		return write_no_reuse_object(f, entry, limit, usable_delta);
+	}
+
 	if (!pack_to_stdout && p->index_version == 1 &&
 	    check_pack_inflate(p, &w_curs, offset, datalen, entry->size)) {
 		error("corrupt packed object for %s", sha1_to_hex(entry->idx.sha1));
@@ -955,6 +979,8 @@ static int add_object_entry(const unsigned char *sha1, enum object_type type,
 	memset(entry, 0, sizeof(*entry));
 	hashcpy(entry->idx.sha1, sha1);
 	entry->hash = hash;
+	if (pack_graph_compression_seen)
+		entry->actual_type = sha1_object_info(sha1, NULL);
 	if (type)
 		entry->type = type;
 	if (exclude)
@@ -1758,7 +1784,8 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 		 */
 		if (entry->delta_data && !pack_to_stdout) {
 			entry->z_delta_size = do_compress(&entry->delta_data,
-							  entry->delta_size);
+							  entry->delta_size,
+							  compression_level(entry->actual_type));
 			cache_lock();
 			delta_cache_size -= entry->delta_size;
 			delta_cache_size += entry->z_delta_size;
@@ -2159,6 +2186,16 @@ static int git_pack_config(const char *k, const char *v, void *cb)
 			    pack_idx_opts.version);
 		return 0;
 	}
+	if (!strcmp(k, "pack.graphcompression")) {
+		int level = git_config_int(k, v);
+		if (level == -1)
+			level = Z_DEFAULT_COMPRESSION;
+		else if (level < 0 || level > Z_BEST_COMPRESSION)
+			die("bad pack graph compression level %d", level);
+		pack_graph_compression_level = level;
+		pack_graph_compression_seen = 1;
+		return 0;
+	}
 	return git_default_config(k, v, cb);
 }
 
@@ -2519,6 +2556,10 @@ int cmd_pack_objects(int argc, const char **argv, const char *prefix)
 	argc = parse_options(argc, argv, prefix, pack_objects_options,
 			     pack_usage, 0);
 
+	/* Fall back after option parsing to catch --compression */
+	if (!pack_graph_compression_seen)
+		pack_graph_compression_level = pack_compression_level;
+
 	if (argc) {
 		base_name = argv[0];
 		argc--;
-- 
1.8.0

             reply	other threads:[~2012-11-26  6:26 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-11-26  6:25 David Michael Barr [this message]
2012-11-26 12:35 ` [RFC] pack-objects: compression level for non-blobs David Michael Barr
2012-12-29  0:41 ` Jeff King
2012-12-29  4:34   ` Nguyen Thai Ngoc Duy
2012-12-29  5:07     ` Jeff King
2012-12-29  5:25       ` Nguyen Thai Ngoc Duy
2012-12-29  5:27         ` Jeff King
2012-12-29  9:05           ` Jeff King
2012-12-29  9:48             ` Jeff King
2012-12-30 12:05           ` Jeff King
2012-12-30 12:53             ` Nguyen Thai Ngoc Duy
2012-12-30 21:31               ` Jeff King
2012-12-31 18:06                 ` Shawn Pearce
2013-01-01  4:15                   ` Duy Nguyen
2013-01-01 12:10                     ` Duy Nguyen
2013-01-01 17:17                       ` Shawn Pearce
2013-01-01 23:47                         ` Junio C Hamano
2013-01-02  2:23                         ` Duy Nguyen
2013-01-01 20:02                       ` Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1353911154-23495-1-git-send-email-b@rr-dav.id.au \
    --to=b@rr-dav.id.au \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).