git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Junio C Hamano" <gitster@pobox.com>,
	"Ævar Arnfjörð" <avarab@gmail.com>,
	"Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH 13/15] pack-objects: shrink delta_size field in struct object_entry
Date: Sat, 14 Apr 2018 17:35:11 +0200	[thread overview]
Message-ID: <20180414153513.9902-14-pclouds@gmail.com> (raw)
In-Reply-To: <20180414153513.9902-1-pclouds@gmail.com>

Allowing a delta size of 64 bits is crazy. Shrink this field down to
20 bits with one overflow bit.

If we find an existing delta larger than 1MB, we do not cache
delta_size at all and will get the value from oe_size(), potentially
from disk if it's larger than 4GB.

Note, since DELTA_SIZE() is used in try_delta() code, it must be
thread-safe. Luckily oe_size() does guarantee this, so DELTA_SIZE()
is thread-safe as well.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 builtin/pack-objects.c | 26 ++++++++++++++++----------
 pack-objects.h         | 23 ++++++++++++++++++++++-
 2 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index cccd0f8040..88d2bb8153 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -34,10 +34,12 @@
 #define IN_PACK(obj) oe_in_pack(&to_pack, obj)
 #define SIZE(obj) oe_size(&to_pack, obj)
 #define SET_SIZE(obj,size) oe_set_size(&to_pack, obj, size)
+#define DELTA_SIZE(obj) oe_delta_size(&to_pack, obj)
 #define DELTA(obj) oe_delta(&to_pack, obj)
 #define DELTA_CHILD(obj) oe_delta_child(&to_pack, obj)
 #define DELTA_SIBLING(obj) oe_delta_sibling(&to_pack, obj)
 #define SET_DELTA(obj, val) oe_set_delta(&to_pack, obj, val)
+#define SET_DELTA_SIZE(obj, val) oe_set_delta_size(&to_pack, obj, val)
 #define SET_DELTA_CHILD(obj, val) oe_set_delta_child(&to_pack, obj, val)
 #define SET_DELTA_SIBLING(obj, val) oe_set_delta_sibling(&to_pack, obj, val)
 
@@ -144,7 +146,7 @@ static void *get_delta(struct object_entry *entry)
 		    oid_to_hex(&DELTA(entry)->idx.oid));
 	delta_buf = diff_delta(base_buf, base_size,
 			       buf, size, &delta_size, 0);
-	if (!delta_buf || delta_size != entry->delta_size)
+	if (!delta_buf || delta_size != DELTA_SIZE(entry))
 		die("delta size changed");
 	free(buf);
 	free(base_buf);
@@ -294,14 +296,14 @@ static unsigned long write_no_reuse_object(struct hashfile *f, struct object_ent
 		FREE_AND_NULL(entry->delta_data);
 		entry->z_delta_size = 0;
 	} else if (entry->delta_data) {
-		size = entry->delta_size;
+		size = DELTA_SIZE(entry);
 		buf = entry->delta_data;
 		entry->delta_data = NULL;
 		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
 	} else {
 		buf = get_delta(entry);
-		size = entry->delta_size;
+		size = DELTA_SIZE(entry);
 		type = (allow_ofs_delta && DELTA(entry)->idx.offset) ?
 			OBJ_OFS_DELTA : OBJ_REF_DELTA;
 	}
@@ -1509,7 +1511,7 @@ static void check_object(struct object_entry *entry)
 			oe_set_type(entry, entry->in_pack_type);
 			SET_SIZE(entry, in_pack_size); /* delta size */
 			SET_DELTA(entry, base_entry);
-			entry->delta_size = in_pack_size;
+			SET_DELTA_SIZE(entry, in_pack_size);
 			entry->delta_sibling_idx = base_entry->delta_child_idx;
 			SET_DELTA_CHILD(base_entry, entry);
 			unuse_pack(&w_curs);
@@ -1937,7 +1939,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 		max_size = trg_size/2 - 20;
 		ref_depth = 1;
 	} else {
-		max_size = trg_entry->delta_size;
+		max_size = DELTA_SIZE(trg_entry);
 		ref_depth = trg->depth;
 	}
 	max_size = (uint64_t)max_size * (max_depth - src->depth) /
@@ -2006,10 +2008,14 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	delta_buf = create_delta(src->index, trg->data, trg_size, &delta_size, max_size);
 	if (!delta_buf)
 		return 0;
+	if (delta_size >= (1U << OE_DELTA_SIZE_BITS)) {
+		free(delta_buf);
+		return 0;
+	}
 
 	if (DELTA(trg_entry)) {
 		/* Prefer only shallower same-sized deltas. */
-		if (delta_size == trg_entry->delta_size &&
+		if (delta_size == DELTA_SIZE(trg_entry) &&
 		    src->depth + 1 >= trg->depth) {
 			free(delta_buf);
 			return 0;
@@ -2024,7 +2030,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	free(trg_entry->delta_data);
 	cache_lock();
 	if (trg_entry->delta_data) {
-		delta_cache_size -= trg_entry->delta_size;
+		delta_cache_size -= DELTA_SIZE(trg_entry);
 		trg_entry->delta_data = NULL;
 	}
 	if (delta_cacheable(src_size, trg_size, delta_size)) {
@@ -2037,7 +2043,7 @@ static int try_delta(struct unpacked *trg, struct unpacked *src,
 	}
 
 	SET_DELTA(trg_entry, src_entry);
-	trg_entry->delta_size = delta_size;
+	SET_DELTA_SIZE(trg_entry, delta_size);
 	trg->depth = src->depth + 1;
 
 	return 1;
@@ -2160,11 +2166,11 @@ static void find_deltas(struct object_entry **list, unsigned *list_size,
 		if (entry->delta_data && !pack_to_stdout) {
 			unsigned long size;
 
-			size = do_compress(&entry->delta_data, entry->delta_size);
+			size = do_compress(&entry->delta_data, DELTA_SIZE(entry));
 			if (size < (1U << OE_Z_DELTA_BITS)) {
 				entry->z_delta_size = size;
 				cache_lock();
-				delta_cache_size -= entry->delta_size;
+				delta_cache_size -= DELTA_SIZE(entry);
 				delta_cache_size += entry->z_delta_size;
 				cache_unlock();
 			} else {
diff --git a/pack-objects.h b/pack-objects.h
index ee2c7ab382..1c588184b2 100644
--- a/pack-objects.h
+++ b/pack-objects.h
@@ -12,6 +12,7 @@
  * above this limit. Don't lower it too much.
  */
 #define OE_SIZE_BITS		31
+#define OE_DELTA_SIZE_BITS	20
 
 /*
  * State flags for depth-first search used for analyzing delta cycles.
@@ -85,7 +86,8 @@ struct object_entry {
 				     * uses the same base as me
 				     */
 	void *delta_data;	/* cached delta (uncompressed) */
-	unsigned long delta_size;	/* delta data size (uncompressed) */
+	unsigned delta_size_:OE_DELTA_SIZE_BITS; /* delta data size (uncompressed) */
+	unsigned delta_size_valid:1;
 	unsigned z_delta_size:OE_Z_DELTA_BITS;
 	unsigned type_:TYPE_BITS;
 	unsigned in_pack_type:TYPE_BITS; /* could be delta */
@@ -309,4 +311,23 @@ static inline void oe_set_size(struct packing_data *pack,
 	}
 }
 
+static inline unsigned long oe_delta_size(struct packing_data *pack,
+					  const struct object_entry *e)
+{
+	if (e->delta_size_valid)
+		return e->delta_size_;
+	return oe_size(pack, e);
+}
+
+static inline void oe_set_delta_size(struct packing_data *pack,
+				     struct object_entry *e,
+				     unsigned long size)
+{
+	e->delta_size_ = size;
+	e->delta_size_valid = e->delta_size_ == size;
+	if (!e->delta_size_valid && size != oe_size(pack, e))
+		BUG("this can only happen in check_object() "
+		    "where delta size is the same as entry size");
+}
+
 #endif
-- 
2.17.0.367.g5dd2e386c3


  parent reply	other threads:[~2018-04-14 15:35 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-14 15:34 [PATCH 00/15] nd/pack-objects-pack-struct update Nguyễn Thái Ngọc Duy
2018-04-14 15:34 ` [PATCH 01/15] read-cache.c: make $GIT_TEST_SPLIT_INDEX boolean Nguyễn Thái Ngọc Duy
2018-04-14 19:53   ` Ævar Arnfjörð Bjarmason
2018-04-14 15:35 ` [PATCH 02/15] pack-objects: a bit of document about struct object_entry Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 03/15] pack-objects: turn type and in_pack_type to bitfields Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 04/15] pack-objects: use bitfield for object_entry::dfs_state Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 05/15] pack-objects: use bitfield for object_entry::depth Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 06/15] pack-objects: move in_pack_pos out of struct object_entry Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 07/15] pack-objects: move in_pack " Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 08/15] pack-objects: refer to delta objects by index instead of pointer Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 09/15] pack-objects: shrink z_delta_size field in struct object_entry Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 10/15] pack-objects: don't check size when the object is bad Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 11/15] pack-objects: clarify the use of object_entry::size Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 12/15] pack-objects: shrink size field in struct object_entry Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` Nguyễn Thái Ngọc Duy [this message]
2018-04-14 15:35 ` [PATCH 14/15] pack-objects: reorder members to shrink " Nguyễn Thái Ngọc Duy
2018-04-14 15:35 ` [PATCH 15/15] ci: exercise the whole test suite with uncommon code in pack-objects Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180414153513.9902-14-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=avarab@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).