git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>, gitster@pobox.com
Subject: [PATCH v2 5/7] index-pack: calculate {ref,ofs}_{first,last} early
Date: Tue,  8 Sep 2020 12:48:33 -0700	[thread overview]
Message-ID: <cb0b3738dd0bc018e7f2677ca3b172364bf18961.1599594441.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1599594441.git.jonathantanmy@google.com>

This is refactoring 2 of 2 to simplify struct base_data.

Whenever we make a struct base_data, immediately calculate its delta
children. This eliminates confusion as to when the
{ref,ofs}_{first,last} fields are initialized.

Before this patch, the delta children were calculated at the last
possible moment. This allowed the members of struct base_data to be
populated in any order, superficially useful when we have the object
contents before the struct object_entry. But this makes reasoning about
the state of struct base_data more complicated, hence this patch.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
---
 builtin/index-pack.c | 123 +++++++++++++++++++++----------------------
 1 file changed, 60 insertions(+), 63 deletions(-)

diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index c8db464557..94d0f53b03 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -33,12 +33,15 @@ struct object_stat {
 };
 
 struct base_data {
+	/* Initialized by make_base(). */
 	struct base_data *base;
 	struct object_entry *obj;
-	void *data;
-	unsigned long size;
 	int ref_first, ref_last;
 	int ofs_first, ofs_last;
+
+	/* Not initialized by make_base(). */
+	void *data;
+	unsigned long size;
 };
 
 struct thread_local {
@@ -362,14 +365,6 @@ static void set_thread_data(struct thread_local *data)
 		pthread_setspecific(key, data);
 }
 
-static struct base_data *alloc_base_data(void)
-{
-	struct base_data *base = xcalloc(1, sizeof(struct base_data));
-	base->ref_last = -1;
-	base->ofs_last = -1;
-	return base;
-}
-
 static void free_base_data(struct base_data *c)
 {
 	if (c->data) {
@@ -406,19 +401,6 @@ static void prune_base_data(struct base_data *youngest_child)
 	free(ancestry);
 }
 
-static void link_base_data(struct base_data *base, struct base_data *c)
-{
-	c->base = base;
-	if (c->data)
-		get_thread_data()->base_cache_used += c->size;
-	prune_base_data(c);
-}
-
-static void unlink_base_data(struct base_data *c)
-{
-	free_base_data(c);
-}
-
 static int is_delta_type(enum object_type type)
 {
 	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
@@ -929,10 +911,25 @@ static void *get_base_data(struct base_data *c)
 	return c->data;
 }
 
-static void resolve_delta(struct object_entry *delta_obj,
-			  struct base_data *base, struct base_data *result)
+static struct base_data *make_base(struct object_entry *obj,
+				   struct base_data *parent)
 {
-	void *base_data, *delta_data;
+	struct base_data *base = xcalloc(1, sizeof(struct base_data));
+	base->base = parent;
+	base->obj = obj;
+	find_ref_delta_children(&obj->idx.oid,
+				&base->ref_first, &base->ref_last);
+	find_ofs_delta_children(obj->idx.offset,
+				&base->ofs_first, &base->ofs_last);
+	return base;
+}
+
+static struct base_data *resolve_delta(struct object_entry *delta_obj,
+				       struct base_data *base)
+{
+	void *base_data, *delta_data, *result_data;
+	struct base_data *result;
+	unsigned long result_size;
 
 	if (show_stat) {
 		int i = delta_obj - objects;
@@ -946,19 +943,31 @@ static void resolve_delta(struct object_entry *delta_obj,
 	}
 	delta_data = get_data_from_pack(delta_obj);
 	base_data = get_base_data(base);
-	result->obj = delta_obj;
-	result->data = patch_delta(base_data, base->size,
-				   delta_data, delta_obj->size, &result->size);
+	result_data = patch_delta(base_data, base->size,
+				  delta_data, delta_obj->size, &result_size);
 	free(delta_data);
-	if (!result->data)
+	if (!result_data)
 		bad_object(delta_obj->idx.offset, _("failed to apply delta"));
-	hash_object_file(the_hash_algo, result->data, result->size,
+	hash_object_file(the_hash_algo, result_data, result_size,
 			 type_name(delta_obj->real_type), &delta_obj->idx.oid);
-	sha1_object(result->data, NULL, result->size, delta_obj->real_type,
+	sha1_object(result_data, NULL, result_size, delta_obj->real_type,
 		    &delta_obj->idx.oid);
+
+	result = make_base(delta_obj, base);
+	if (result->ref_last == -1 && result->ofs_last == -1) {
+		free(result_data);
+	} else {
+		result->data = result_data;
+		result->size = result_size;
+		get_thread_data()->base_cache_used += result->size;
+		prune_base_data(result);
+	}
+
 	counter_lock();
 	nr_resolved_deltas++;
 	counter_unlock();
+
+	return result;
 }
 
 /*
@@ -984,24 +993,9 @@ static int compare_and_swap_type(signed char *type,
 static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 						  struct base_data *prev_base)
 {
-	if (base->ref_last == -1 && base->ofs_last == -1) {
-		find_ref_delta_children(&base->obj->idx.oid,
-					&base->ref_first, &base->ref_last);
-
-		find_ofs_delta_children(base->obj->idx.offset,
-					&base->ofs_first, &base->ofs_last);
-
-		if (base->ref_last == -1 && base->ofs_last == -1) {
-			free(base->data);
-			return NULL;
-		}
-
-		link_base_data(prev_base, base);
-	}
-
 	if (base->ref_first <= base->ref_last) {
 		struct object_entry *child = objects + ref_deltas[base->ref_first].obj_no;
-		struct base_data *result = alloc_base_data();
+		struct base_data *result;
 
 		if (!compare_and_swap_type(&child->real_type, OBJ_REF_DELTA,
 					   base->obj->real_type))
@@ -1009,7 +1003,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 			    (uintmax_t)child->idx.offset,
 			    oid_to_hex(&base->obj->idx.oid));
 
-		resolve_delta(child, base, result);
+		result = resolve_delta(child, base);
 		if (base->ref_first == base->ref_last && base->ofs_last == -1)
 			free_base_data(base);
 
@@ -1019,11 +1013,11 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 
 	if (base->ofs_first <= base->ofs_last) {
 		struct object_entry *child = objects + ofs_deltas[base->ofs_first].obj_no;
-		struct base_data *result = alloc_base_data();
+		struct base_data *result;
 
 		assert(child->real_type == OBJ_OFS_DELTA);
 		child->real_type = base->obj->real_type;
-		resolve_delta(child, base, result);
+		result = resolve_delta(child, base);
 		if (base->ofs_first == base->ofs_last)
 			free_base_data(base);
 
@@ -1031,7 +1025,7 @@ static struct base_data *find_unresolved_deltas_1(struct base_data *base,
 		return result;
 	}
 
-	unlink_base_data(base);
+	free_base_data(base);
 	return NULL;
 }
 
@@ -1074,9 +1068,8 @@ static int compare_ref_delta_entry(const void *a, const void *b)
 
 static void resolve_base(struct object_entry *obj)
 {
-	struct base_data *base_obj = alloc_base_data();
-	base_obj->obj = obj;
-	base_obj->data = NULL;
+	struct base_data *base_obj = make_base(obj, NULL);
+
 	find_unresolved_deltas(base_obj);
 }
 
@@ -1369,22 +1362,26 @@ static void fix_unresolved_deltas(struct hashfile *f)
 	for (i = 0; i < nr_ref_deltas; i++) {
 		struct ref_delta_entry *d = sorted_by_pos[i];
 		enum object_type type;
-		struct base_data *base_obj = alloc_base_data();
+		struct base_data *base;
+		void *data;
+		unsigned long size;
+		struct object_entry *obj;
 
 		if (objects[d->obj_no].real_type != OBJ_REF_DELTA)
 			continue;
-		base_obj->data = read_object_file(&d->oid, &type,
-						  &base_obj->size);
-		if (!base_obj->data)
+		data = read_object_file(&d->oid, &type, &size);
+		if (!data)
 			continue;
 
 		if (check_object_signature(the_repository, &d->oid,
-					   base_obj->data, base_obj->size,
+					   data, size,
 					   type_name(type)))
 			die(_("local object %s is corrupt"), oid_to_hex(&d->oid));
-		base_obj->obj = append_obj_to_pack(f, d->oid.hash,
-					base_obj->data, base_obj->size, type);
-		find_unresolved_deltas(base_obj);
+		obj = append_obj_to_pack(f, d->oid.hash, data, size, type);
+		base = make_base(obj, NULL);
+		base->data = data;
+		base->size = size;
+		find_unresolved_deltas(base);
 		display_progress(progress, nr_resolved_deltas);
 	}
 	free(sorted_by_pos);
-- 
2.28.0.526.ge36021eeef-goog


  parent reply	other threads:[~2020-09-08 19:50 UTC|newest]

Thread overview: 29+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-08-24 19:16 [PATCH 0/7] Better threaded delta resolution in index-pack (another try) Jonathan Tan
2020-08-24 19:16 ` [PATCH 1/7] Documentation: deltaBaseCacheLimit is per-thread Jonathan Tan
2020-08-24 19:16 ` [PATCH] fetch-pack: in partial clone, pass --promisor Jonathan Tan
2020-08-24 19:36   ` Jonathan Tan
2020-08-24 19:16 ` [PATCH 2/7] index-pack: remove redundant parameter Jonathan Tan
2020-08-24 21:01   ` Junio C Hamano
2020-08-24 19:16 ` [PATCH 3/7] index-pack: unify threaded and unthreaded code Jonathan Tan
2020-08-24 21:11   ` Junio C Hamano
2020-08-24 19:16 ` [PATCH 4/7] index-pack: remove redundant child field Jonathan Tan
2020-08-24 19:16 ` [PATCH 5/7] index-pack: calculate {ref,ofs}_{first,last} early Jonathan Tan
2020-08-24 19:16 ` [PATCH 6/7] index-pack: make resolve_delta() assume base data Jonathan Tan
2020-08-24 19:16 ` [PATCH 7/7] index-pack: make quantum of work smaller Jonathan Tan
2020-08-24 21:19   ` Junio C Hamano
2020-08-24 20:47 ` [PATCH 0/7] Better threaded delta resolution in index-pack (another try) Junio C Hamano
2020-08-24 21:27 ` [PATCH] fixup! index-pack: make quantum of work smaller Jonathan Tan
2020-08-24 22:08 ` [PATCH 0/7] Better threaded delta resolution in index-pack (another try) Jeff King
2020-08-25 18:11   ` Jonathan Tan
2020-08-25 21:18     ` Jeff King
2020-08-25 21:46       ` Jeff King
2020-09-08 19:48 ` [PATCH v2 " Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 1/7] Documentation: deltaBaseCacheLimit is per-thread Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 2/7] index-pack: remove redundant parameter Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 3/7] index-pack: unify threaded and unthreaded code Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 4/7] index-pack: remove redundant child field Jonathan Tan
2020-09-08 19:48   ` Jonathan Tan [this message]
2020-09-08 19:48   ` [PATCH v2 6/7] index-pack: make resolve_delta() assume base data Jonathan Tan
2020-09-08 19:48   ` [PATCH v2 7/7] index-pack: make quantum of work smaller Jonathan Tan
2020-09-08 22:53   ` [PATCH v2 0/7] Better threaded delta resolution in index-pack (another try) Junio C Hamano
  -- strict thread matches above, loose matches on Subject: below --
2019-10-09 23:44 [RFC PATCH 0/6] Better threaded delta resolution in index-pack Jonathan Tan
2019-10-17 20:17 ` [PATCH v2 0/7] " Jonathan Tan
2019-10-17 20:17   ` [PATCH v2 5/7] index-pack: calculate {ref,ofs}_{first,last} early Jonathan Tan

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=cb0b3738dd0bc018e7f2677ca3b172364bf18961.1599594441.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).