git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
To: git@vger.kernel.org
Cc: "Nguyễn Thái Ngọc Duy" <pclouds@gmail.com>
Subject: [PATCH v3 08/28] shallow.c: add mark_new_shallow_refs()
Date: Mon, 25 Nov 2013 10:55:34 +0700	[thread overview]
Message-ID: <1385351754-9954-9-git-send-email-pclouds@gmail.com> (raw)
In-Reply-To: <1385351754-9954-1-git-send-email-pclouds@gmail.com>

When we receive a pack and the shallow points from another repository,
we may need to add more shallow points to current repo to make sure no
commits point to nowhere. But usually we don't want to do so because
(in future) new shallow points invalidate pack bitmaps and we need to
rebuild them again, which is not cheap.

So the default way is we allow ref updates that do not introduce new
shallow points and mark the others. If the user is fine with new
shallow point addition, we accept the marked refs.

But even so we do not blindly accept all shallow points provided. Some
of them might not point to any commits in the new pack. Some might
even do, but those might be unreachable object islands. Only shallow
points that are reachable from old and new refs can stay.

The way it's implemented is paint down from each ref, attach a bitmap
to each commit where one bit represents one ref. In order to avoid
allocating new bitmap for every commit, we try to reuse the same
bitmap for parent commits if possible. This reduces allocation and
leaks deliberately because it's hard to keep/time consuming track how
many pointers to the same buffer.

In a typical push or fetch, the new pack should not carry new shallow
roots. If the current repo does not have any commit islands refered by
the sender's shallow roots either, this function is just a few
has_sha1_file(). So quite cheap.

Once the sender diverts from that path (or the receiver detects
shallow roots attached to commit islands from remove_reachable_shallow_points),
it'll be a lot more expensive. Pack bitmaps won't help this kind of
commit traversal, but commit cache might.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@gmail.com>
---
 commit.h  |   4 ++
 shallow.c | 219 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 223 insertions(+)

diff --git a/commit.h b/commit.h
index 98044e6..e1fd587 100644
--- a/commit.h
+++ b/commit.h
@@ -194,6 +194,7 @@ extern struct commit_list *get_octopus_merge_bases(struct commit_list *in);
 #define INFINITE_DEPTH 0x7fffffff
 
 struct extra_have_objects;
+struct ref;
 extern int register_shallow(const unsigned char *sha1);
 extern int unregister_shallow(const unsigned char *sha1);
 extern int for_each_commit_graft(each_commit_graft_fn, void *);
@@ -209,6 +210,9 @@ extern char *setup_temporary_shallow(void);
 extern void advertise_shallow_grafts(int);
 extern void remove_reachable_shallow_points(struct extra_have_objects *out,
 					    const struct extra_have_objects *in);
+extern int mark_new_shallow_refs(const struct extra_have_objects *ref,
+				 int *ref_status, uint32_t **used,
+				 const struct extra_have_objects *shallow);
 
 int is_descendant_of(struct commit *, struct commit_list *);
 int in_merge_bases(struct commit *, struct commit *);
diff --git a/shallow.c b/shallow.c
index a974d2d..c92a1dc 100644
--- a/shallow.c
+++ b/shallow.c
@@ -4,6 +4,8 @@
 #include "pkt-line.h"
 #include "remote.h"
 #include "refs.h"
+#include "diff.h"
+#include "revision.h"
 
 static int is_shallow = -1;
 static struct stat shallow_stat;
@@ -280,3 +282,220 @@ void remove_reachable_shallow_points(struct extra_have_objects *out,
 	}
 	free(ca.commits);
 }
+
+static int paint_down(const unsigned char *sha1, int id, int nr_bits, int quick)
+{
+	int hit_bottom = 0;
+	unsigned int i, nr;
+	struct commit_list *head = NULL;
+	int bitmap_nr = (nr_bits + 31) / 32;
+	int bitmap_size = bitmap_nr * sizeof(uint32_t);
+	uint32_t *tmp = xmalloc(bitmap_size);
+	uint32_t *bitmap = xcalloc(bitmap_size, sizeof(uint32_t));
+	bitmap[id / 32] |= (1 << (id % 32));
+	commit_list_insert(lookup_commit(sha1), &head);
+	while (head) {
+		struct commit_list *p;
+		struct commit *c = head->item;
+		uint32_t *c_util = c->util;
+
+		p = head;
+		head = head->next;
+		free(p);
+
+		if (c->object.flags & (SEEN | UNINTERESTING))
+			continue;
+		else
+			c->object.flags |= SEEN;
+
+		if (c->util == NULL)
+			c->util = bitmap;
+		else {
+			/*
+			 * Deliberately leak a lot in commit->util
+			 * because there can be many pointers to the
+			 * same bitmap. Probably should allocate in a
+			 * pool and free the whole pool at the end.
+			 */
+			memcpy(tmp, c_util, bitmap_size);
+			for (i = 0; i < bitmap_nr; i++)
+				tmp[i] |= bitmap[i];
+			if (memcmp(tmp, c_util, bitmap_size)) {
+				c->util = xmalloc(bitmap_size);
+				memcpy(c->util, tmp, bitmap_size);
+			}
+		}
+
+		if (c->object.flags & BOTTOM) {
+			hit_bottom = 1;
+			if (quick) {
+				free_commit_list(head);
+				break;
+			} else
+				continue;
+		}
+
+		if (parse_commit(c))
+			die("unable to parse commit %s",
+			    sha1_to_hex(c->object.sha1));
+
+		for (p = c->parents; p; p = p->next) {
+			if (p->item->object.flags & SEEN)
+				continue;
+			if (p->item->util == NULL || p->item->util == c_util)
+				p->item->util = c->util;
+			commit_list_insert(p->item, &head);
+		}
+	}
+
+	nr = get_max_object_index();
+	for (i = 0; i < nr; i++) {
+		struct object *o = get_indexed_object(i);
+		if (o && o->type == OBJ_COMMIT) {
+			o->flags &= ~SEEN;
+		}
+	}
+
+	free(tmp);
+	return hit_bottom;
+}
+
+static int mark_uninteresting(const char *refname,
+			      const unsigned char *sha1,
+			      int flags, void *cb_data)
+{
+	struct commit *commit = lookup_commit(sha1);
+	commit->object.flags |= UNINTERESTING;
+	mark_parents_uninteresting(commit);
+	return 0;
+}
+
+struct saved_util {
+	unsigned char sha1[20];
+	void *util;
+};
+
+/*
+ * Given a set of refs and shallow roots, find out what ref can reach
+ * any of the given roots. If so mark that ref "reject_flag". If
+ * "used" is not NULL, mark all reachable roots. Return how many refs
+ * that need new shallow points.
+ */
+int mark_new_shallow_refs(const struct extra_have_objects *ref,
+			  int *ref_status, uint32_t **used,
+			  const struct extra_have_objects *shallow)
+{
+	struct saved_util *util = NULL;
+	unsigned int i, nr, ret = 0, nr_util = 0, alloc_util = 0;
+
+	/*
+	 * Quick check to see if we may need to add new shallow
+	 * roots. Go through the list of root candidates and check if
+	 * they exist (either in current repo, or in the new pack, we
+	 * can't distinguish).
+	 *
+	 * 1) If none of the new roots exist, the pack must connect to
+	 *    the main object graph, which is already guarded by
+	 *    current repo's shallow roots and we will not need to
+	 *    consider adding new shallow roots, so we can exit early.
+	 *
+	 * 2) The pack may connect to some existing object islands in
+	 *    current repo then add shallow roots to plug loose ends
+	 *    from those islands. In that case, new shallow roots must
+	 *    also exist in the repo as this stage (old objects plus
+	 *    the new pack).
+	 *
+	 * 3) The last, easiest case, is the pack contains some
+	 *    shallow roots, which may be used to tie up loose ends of
+	 *    some new refs, or redundanty (tying up loose ends of new
+	 *    object islands)
+	 */
+	for (i = 0;i < shallow->nr; i++)
+		if (has_sha1_file(shallow->array[i]))
+			break;
+	if (i == shallow->nr)
+		/*
+		 * this is the first and also the common case, where
+		 * the new pack does not carry any new shallow
+		 * points. No need to to the expensive commit traverse
+		 * dance below.
+		 */
+		return 0;
+
+	/*
+	 * Prepare the commit graph to track what refs can reach what
+	 * (new) shallow points.
+	 */
+	nr = get_max_object_index();
+	for (i = 0; i < nr; i++) {
+		struct object *o = get_indexed_object(i);
+		struct commit *c = (struct commit *)o;
+		if (!o || o->type != OBJ_COMMIT)
+			continue;
+
+		o->flags &= ~(UNINTERESTING | BOTTOM | SEEN);
+		/*
+		 * git-fetch makes use of "util" field. Save it and
+		 * restore later. For fetch/clone/push, "nr" should be
+		 * small because rev-list is delayed to pack-objects.
+		 */
+		if (c->util) {
+			ALLOC_GROW(util, nr_util+1, alloc_util);
+			hashcpy(util[nr_util].sha1, o->sha1);
+			util[nr_util].util = c->util;
+			nr_util++;
+			c->util = NULL;
+		}
+	}
+
+	/*
+	 * "--not --all" to cut short the traversal if new refs
+	 * connect to old refs. If not (e.g. force ref updates) it'll
+	 * have to go down to the current shallow roots.
+	 *
+	 * We could detect that a new commit is connected to an
+	 * existing commit by keeping new objects in a pack (i.e. the
+	 * index-pack code path) then check commit origin. If so stop
+	 * short, so we don't need to get to the bottom. But then it
+	 * will not work for case #2 because we need to go through
+	 * some of our commits before reaching new shallow roots.
+	 */
+	head_ref(mark_uninteresting, NULL);
+	for_each_ref(mark_uninteresting, NULL);
+
+	for (i = 0; i < shallow->nr; i++)
+		if (has_sha1_file(shallow->array[i])) {
+			struct commit *c = lookup_commit(shallow->array[i]);
+			c->object.flags |= BOTTOM;
+		}
+
+	for (i = 0; i < ref->nr; i++)
+		if (paint_down(ref->array[i], i, ref->nr, used == NULL)) {
+			if (ref_status)
+				ref_status[i] = 1;
+			ret++;
+		}
+
+	if (used) {
+		for (i = 0; i < shallow->nr; i++) {
+			struct commit *c = lookup_commit(shallow->array[i]);
+			used[i] = c->util;
+		}
+	}
+
+	if (nr_util) {
+		nr = get_max_object_index();
+		for (i = 0; i < nr; i++) {
+			struct object *o = get_indexed_object(i);
+			if (o && o->type == OBJ_COMMIT)
+				((struct commit *)o)->util = NULL;
+		}
+		for (i = 0; i < nr_util; i++) {
+			struct commit *c = lookup_commit(util[i].sha1);
+			c->util = util[i].util;
+		}
+		free(util);
+	}
+
+	return ret;
+}
-- 
1.8.2.83.gc99314b

  parent reply	other threads:[~2013-11-25  3:52 UTC|newest]

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-11-25  3:55 [PATCH v3 00/28] First class shallow clone Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 01/28] transport.h: remove send_pack prototype, already defined in send-pack.h Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 02/28] send-pack: forbid pushing from a shallow repository Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 03/28] clone: prevent --reference to " Nguyễn Thái Ngọc Duy
2013-11-26  5:52   ` Eric Sunshine
2013-11-25  3:55 ` [PATCH v3 04/28] update-server-info: do not publish shallow clones Nguyễn Thái Ngọc Duy
2013-11-25 20:08   ` Junio C Hamano
2013-11-26 12:41     ` Duy Nguyen
2013-11-25  3:55 ` [PATCH v3 05/28] Advertise shallow graft information on the server end Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 06/28] connect.c: teach get_remote_heads to parse "shallow" lines Nguyễn Thái Ngọc Duy
2013-11-25 21:42   ` Junio C Hamano
2013-11-25 22:42     ` Junio C Hamano
2013-11-27 13:02       ` Duy Nguyen
2013-11-25  3:55 ` [PATCH v3 07/28] shallow.c: add remove_reachable_shallow_points() Nguyễn Thái Ngọc Duy
2013-11-25 21:53   ` Junio C Hamano
2013-11-25  3:55 ` Nguyễn Thái Ngọc Duy [this message]
2013-11-25 22:20   ` [PATCH v3 08/28] shallow.c: add mark_new_shallow_refs() Junio C Hamano
2013-11-26 13:18     ` Duy Nguyen
2013-11-26 22:20       ` Junio C Hamano
2013-11-25  3:55 ` [PATCH v3 09/28] shallow.c: extend setup_*_shallow() to accept extra shallow points Nguyễn Thái Ngọc Duy
2013-11-25 22:25   ` Junio C Hamano
2013-11-25  3:55 ` [PATCH v3 10/28] fetch-pack.c: move shallow update code out of fetch_pack() Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 11/28] fetch-pack.h: one statement per bitfield declaration Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 12/28] clone: support remote shallow repository Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 13/28] fetch: support fetching from a " Nguyễn Thái Ngọc Duy
2013-11-27  9:47   ` Eric Sunshine
2013-11-25  3:55 ` [PATCH v3 14/28] upload-pack: make sure deepening preserves shallow roots Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 15/28] fetch: add --update-shallow to get refs that require updating .git/shallow Nguyễn Thái Ngọc Duy
2013-11-27  1:53   ` Eric Sunshine
2013-11-27 12:54     ` Duy Nguyen
2013-11-27 19:04       ` Junio C Hamano
2013-11-25  3:55 ` [PATCH v3 16/28] receive-pack: reorder some code in unpack() Nguyễn Thái Ngọc Duy
2013-12-02 22:25   ` Junio C Hamano
2013-11-25  3:55 ` [PATCH v3 17/28] Support pushing from a shallow clone Nguyễn Thái Ngọc Duy
2013-11-26 20:38   ` Eric Sunshine
2013-11-25  3:55 ` [PATCH v3 18/28] New var GIT_SHALLOW_FILE to propagate --shallow-file to subprocesses Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 19/28] connected.c: add new variant that runs with --shallow-file Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 20/28] receive-pack: allow pushing with new shallow roots Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 21/28] send-pack: support pushing to a shallow clone Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 22/28] remote-curl: pass ref SHA-1 to fetch-pack as well Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 23/28] Support fetch/clone over http Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 24/28] receive-pack: support pushing to a shallow clone via http Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 25/28] send-pack: support pushing from " Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 26/28] git-clone.txt: remove shallow clone limitations Nguyễn Thái Ngọc Duy
2013-11-25  3:55 ` [PATCH v3 27/28] clone: use git protocol for cloning shallow repo locally Nguyễn Thái Ngọc Duy
2013-11-27  1:36   ` Eric Sunshine
2013-11-25  3:55 ` [PATCH v3 28/28] prune: clean .git/shallow after pruning objects Nguyễn Thái Ngọc Duy
2013-12-05 13:02 ` [PATCH v4 00/28] First class shallow clone Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 01/28] transport.h: remove send_pack prototype, already defined in send-pack.h Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 02/28] Replace struct extra_have_objects with struct sha1_array Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 03/28] send-pack: forbid pushing from a shallow repository Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 04/28] clone: prevent --reference to " Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 05/28] Make the sender advertise shallow commits to the receiver Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 06/28] connect.c: teach get_remote_heads to parse "shallow" lines Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 07/28] shallow.c: extend setup_*_shallow() to accept extra shallow commits Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 08/28] shallow.c: the 8 steps to select new commits for .git/shallow Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 09/28] shallow.c: steps 6 and 7 " Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 10/28] fetch-pack.c: move shallow update code out of fetch_pack() Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 11/28] fetch-pack.h: one statement per bitfield declaration Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 12/28] clone: support remote shallow repository Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 13/28] fetch: support fetching from a " Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 14/28] upload-pack: make sure deepening preserves shallow roots Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 15/28] fetch: add --update-shallow to accept refs that update .git/shallow Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 16/28] receive-pack: reorder some code in unpack() Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 17/28] receive/send-pack: support pushing from a shallow clone Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 18/28] New var GIT_SHALLOW_FILE to propagate --shallow-file to subprocesses Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 19/28] connected.c: add new variant that runs with --shallow-file Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 20/28] receive-pack: allow pushes that update .git/shallow Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 21/28] send-pack: support pushing to a shallow clone Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 22/28] remote-curl: pass ref SHA-1 to fetch-pack as well Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 23/28] Support shallow fetch/clone over smart-http Nguyễn Thái Ngọc Duy
2014-01-08 11:25     ` Jeff King
2014-01-08 12:13       ` [PATCH] t5537: fix incorrect expectation in test case 10 Nguyễn Thái Ngọc Duy
2014-01-09 21:57         ` Jeff King
2013-12-05 13:02   ` [PATCH v4 24/28] receive-pack: support pushing to a shallow clone via http Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 25/28] send-pack: support pushing from " Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 26/28] clone: use git protocol for cloning shallow repo locally Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 27/28] prune: clean .git/shallow after pruning objects Nguyễn Thái Ngọc Duy
2013-12-05 13:02   ` [PATCH v4 28/28] git-clone.txt: remove shallow clone limitations Nguyễn Thái Ngọc Duy

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1385351754-9954-9-git-send-email-pclouds@gmail.com \
    --to=pclouds@gmail.com \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).