git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Junio C Hamano <junkio@cox.net>
To: Johannes Schindelin <Johannes.Schindelin@gmx.de>
Cc: git@vger.kernel.org
Subject: [PATCH] Shallow clone: low level machinery.
Date: Tue, 31 Jan 2006 03:02:37 -0800	[thread overview]
Message-ID: <7vmzhc1wz6.fsf_-_@assigned-by-dhcp.cox.net> (raw)
In-Reply-To: <7v8xsxa70o.fsf@assigned-by-dhcp.cox.net> (Junio C. Hamano's message of "Mon, 30 Jan 2006 10:46:15 -0800")

This adds a --shallow=<refname> option to git-clone-pack, and
extends the upload-pack protocol with a "shallow" extension.

An example:

	$ mkdir junk && cd junk && git init-db
	$ git clone-pack --shallow=refs/heads/master ../git.git master

This creates a very shallow clone of my repository.  It says
"pretend refs/heads/master commit is the beginning of time, and
clone your master branch".  As before, clone-pack with explicit
head name outputs the commit object name and refname to the
standard output instead of creating the branch.  The command
creates a .git/info/grafts file to cauterize the history at that
commit as well.

I think the upload-pack side is more or less ready to be debugged,
but the client side is highly experimental.  It has quite
serious limitations and is more of a proof of correctness at the
protocol extension level than for practical use:

 - Currently it can take only one --shallow option.

 - It has to be spelled in full (refs/heads/master, not
   "master").

 - It has to be included as part of the explicit refname list.

 - There is no matching --shallow in git-fetch-pack.

Signed-off-by: Junio C Hamano <junkio@cox.net>

---

 cache.h       |    9 +++
 clone-pack.c  |   69 ++++++++++++++++++++++-
 commit-tree.c |    5 --
 commit.c      |  174 +++++++++++++++++++++++++++++++++++++++------------------
 commit.h      |   14 +++++
 connect.c     |   24 ++++++++
 object.c      |    7 ++
 object.h      |    2 +
 upload-pack.c |   94 +++++++++++++++++++++++++++++--
 9 files changed, 331 insertions(+), 67 deletions(-)

75f1f4871277f403991c771eb642bdbd6fe82021
diff --git a/cache.h b/cache.h
index bdbe2d6..18d4cdb 100644
--- a/cache.h
+++ b/cache.h
@@ -111,11 +111,18 @@ static inline unsigned int create_ce_mod
 extern struct cache_entry **active_cache;
 extern unsigned int active_nr, active_alloc, active_cache_changed;
 
+/*
+ * Having more than two parents is not strange at all, and this is
+ * how multi-way merges are represented.
+ */
+#define MAXPARENT (16)
+
 #define GIT_DIR_ENVIRONMENT "GIT_DIR"
 #define DEFAULT_GIT_DIR_ENVIRONMENT ".git"
 #define DB_ENVIRONMENT "GIT_OBJECT_DIRECTORY"
 #define INDEX_ENVIRONMENT "GIT_INDEX_FILE"
 #define GRAFT_ENVIRONMENT "GIT_GRAFT_FILE"
+#define GRAFT_INFO_ENVIRONMENT "GIT_GRAFT_INFO"
 
 extern char *get_git_dir(void);
 extern char *get_object_directory(void);
@@ -296,6 +303,8 @@ struct ref {
 	char name[FLEX_ARRAY]; /* more */
 };
 
+extern void send_graft_info(int);
+
 extern int git_connect(int fd[2], char *url, const char *prog);
 extern int finish_connect(pid_t pid);
 extern int path_match(const char *path, int nr, char **match);
diff --git a/clone-pack.c b/clone-pack.c
index f634431..c1708d5 100644
--- a/clone-pack.c
+++ b/clone-pack.c
@@ -1,15 +1,76 @@
 #include "cache.h"
 #include "refs.h"
 #include "pkt-line.h"
+#include "commit.h"
 
 static const char clone_pack_usage[] =
-"git-clone-pack [--exec=<git-upload-pack>] [<host>:]<directory> [<heads>]*";
+"git-clone-pack [--shallow=name] [--exec=<git-upload-pack>] [<host>:]<directory> [<heads>]*";
 static const char *exec = "git-upload-pack";
+static char *shallow = NULL;
+
+/*
+ * Shallow-clone handshake: send "shallow", add the other end's grafts to
+ * ours, cut history at the --shallow ref, then send and save the result.
+ */
+static void shallow_exchange(int fd[2], struct ref *ref)
+{
+	char line[1024];
+	char *graft_file;
+	FILE *fp;
+	int i, j;
+
+	while (ref && strcmp(ref->name, shallow))
+		ref = ref->next;
+	if (!ref)
+		die("No matching ref specified for shallow clone %s",
+		    shallow);
+	if (!server_supports("shallow"))
+		die("The other end does not support shallow clone");
+	packet_write(fd[1], "shallow\n");
+	packet_flush(fd[1]);
+
+	/* Read their graft, one packet line each, until a zero-length read */
+	prepare_commit_graft();
+	for (;;) {
+		int len = packet_read_line(fd[0], line, sizeof(line));
+		if (!len)
+			break;
+		add_graft_info(line);
+	}
+	/* And cauterize at --shallow=<refname>: a graft with no parents */
+	sprintf(line, "%s\n", sha1_to_hex(ref->old_sha1));
+	add_graft_info(line);
+
+	/* tell ours */
+	packet_write(fd[1], "custom\n");
+	send_graft_info(fd[1]);
+	packet_flush(fd[1]);
+
+	/* write out ours; a failed write must not go unnoticed */
+	graft_file = get_graft_file();
+	fp = fopen(graft_file, "w");
+	if (!fp)
+		die("cannot update grafts!");
+	for (i = 0; i < commit_graft_nr; i++) {
+		struct commit_graft *g = commit_graft[i];
+		fputs(sha1_to_hex(g->sha1), fp);
+		for (j = 0; j < g->nr_parent; j++) {
+			fputc(' ', fp);
+			fputs(sha1_to_hex(g->parent[j]), fp);
+		}
+		fputc('\n', fp);
+	}
+	if (ferror(fp) || fclose(fp))
+		die("failed to write grafts file %s", graft_file);
+}
 
 static void clone_handshake(int fd[2], struct ref *ref)
 {
 	unsigned char sha1[20];
 
+	if (shallow)
+		shallow_exchange(fd, ref);
+
 	while (ref) {
 		packet_write(fd[1], "want %s\n", sha1_to_hex(ref->old_sha1));
 		ref = ref->next;
@@ -160,6 +221,10 @@ int main(int argc, char **argv)
 				exec = arg + 7;
 				continue;
 			}
+			if (!strncmp("--shallow=", arg, 10)) {
+				shallow = arg + 10;
+				continue;
+			}
 			usage(clone_pack_usage);
 		}
 		dest = arg;
@@ -167,6 +232,8 @@ int main(int argc, char **argv)
 		nr_heads = argc - i - 1;
 		break;
 	}
+	if (shallow && !nr_heads)
+		die("shallow clone needs an explicit head name");
 	if (!dest)
 		usage(clone_pack_usage);
 	pid = git_connect(fd, dest, exec);
diff --git a/commit-tree.c b/commit-tree.c
index 4634b50..cbf2979 100644
--- a/commit-tree.c
+++ b/commit-tree.c
@@ -53,11 +53,6 @@ static void check_valid(unsigned char *s
 	free(buf);
 }
 
-/*
- * Having more than two parents is not strange at all, and this is
- * how multi-way merges are represented.
- */
-#define MAXPARENT (16)
 static unsigned char parent_sha1[MAXPARENT][20];
 
 static const char commit_tree_usage[] = "git-commit-tree <sha1> [-p <sha1>]* < changelog";
diff --git a/commit.c b/commit.c
index 97205bf..a862287 100644
--- a/commit.c
+++ b/commit.c
@@ -102,12 +102,8 @@ static unsigned long parse_commit_date(c
 	return date;
 }
 
-static struct commit_graft {
-	unsigned char sha1[20];
-	int nr_parent;
-	unsigned char parent[0][20]; /* more */
-} **commit_graft;
-static int commit_graft_alloc, commit_graft_nr;
+struct commit_graft **commit_graft;
+int commit_graft_alloc, commit_graft_nr;
 
 static int commit_graft_pos(const unsigned char *sha1)
 {
@@ -128,62 +124,104 @@ static int commit_graft_pos(const unsign
 	return -lo - 1;
 }
 
-static void prepare_commit_graft(void)
+/* Parse one graft line (modified in place); 0 if taken or '#', -1 if bad. */
+int add_graft_info(char *buf)
 {
-	char *graft_file = get_graft_file();
-	FILE *fp = fopen(graft_file, "r");
+	/* The format is just "Commit Parent1 Parent2 ...\n" */
+	int len = strlen(buf), i;
+	struct commit_graft *graft = NULL;
+
+	if (len && buf[len-1] == '\n')	/* len may be 0: never read buf[-1] */
+		buf[--len] = 0;
+	if (buf[0] == '#')
+		return 0;
+	if ((len + 1) % 41) {	/* 40 hex digits per name, one separator between */
+	bad_graft_data:
+		error("bad graft data: %s", buf);
+		free(graft);
+		return -1;
+	}
+	i = (len + 1) / 41 - 1;	/* names on the line, minus the commit itself */
+	graft = xmalloc(sizeof(*graft) + 20 * i);
+	graft->nr_parent = i;
+	if (get_sha1_hex(buf, graft->sha1))
+		goto bad_graft_data;
+	for (i = 40; i < len; i += 41) {
+		if (buf[i] != ' ')
+			goto bad_graft_data;
+		if (get_sha1_hex(buf + i + 1, graft->parent[i/41]))
+			goto bad_graft_data;
+	}
+	i = commit_graft_pos(graft->sha1);
+	if (0 <= i) {	/* a graft for this commit exists: the new one replaces it */
+		free(commit_graft[i]);
+		commit_graft[i] = graft;
+		return 0;
+	}
+	i = -i - 1;	/* decode the insertion point from commit_graft_pos() */
+	if (commit_graft_alloc <= ++commit_graft_nr) {
+		commit_graft_alloc = alloc_nr(commit_graft_alloc);
+		commit_graft = xrealloc(commit_graft,
+					sizeof(*commit_graft) *
+					commit_graft_alloc);
+	}
+	if (i < commit_graft_nr)
+		memmove(commit_graft + i + 1,
+			commit_graft + i,
+			(commit_graft_nr - i - 1) *
+			sizeof(*commit_graft));
+	commit_graft[i] = graft;
+	return 0;
+}
+
+void clear_commit_graft(void)
+{	/* Free every graft entry and the table itself, leaving it empty. */
+	int i;
+	for (i = 0; i < commit_graft_nr; i++)
+		free(commit_graft[i]);
+	free(commit_graft);
+	commit_graft_nr = commit_graft_alloc = 0;
+	commit_graft = NULL;	/* the table pointer is NULL again afterwards */
+}
+
+void prepare_commit_graft(void)
+{	/* Load grafts from $GIT_GRAFT_INFO if it is set, else from the graft file. */
+	char *graft_file;
+	FILE *fp;
 	char buf[1024];
+
+	if (getenv(GRAFT_INFO_ENVIRONMENT)) {
+		char *cp, *ep;
+		for (cp = getenv(GRAFT_INFO_ENVIRONMENT);
+		     *cp;
+		     cp = ep) {
+			int more = 0;
+			ep = strchr(cp, '\n');
+			if (ep) {
+				more = 1;
+				*ep = '\0';	/* terminate this entry in place */
+			}
+			else {
+				ep = cp + strlen(cp);
+			}
+			if (ep != cp)	/* skip empty entries */
+				add_graft_info(cp);
+			if (!more)
+				break;
+			*ep = '\n';	/* put back the newline overwritten above */
+			ep++;
+		}
+		return;	/* the graft file is not read when the variable is set */
+	}
+	graft_file = get_graft_file();
+	fp = fopen(graft_file, "r");
 	if (!fp) {
-		commit_graft = (struct commit_graft **) "hack";
+		commit_graft = (struct commit_graft **) xmalloc(1); /* heap placeholder: add_graft_info() xrealloc()s it, clear_commit_graft() free()s it */
 		return;
 	}
-	while (fgets(buf, sizeof(buf), fp)) {
-		/* The format is just "Commit Parent1 Parent2 ...\n" */
-		int len = strlen(buf);
-		int i;
-		struct commit_graft *graft = NULL;
+	while (fgets(buf, sizeof(buf), fp))
+		add_graft_info(buf);	/* bad lines are reported by add_graft_info() and skipped */
 
-		if (buf[len-1] == '\n')
-			buf[--len] = 0;
-		if (buf[0] == '#')
-			continue;
-		if ((len + 1) % 41) {
-		bad_graft_data:
-			error("bad graft data: %s", buf);
-			free(graft);
-			continue;
-		}
-		i = (len + 1) / 41 - 1;
-		graft = xmalloc(sizeof(*graft) + 20 * i);
-		graft->nr_parent = i;
-		if (get_sha1_hex(buf, graft->sha1))
-			goto bad_graft_data;
-		for (i = 40; i < len; i += 41) {
-			if (buf[i] != ' ')
-				goto bad_graft_data;
-			if (get_sha1_hex(buf + i + 1, graft->parent[i/41]))
-				goto bad_graft_data;
-		}
-		i = commit_graft_pos(graft->sha1);
-		if (0 <= i) {
-			error("duplicate graft data: %s", buf);
-			free(graft);
-			continue;
-		}
-		i = -i - 1;
-		if (commit_graft_alloc <= ++commit_graft_nr) {
-			commit_graft_alloc = alloc_nr(commit_graft_alloc);
-			commit_graft = xrealloc(commit_graft,
-						sizeof(*commit_graft) *
-						commit_graft_alloc);
-		}
-		if (i < commit_graft_nr)
-			memmove(commit_graft + i + 1,
-				commit_graft + i,
-				(commit_graft_nr - i - 1) *
-				sizeof(*commit_graft));
-		commit_graft[i] = graft;
-	}
 	fclose(fp);
 }
 
@@ -288,6 +326,30 @@ int parse_commit(struct commit *item)
 	return ret;
 }
 
+static void reparse_commit_parents(struct object *o)
+{	/* Reset one parsed commit to unparsed; any other object is left alone. */
+	struct commit *c;
+	struct commit_list *parents;
+	if ((o->type != commit_type) || !o->parsed)
+		return;
+	c = (struct commit *)o;
+	parents = c->parents;
+	o->parsed = 0;
+	while (parents) {	/* frees only the list nodes, not the parent commits */
+		struct commit_list *next = parents->next;
+		free(parents);
+		parents = next;
+	}
+	c->parents = NULL;
+	free(c->buffer);
+	c->buffer = NULL;
+}
+
+void reparse_all_parsed_commits(void)
+{	/* Apply reparse_commit_parents() to every object for_each_object() visits. */
+	for_each_object(reparse_commit_parents);
+}
+
 struct commit_list *commit_list_insert(struct commit *item, struct commit_list **list_p)
 {
 	struct commit_list *new_list = xmalloc(sizeof(struct commit_list));
diff --git a/commit.h b/commit.h
index 986b22d..abc5b9e 100644
--- a/commit.h
+++ b/commit.h
@@ -17,6 +17,20 @@ struct commit {
 	char *buffer;
 };
 
+struct commit_graft {
+	unsigned char sha1[20];	/* the commit named first on the graft line */
+	int nr_parent;	/* number of entries in parent[] */
+	unsigned char parent[0][20]; /* more: nr_parent names allocated past the struct */
+};
+
+extern struct commit_graft **commit_graft;
+extern int commit_graft_alloc, commit_graft_nr;
+
+extern void prepare_commit_graft(void);
+extern void clear_commit_graft(void);
+extern int add_graft_info(char *);
+extern void reparse_all_parsed_commits(void);
+
 extern int save_commit_buffer;
 extern const char *commit_type;
 
diff --git a/connect.c b/connect.c
index 3f2d65c..046d1da 100644
--- a/connect.c
+++ b/connect.c
@@ -3,6 +3,7 @@
 #include "pkt-line.h"
 #include "quote.h"
 #include "refs.h"
+#include "commit.h"
 #include <sys/wait.h>
 #include <sys/socket.h>
 #include <netinet/in.h>
@@ -298,6 +299,29 @@ int match_refs(struct ref *src, struct r
 	return 0;
 }
 
+void send_graft_info(int outfd)
+{	/* Write each graft to outfd as one "commit parent...\n" packet; no flush is sent. */
+	int i, j;
+	char packet_buf[41*MAXPARENT], *buf;	/* holds 40 + 41*(MAXPARENT-1) + NUL */
+
+	for (i = 0; i < commit_graft_nr; i++) {
+		struct commit_graft *g = commit_graft[i];
+		buf = packet_buf;
+		memcpy(buf, sha1_to_hex(g->sha1), 40);
+		buf += 40;
+		if (MAXPARENT <= g->nr_parent)	/* would not fit in packet_buf */
+			die("insanely big octopus graft with %d parents: %s",
+			    g->nr_parent, sha1_to_hex(g->sha1));
+		for (j = 0; j < g->nr_parent; j++) {
+			*buf++ = ' ';
+			memcpy(buf, sha1_to_hex(g->parent[j]), 40);
+			buf += 40;
+		}
+		*buf = 0;
+		packet_write(outfd, "%s\n", packet_buf);
+	}
+}
+
 enum protocol {
 	PROTO_LOCAL = 1,
 	PROTO_SSH,
diff --git a/object.c b/object.c
index 1577f74..bbcfcd8 100644
--- a/object.c
+++ b/object.c
@@ -252,3 +252,10 @@ int object_list_contains(struct object_l
 	}
 	return 0;
 }
+
+void for_each_object(void (*fn)(struct object *))
+{	/* Call fn once on each objs[i] for i in [0, nr_objs), in index order. */
+	int i;
+	for (i = 0; i < nr_objs; i++)
+		fn(objs[i]);
+}
diff --git a/object.h b/object.h
index 0e76182..b4c9729 100644
--- a/object.h
+++ b/object.h
@@ -55,4 +55,6 @@ unsigned object_list_length(struct objec
 
 int object_list_contains(struct object_list *list, struct object *obj);
 
+void for_each_object(void (*)(struct object *));
+
 #endif /* OBJECT_H */
diff --git a/upload-pack.c b/upload-pack.c
index d198055..90ea549 100644
--- a/upload-pack.c
+++ b/upload-pack.c
@@ -13,11 +13,16 @@ static const char upload_pack_usage[] = 
 #define WANTED (1U << 2)
 #define MAX_HAS 256
 #define MAX_NEEDS 256
-static int nr_has = 0, nr_needs = 0, multi_ack = 0, nr_our_refs = 0;
+#define MAX_PARENTS 20
+static int nr_has = 0, nr_needs = 0, nr_our_refs = 0;
 static unsigned char has_sha1[MAX_HAS][20];
 static unsigned char needs_sha1[MAX_NEEDS][20];
 static unsigned int timeout = 0;
 
+/* protocol extensions */
+static int multi_ack = 0;
+static int using_custom_graft = 0;
+
 static void reset_timeout(void)
 {
 	alarm(timeout);
@@ -163,6 +168,77 @@ static int get_common_commits(void)
 	}
 }
 
+static void exchange_grafts(void)
+{	/* Handle "shallow": send our grafts on fd 1, then optionally adopt theirs from fd 0. */
+	int len;
+	char line[41*MAX_PARENTS];	/* NOTE(review): MAX_PARENTS is 20 here, MAXPARENT in cache.h is 16 -- confirm intended */
+
+	/* We heard "shallow"; drop up to the next flush */
+	for (;;) {
+		len = packet_read_line(0, line, sizeof(line));
+		reset_timeout();
+		if (!len)
+			break;
+	}
+
+	/* Send our graft */
+	prepare_commit_graft();
+	send_graft_info(1);
+	packet_flush(1);
+
+	/* For precise common commits discovery, we need to use
+	 * the graft information we received from them.
+	 * But this is expensive, so the downloader first says
+	 * if it wants to use our graft as is.
+	 */
+	len = packet_read_line(0, line, sizeof(line));
+	reset_timeout();
+	if (!len)
+		; /* use ours as is */
+	else if (!strcmp(line, "custom\n")) {
+		using_custom_graft = 1;
+		clear_commit_graft();	/* their list replaces ours entirely */
+		for (;;) {
+			len = packet_read_line(0, line, sizeof(line));
+			reset_timeout();
+			if (!len)
+				break;
+			if (add_graft_info(line))
+				die("Bad graft line %s", line);
+		}
+		/* And using that, we prepare our end. */
+		reparse_all_parsed_commits();
+	}
+	else
+		die("expected 'custom', got '%s'", line);
+}
+
+/*
+ * Export the current grafts via GIT_GRAFT_INFO, one line per graft.
+ */
+static void setup_custom_graft(void)
+{
+	char *graft_env = strdup(GRAFT_INFO_ENVIRONMENT "="), *ptr;
+	int envlen, i, j;
+
+	if (!graft_env)
+		die("out of memory");
+	envlen = strlen(graft_env);
+	for (i = 0; i < commit_graft_nr; i++) {
+		struct commit_graft *g = commit_graft[i];
+		/* 40 hex digits, 41 more per parent, '\n' and the NUL */
+		graft_env = xrealloc(graft_env,
+				     envlen + 41 * (g->nr_parent + 1) + 1);
+		ptr = graft_env + envlen;
+		ptr += sprintf(ptr, "%s", sha1_to_hex(g->sha1));
+		for (j = 0; j < g->nr_parent; j++)
+			ptr += sprintf(ptr, " %s", sha1_to_hex(g->parent[j]));
+		ptr += sprintf(ptr, "\n");
+		envlen = ptr - graft_env;
+	}
+	putenv(graft_env);
+}
+
 static int receive_needs(void)
 {
 	static char line[1000];
@@ -180,16 +256,22 @@ static int receive_needs(void)
 		sha1_buf = dummy;
 		if (needs == MAX_NEEDS) {
 			fprintf(stderr,
-				"warning: supporting only a max of %d requests. "
+				"warning: supporting only a max of "
+				"%d requests. "
 				"sending everything instead.\n",
 				MAX_NEEDS);
 		}
 		else if (needs < MAX_NEEDS)
 			sha1_buf = needs_sha1[needs];
 
-		if (strncmp("want ", line, 5) || get_sha1_hex(line+5, sha1_buf))
+		if (!strcmp("shallow\n", line)) {
+			exchange_grafts();
+			continue;
+		}
+		if (strncmp("want ", line, 5) ||
+		    get_sha1_hex(line+5, sha1_buf))
 			die("git-upload-pack: protocol error, "
-			    "expected to get sha, not '%s'", line);
+			    "expected to get want-sha1, not '%s'", line);
 		if (strstr(line+45, "multi_ack"))
 			multi_ack = 1;
 
@@ -213,7 +295,7 @@ static int receive_needs(void)
 
 static int send_ref(const char *refname, const unsigned char *sha1)
 {
-	static char *capabilities = "multi_ack";
+	static char *capabilities = "multi_ack shallow";
 	struct object *o = parse_object(sha1);
 
 	if (capabilities)
@@ -243,6 +325,8 @@ static int upload_pack(void)
 	if (!nr_needs)
 		return 0;
 	get_common_commits();
+	if (using_custom_graft)
+		setup_custom_graft();
 	create_pack_file();
 	return 0;
 }
-- 
1.1.6.gefef

  reply	other threads:[~2006-01-31 11:02 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-30  7:18 [RFC] shallow clone Junio C Hamano
2006-01-30 11:39 ` Johannes Schindelin
2006-01-30 11:58   ` Simon Richter
2006-01-30 12:13     ` Johannes Schindelin
2006-01-30 13:25       ` Simon Richter
2006-01-30 19:25       ` Junio C Hamano
2006-01-31 11:28         ` Johannes Schindelin
2006-01-31 13:05           ` Simon Richter
2006-01-31 13:31             ` Johannes Schindelin
2006-01-31 14:23               ` Simon Richter
2006-01-30 19:25     ` Junio C Hamano
2006-01-31  8:37       ` Franck
2006-01-31  8:51         ` Junio C Hamano
2006-01-31 11:11           ` Franck
2006-01-30 18:46   ` Junio C Hamano
2006-01-31 11:02     ` Junio C Hamano [this message]
2006-01-31 13:58       ` [PATCH] Shallow clone: low level machinery Johannes Schindelin
2006-01-31 17:49         ` Junio C Hamano
2006-01-31 18:06           ` Johannes Schindelin
2006-01-31 18:22             ` Junio C Hamano
2006-02-01 14:33               ` Johannes Schindelin
2006-02-01 20:27                 ` Junio C Hamano
2006-02-02  0:48                   ` Johannes Schindelin
2006-02-02  1:17                     ` Junio C Hamano
2006-02-02 18:44                       ` Johannes Schindelin
2006-02-02 19:31                         ` Junio C Hamano
2006-01-31 14:20     ` [RFC] shallow clone Johannes Schindelin
2006-01-31 20:59     ` Junio C Hamano
2006-02-01 14:47       ` Johannes Schindelin
     [not found] ` <43DF1F1D.1060704@innova-card.com>
2006-01-31  9:00   ` Franck

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7vmzhc1wz6.fsf_-_@assigned-by-dhcp.cox.net \
    --to=junkio@cox.net \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=git@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).