git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 0/3] Parallel pull for SSH
@ 2005-08-02 23:45 barkalow
  2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:45 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

This series pipelines pulling by ssh; objects are requested as soon as 
possible, and read once as many hashes as possible have been stuffed in 
the queue. This seems to be a major improvement in throughput, although it 
doesn't do any packing, so the total data transferred is higher than it 
could be.

 1: Object library additions
 2: Parallel pull algorithm
 3: Pipelined SSH support

It replaces these commits from -pu:
 9d469633827c3ac9736ff5cca620db0491ecb200
 7e11bade2166d11e3ce0b8635eb9f61716cddcd6

	-Daniel
*This .sig left intentionally blank*

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] Object library enhancements
  2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
@ 2005-08-02 23:45 ` barkalow
  2005-08-02 23:46 ` [PATCH 2/3] Parallelize the pull algorithm barkalow
  2005-08-02 23:46 ` [PATCH 3/3] Parallelize pulling by ssh barkalow
  2 siblings, 0 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:45 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

Add function to look up an object which is entirely unknown, so that
it can be put in a list. Various other functions related to lists of
objects.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---

 object.c |   55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 object.h |   10 ++++++++++
 tree.h   |    1 +
 3 files changed, 65 insertions(+), 1 deletions(-)

6f610ea3a0118fd7bcda89b75b538d3711418b72
diff --git a/object.c b/object.c
--- a/object.c
+++ b/object.c
@@ -99,7 +99,9 @@ void mark_reachable(struct object *obj, 
 
 struct object *lookup_object_type(const unsigned char *sha1, const char *type)
 {
-	if (!strcmp(type, blob_type)) {
+	if (!type) {
+		return lookup_unknown_object(sha1);
+	} else if (!strcmp(type, blob_type)) {
 		return &lookup_blob(sha1)->object;
 	} else if (!strcmp(type, tree_type)) {
 		return &lookup_tree(sha1)->object;
@@ -113,6 +115,27 @@ struct object *lookup_object_type(const 
 	}
 }
 
+union any_object {
+	struct object object;
+	struct commit commit;
+	struct tree tree;
+	struct blob blob;
+	struct tag tag;
+};
+
+struct object *lookup_unknown_object(const unsigned char *sha1)
+{
+	struct object *obj = lookup_object(sha1);
+	if (!obj) {
+		union any_object *ret = xmalloc(sizeof(*ret));
+		memset(ret, 0, sizeof(*ret));
+		created_object(sha1, &ret->object);
+		ret->object.type = NULL;
+		return &ret->object;
+	}
+	return obj;
+}
+
 struct object *parse_object(const unsigned char *sha1)
 {
 	unsigned long size;
@@ -150,3 +173,33 @@ struct object *parse_object(const unsign
 	}
 	return NULL;
 }
+
+struct object_list *object_list_insert(struct object *item,
+				       struct object_list **list_p)
+{
+	struct object_list *new_list = xmalloc(sizeof(struct object_list));
+        new_list->item = item;
+        new_list->next = *list_p;
+        *list_p = new_list;
+        return new_list;
+}
+
+unsigned object_list_length(struct object_list *list)
+{
+	unsigned ret = 0;
+	while (list) {
+		list = list->next;
+		ret++;
+	}
+	return ret;
+}
+
+int object_list_contains(struct object_list *list, struct object *obj)
+{
+	while (list) {
+		if (list->item == obj)
+			return 1;
+		list = list->next;
+	}
+	return 0;
+}
diff --git a/object.h b/object.h
--- a/object.h
+++ b/object.h
@@ -31,8 +31,18 @@ void created_object(const unsigned char 
 /** Returns the object, having parsed it to find out what it is. **/
 struct object *parse_object(const unsigned char *sha1);
 
+/** Returns the object, with potentially excess memory allocated. **/
+struct object *lookup_unknown_object(const unsigned  char *sha1);
+
 void add_ref(struct object *refer, struct object *target);
 
 void mark_reachable(struct object *obj, unsigned int mask);
 
+struct object_list *object_list_insert(struct object *item, 
+				       struct object_list **list_p);
+
+unsigned object_list_length(struct object_list *list);
+
+int object_list_contains(struct object_list *list, struct object *obj);
+
 #endif /* OBJECT_H */
diff --git a/tree.h b/tree.h
--- a/tree.h
+++ b/tree.h
@@ -14,6 +14,7 @@ struct tree_entry_list {
 	unsigned int mode;
 	char *name;
 	union {
+		struct object *any;
 		struct tree *tree;
 		struct blob *blob;
 	} item;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 2/3] Parallelize the pull algorithm
  2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
  2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
@ 2005-08-02 23:46 ` barkalow
  2005-08-02 23:46 ` [PATCH 3/3] Parallelize pulling by ssh barkalow
  2 siblings, 0 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:46 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

This processes objects in two simultaneous passes. Each object will
first be given to prefetch(), as soon as it is possible to tell that
it will be needed, and then will be given to fetch(), when it is the
next object that needs to be parsed. Unless an implementation does
something with prefetch(), this should have no effect.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---

 http-pull.c  |    4 ++
 local-pull.c |    4 ++
 pull.c       |  132 ++++++++++++++++++++++++++++++++++------------------------
 pull.h       |    7 +++
 ssh-pull.c   |    4 ++
 5 files changed, 97 insertions(+), 54 deletions(-)

adf8aa039f2db3fb3bfc4dee0a7354b662cd5422
diff --git a/http-pull.c b/http-pull.c
--- a/http-pull.c
+++ b/http-pull.c
@@ -67,6 +67,10 @@ static size_t fwrite_sha1_file(void *ptr
 	return size;
 }
 
+void prefetch(unsigned char *sha1)
+{
+}
+
 static int got_indices = 0;
 
 static struct packed_git *packs = NULL;
diff --git a/local-pull.c b/local-pull.c
--- a/local-pull.c
+++ b/local-pull.c
@@ -11,6 +11,10 @@ static int use_filecopy = 1;
 
 static char *path; /* "Remote" git repository */
 
+void prefetch(unsigned char *sha1)
+{
+}
+
 int fetch(unsigned char *sha1)
 {
 	static int object_name_start = -1;
diff --git a/pull.c b/pull.c
--- a/pull.c
+++ b/pull.c
@@ -17,11 +17,8 @@ int get_all = 0;
 int get_verbosely = 0;
 static unsigned char current_commit_sha1[20];
 
-static const char commitS[] = "commit";
-static const char treeS[] = "tree";
-static const char blobS[] = "blob";
-
-void pull_say(const char *fmt, const char *hex) {
+void pull_say(const char *fmt, const char *hex) 
+{
 	if (get_verbosely)
 		fprintf(stderr, fmt, hex);
 }
@@ -48,93 +45,118 @@ static int make_sure_we_have_it(const ch
 	return status;
 }
 
-static int process_unknown(unsigned char *sha1);
+static int process(unsigned char *sha1, const char *type);
 
-static int process_tree(unsigned char *sha1)
+static int process_tree(struct tree *tree)
 {
-	struct tree *tree = lookup_tree(sha1);
 	struct tree_entry_list *entries;
 
 	if (parse_tree(tree))
 		return -1;
 
 	for (entries = tree->entries; entries; entries = entries->next) {
-		const char *what = entries->directory ? treeS : blobS;
-		if (make_sure_we_have_it(what, entries->item.tree->object.sha1))
+		if (process(entries->item.any->sha1,
+			    entries->directory ? tree_type : blob_type))
 			return -1;
-		if (entries->directory) {
-			if (process_tree(entries->item.tree->object.sha1))
-				return -1;
-		}
 	}
 	return 0;
 }
 
-static int process_commit(unsigned char *sha1)
+static int process_commit(struct commit *commit)
 {
-	struct commit *obj = lookup_commit(sha1);
-
-	if (make_sure_we_have_it(commitS, sha1))
+	if (parse_commit(commit))
 		return -1;
 
-	if (parse_commit(obj))
-		return -1;
+	memcpy(current_commit_sha1, commit->object.sha1, 20);
 
 	if (get_tree) {
-		if (make_sure_we_have_it(treeS, obj->tree->object.sha1))
-			return -1;
-		if (process_tree(obj->tree->object.sha1))
+		if (process(commit->tree->object.sha1, tree_type))
 			return -1;
 		if (!get_all)
 			get_tree = 0;
 	}
 	if (get_history) {
-		struct commit_list *parents = obj->parents;
+		struct commit_list *parents = commit->parents;
 		for (; parents; parents = parents->next) {
 			if (has_sha1_file(parents->item->object.sha1))
 				continue;
-			if (make_sure_we_have_it(NULL,
-						 parents->item->object.sha1)) {
-				/* The server might not have it, and
-				 * we don't mind. 
-				 */
-				continue;
-			}
-			if (process_commit(parents->item->object.sha1))
+			if (process(parents->item->object.sha1,
+				    commit_type))
 				return -1;
-			memcpy(current_commit_sha1, sha1, 20);
 		}
 	}
 	return 0;
 }
 
-static int process_tag(unsigned char *sha1)
+static int process_tag(struct tag *tag)
 {
-	struct tag *obj = lookup_tag(sha1);
-
-	if (parse_tag(obj))
+	if (parse_tag(tag))
 		return -1;
-	return process_unknown(obj->tagged->sha1);
+	return process(tag->tagged->sha1, NULL);
 }
 
-static int process_unknown(unsigned char *sha1)
+static struct object_list *process_queue = NULL;
+static struct object_list **process_queue_end = &process_queue;
+
+static int process(unsigned char *sha1, const char *type)
 {
 	struct object *obj;
-	if (make_sure_we_have_it("object", sha1))
-		return -1;
-	obj = parse_object(sha1);
-	if (!obj)
-		return error("Unable to parse object %s", sha1_to_hex(sha1));
-	if (obj->type == commit_type)
-		return process_commit(sha1);
-	if (obj->type == tree_type)
-		return process_tree(sha1);
-	if (obj->type == blob_type)
+	if (has_sha1_file(sha1))
 		return 0;
-	if (obj->type == tag_type)
-		return process_tag(sha1);
-	return error("Unable to determine requirement of type %s for %s",
-		     obj->type, sha1_to_hex(sha1));
+	obj = lookup_object_type(sha1, type);
+	if (object_list_contains(process_queue, obj))
+		return 0;
+	object_list_insert(obj, process_queue_end);
+	process_queue_end = &(*process_queue_end)->next;
+
+	//fprintf(stderr, "prefetch %s\n", sha1_to_hex(sha1));
+	prefetch(sha1);
+		
+	return 0;
+}
+
+static int loop(void)
+{
+	while (process_queue) {
+		struct object *obj = process_queue->item;
+		/*
+		fprintf(stderr, "%d objects to pull\n", 
+			object_list_length(process_queue));
+		*/
+		process_queue = process_queue->next;
+		if (!process_queue)
+			process_queue_end = &process_queue;
+
+		//fprintf(stderr, "fetch %s\n", sha1_to_hex(obj->sha1));
+		
+		if (make_sure_we_have_it(obj->type ?: "object", 
+					 obj->sha1))
+			return -1;
+		if (!obj->type)
+			parse_object(obj->sha1);
+		if (obj->type == commit_type) {
+			if (process_commit((struct commit *)obj))
+				return -1;
+			continue;
+		}
+		if (obj->type == tree_type) {
+			if (process_tree((struct tree *)obj))
+				return -1;
+			continue;
+		}
+		if (obj->type == blob_type) {
+			continue;
+		}
+		if (obj->type == tag_type) {
+			if (process_tag((struct tag *)obj))
+				return -1;
+			continue;
+		}
+		return error("Unable to determine requirements "
+			     "of type %s for %s",
+			     obj->type, sha1_to_hex(obj->sha1));
+	}
+	return 0;
 }
 
 static int interpret_target(char *target, unsigned char *sha1)
@@ -164,7 +186,9 @@ int pull(char *target)
 	if (interpret_target(target, sha1))
 		return error("Could not interpret %s as something to pull",
 			     target);
-	if (process_unknown(sha1))
+	if (process(sha1, NULL))
+		return -1;
+	if (loop())
 		return -1;
 	
 	if (write_ref) {
diff --git a/pull.h b/pull.h
--- a/pull.h
+++ b/pull.h
@@ -9,6 +9,13 @@
 extern int fetch(unsigned char *sha1);
 
 /*
+ * Fetch the specified object and store it locally; fetch() will be
+ * called later to determine success. To be provided by the particular
+ * implementation.
+ */
+extern void prefetch(unsigned char *sha1);
+
+/*
  * Fetch ref (relative to $GIT_DIR/refs) from the remote, and store
  * the 20-byte SHA1 in sha1.  Return 0 on success, -1 on failure.  To
  * be provided by the particular implementation.
diff --git a/ssh-pull.c b/ssh-pull.c
--- a/ssh-pull.c
+++ b/ssh-pull.c
@@ -10,6 +10,10 @@ static int fd_out;
 static unsigned char remote_version = 0;
 static unsigned char local_version = 1;
 
+void prefetch(unsigned char *sha1)
+{
+}
+
 int fetch(unsigned char *sha1)
 {
 	int ret;

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 3/3] Parallelize pulling by ssh
  2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
  2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
  2005-08-02 23:46 ` [PATCH 2/3] Parallelize the pull algorithm barkalow
@ 2005-08-02 23:46 ` barkalow
  2 siblings, 0 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:46 UTC (permalink / raw
  To: Junio C Hamano; +Cc: git

This causes ssh-pull to request objects in prefetch() and read then in
fetch(), such that it reduces the unpipelined round-trip time.

This also makes sha1_write_from_fd() support having a buffer of data
which it accidentally read from the fd after the object; this was
formerly not a problem, because it would always get a short read at
the end of an object, because the next object had not been
requested. This is no longer true.

Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---

 cache.h     |    3 ++-
 sha1_file.c |   37 ++++++++++++++++++++++---------------
 ssh-pull.c  |   44 ++++++++++++++++++++++++++++++++++++--------
 3 files changed, 60 insertions(+), 24 deletions(-)

9bd15230cb65acc78a97550c9467f98a04720ee8
diff --git a/cache.h b/cache.h
--- a/cache.h
+++ b/cache.h
@@ -198,7 +198,8 @@ extern int check_sha1_signature(const un
 /* Read a tree into the cache */
 extern int read_tree(void *buffer, unsigned long size, int stage, const char **paths);
 
-extern int write_sha1_from_fd(const unsigned char *sha1, int fd);
+extern int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
+			      size_t bufsize, size_t *bufposn);
 extern int write_sha1_to_fd(int fd, const unsigned char *sha1);
 
 extern int has_sha1_pack(const unsigned char *sha1);
diff --git a/sha1_file.c b/sha1_file.c
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1389,14 +1389,14 @@ int write_sha1_to_fd(int fd, const unsig
 	return 0;
 }
 
-int write_sha1_from_fd(const unsigned char *sha1, int fd)
+int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
+		       size_t bufsize, size_t *bufposn)
 {
 	char *filename = sha1_file_name(sha1);
 
 	int local;
 	z_stream stream;
 	unsigned char real_sha1[20];
-	unsigned char buf[4096];
 	unsigned char discard[4096];
 	int ret;
 	SHA_CTX c;
@@ -1414,7 +1414,24 @@ int write_sha1_from_fd(const unsigned ch
 
 	do {
 		ssize_t size;
-		size = read(fd, buf, 4096);
+		if (*bufposn) {
+			stream.avail_in = *bufposn;
+			stream.next_in = buffer;
+			do {
+				stream.next_out = discard;
+				stream.avail_out = sizeof(discard);
+				ret = inflate(&stream, Z_SYNC_FLUSH);
+				SHA1_Update(&c, discard, sizeof(discard) -
+					    stream.avail_out);
+			} while (stream.avail_in && ret == Z_OK);
+			write(local, buffer, *bufposn - stream.avail_in);
+			memmove(buffer, buffer + *bufposn - stream.avail_in,
+				stream.avail_in);
+			*bufposn = stream.avail_in;
+			if (ret != Z_OK)
+				break;
+		}
+		size = read(fd, buffer + *bufposn, bufsize - *bufposn);
 		if (size <= 0) {
 			close(local);
 			unlink(filename);
@@ -1423,18 +1440,8 @@ int write_sha1_from_fd(const unsigned ch
 			perror("Reading from connection");
 			return -1;
 		}
-		write(local, buf, size);
-		stream.avail_in = size;
-		stream.next_in = buf;
-		do {
-			stream.next_out = discard;
-			stream.avail_out = sizeof(discard);
-			ret = inflate(&stream, Z_SYNC_FLUSH);
-			SHA1_Update(&c, discard, sizeof(discard) -
-				    stream.avail_out);
-		} while (stream.avail_in && ret == Z_OK);
-		
-	} while (ret == Z_OK);
+		*bufposn += size;
+	} while (1);
 	inflateEnd(&stream);
 
 	close(local);
diff --git a/ssh-pull.c b/ssh-pull.c
--- a/ssh-pull.c
+++ b/ssh-pull.c
@@ -10,24 +10,49 @@ static int fd_out;
 static unsigned char remote_version = 0;
 static unsigned char local_version = 1;
 
+ssize_t force_write(int fd, void *buffer, size_t length)
+{
+	ssize_t ret = 0;
+	while (ret < length) {
+		ssize_t size = write(fd, buffer + ret, length - ret);
+		if (size < 0) {
+			return size;
+		}
+		if (size == 0) {
+			return ret;
+		}
+		ret += size;
+	}
+	return ret;
+}
+
 void prefetch(unsigned char *sha1)
 {
+	char type = 'o';
+	force_write(fd_out, &type, 1);
+	force_write(fd_out, sha1, 20);
+	//memcpy(requested + 20 * prefetches++, sha1, 20);
 }
 
+static char conn_buf[4096];
+static size_t conn_buf_posn = 0;
+
 int fetch(unsigned char *sha1)
 {
 	int ret;
 	signed char remote;
-	char type = 'o';
-	if (has_sha1_file(sha1))
-		return 0;
-	write(fd_out, &type, 1);
-	write(fd_out, sha1, 20);
-	if (read(fd_in, &remote, 1) < 1)
-		return -1;
+
+	if (conn_buf_posn) {
+		remote = conn_buf[0];
+		memmove(conn_buf, conn_buf + 1, --conn_buf_posn);
+	} else {
+		if (read(fd_in, &remote, 1) < 1)
+			return -1;
+	}
+	//fprintf(stderr, "Got %d\n", remote);
 	if (remote < 0)
 		return remote;
-	ret = write_sha1_from_fd(sha1, fd_in);
+	ret = write_sha1_from_fd(sha1, fd_in, conn_buf, 4096, &conn_buf_posn);
 	if (!ret)
 		pull_say("got %s\n", sha1_to_hex(sha1));
 	return ret;

^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2005-08-02 23:43 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
2005-08-02 23:46 ` [PATCH 2/3] Parallelize the pull algorithm barkalow
2005-08-02 23:46 ` [PATCH 3/3] Parallelize pulling by ssh barkalow

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).