* [PATCH 0/3] Parallel pull for SSH
@ 2005-08-02 23:45 barkalow
2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:45 UTC (permalink / raw
To: Junio C Hamano; +Cc: git
This series pipelines pulling by ssh; objects are requested as soon as
possible, and read once as many hashes as possible have been stuffed in
the queue. This seems to be a major improvement in throughput, although it
doesn't do any packing, so the total data transferred is higher than it
could be.
1: Object library additions
2: Parallel pull algorithm
3: Pipelined SSH support
It replaces these commits from -pu:
9d469633827c3ac9736ff5cca620db0491ecb200
7e11bade2166d11e3ce0b8635eb9f61716cddcd6
-Daniel
*This .sig left intentionally blank*
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 1/3] Object library enhancements
2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
@ 2005-08-02 23:45 ` barkalow
2005-08-02 23:46 ` [PATCH 2/3] Parallelize the pull algorithm barkalow
2005-08-02 23:46 ` [PATCH 3/3] Parallelize pulling by ssh barkalow
2 siblings, 0 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:45 UTC (permalink / raw
To: Junio C Hamano; +Cc: git
Add function to look up an object which is entirely unknown, so that
it can be put in a list. Also add various other functions related to
lists of objects.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
object.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
object.h | 10 ++++++++++
tree.h | 1 +
3 files changed, 65 insertions(+), 1 deletions(-)
6f610ea3a0118fd7bcda89b75b538d3711418b72
diff --git a/object.c b/object.c
--- a/object.c
+++ b/object.c
@@ -99,7 +99,9 @@ void mark_reachable(struct object *obj,
struct object *lookup_object_type(const unsigned char *sha1, const char *type)
{
- if (!strcmp(type, blob_type)) {
+ if (!type) {
+ return lookup_unknown_object(sha1);
+ } else if (!strcmp(type, blob_type)) {
return &lookup_blob(sha1)->object;
} else if (!strcmp(type, tree_type)) {
return &lookup_tree(sha1)->object;
@@ -113,6 +115,27 @@ struct object *lookup_object_type(const
}
}
+union any_object {
+ struct object object;
+ struct commit commit;
+ struct tree tree;
+ struct blob blob;
+ struct tag tag;
+};
+
+struct object *lookup_unknown_object(const unsigned char *sha1)
+{
+ struct object *obj = lookup_object(sha1);
+ if (!obj) {
+ union any_object *ret = xmalloc(sizeof(*ret));
+ memset(ret, 0, sizeof(*ret));
+ created_object(sha1, &ret->object);
+ ret->object.type = NULL;
+ return &ret->object;
+ }
+ return obj;
+}
+
struct object *parse_object(const unsigned char *sha1)
{
unsigned long size;
@@ -150,3 +173,33 @@ struct object *parse_object(const unsign
}
return NULL;
}
+
+struct object_list *object_list_insert(struct object *item,
+ struct object_list **list_p)
+{
+ struct object_list *new_list = xmalloc(sizeof(struct object_list));
+ new_list->item = item;
+ new_list->next = *list_p;
+ *list_p = new_list;
+ return new_list;
+}
+
+unsigned object_list_length(struct object_list *list)
+{
+ unsigned ret = 0;
+ while (list) {
+ list = list->next;
+ ret++;
+ }
+ return ret;
+}
+
+int object_list_contains(struct object_list *list, struct object *obj)
+{
+ while (list) {
+ if (list->item == obj)
+ return 1;
+ list = list->next;
+ }
+ return 0;
+}
diff --git a/object.h b/object.h
--- a/object.h
+++ b/object.h
@@ -31,8 +31,18 @@ void created_object(const unsigned char
/** Returns the object, having parsed it to find out what it is. **/
struct object *parse_object(const unsigned char *sha1);
+/** Returns the object, with potentially excess memory allocated. **/
+struct object *lookup_unknown_object(const unsigned char *sha1);
+
void add_ref(struct object *refer, struct object *target);
void mark_reachable(struct object *obj, unsigned int mask);
+struct object_list *object_list_insert(struct object *item,
+ struct object_list **list_p);
+
+unsigned object_list_length(struct object_list *list);
+
+int object_list_contains(struct object_list *list, struct object *obj);
+
#endif /* OBJECT_H */
diff --git a/tree.h b/tree.h
--- a/tree.h
+++ b/tree.h
@@ -14,6 +14,7 @@ struct tree_entry_list {
unsigned int mode;
char *name;
union {
+ struct object *any;
struct tree *tree;
struct blob *blob;
} item;
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 2/3] Parallelize the pull algorithm
2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
@ 2005-08-02 23:46 ` barkalow
2005-08-02 23:46 ` [PATCH 3/3] Parallelize pulling by ssh barkalow
2 siblings, 0 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:46 UTC (permalink / raw
To: Junio C Hamano; +Cc: git
This processes objects in two simultaneous passes. Each object will
first be given to prefetch(), as soon as it is possible to tell that
it will be needed, and then will be given to fetch(), when it is the
next object that needs to be parsed. Unless an implementation does
something with prefetch(), this should have no effect.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
http-pull.c | 4 ++
local-pull.c | 4 ++
pull.c | 132 ++++++++++++++++++++++++++++++++++------------------------
pull.h | 7 +++
ssh-pull.c | 4 ++
5 files changed, 97 insertions(+), 54 deletions(-)
adf8aa039f2db3fb3bfc4dee0a7354b662cd5422
diff --git a/http-pull.c b/http-pull.c
--- a/http-pull.c
+++ b/http-pull.c
@@ -67,6 +67,10 @@ static size_t fwrite_sha1_file(void *ptr
return size;
}
+void prefetch(unsigned char *sha1)
+{
+}
+
static int got_indices = 0;
static struct packed_git *packs = NULL;
diff --git a/local-pull.c b/local-pull.c
--- a/local-pull.c
+++ b/local-pull.c
@@ -11,6 +11,10 @@ static int use_filecopy = 1;
static char *path; /* "Remote" git repository */
+void prefetch(unsigned char *sha1)
+{
+}
+
int fetch(unsigned char *sha1)
{
static int object_name_start = -1;
diff --git a/pull.c b/pull.c
--- a/pull.c
+++ b/pull.c
@@ -17,11 +17,8 @@ int get_all = 0;
int get_verbosely = 0;
static unsigned char current_commit_sha1[20];
-static const char commitS[] = "commit";
-static const char treeS[] = "tree";
-static const char blobS[] = "blob";
-
-void pull_say(const char *fmt, const char *hex) {
+void pull_say(const char *fmt, const char *hex)
+{
if (get_verbosely)
fprintf(stderr, fmt, hex);
}
@@ -48,93 +45,118 @@ static int make_sure_we_have_it(const ch
return status;
}
-static int process_unknown(unsigned char *sha1);
+static int process(unsigned char *sha1, const char *type);
-static int process_tree(unsigned char *sha1)
+static int process_tree(struct tree *tree)
{
- struct tree *tree = lookup_tree(sha1);
struct tree_entry_list *entries;
if (parse_tree(tree))
return -1;
for (entries = tree->entries; entries; entries = entries->next) {
- const char *what = entries->directory ? treeS : blobS;
- if (make_sure_we_have_it(what, entries->item.tree->object.sha1))
+ if (process(entries->item.any->sha1,
+ entries->directory ? tree_type : blob_type))
return -1;
- if (entries->directory) {
- if (process_tree(entries->item.tree->object.sha1))
- return -1;
- }
}
return 0;
}
-static int process_commit(unsigned char *sha1)
+static int process_commit(struct commit *commit)
{
- struct commit *obj = lookup_commit(sha1);
-
- if (make_sure_we_have_it(commitS, sha1))
+ if (parse_commit(commit))
return -1;
- if (parse_commit(obj))
- return -1;
+ memcpy(current_commit_sha1, commit->object.sha1, 20);
if (get_tree) {
- if (make_sure_we_have_it(treeS, obj->tree->object.sha1))
- return -1;
- if (process_tree(obj->tree->object.sha1))
+ if (process(commit->tree->object.sha1, tree_type))
return -1;
if (!get_all)
get_tree = 0;
}
if (get_history) {
- struct commit_list *parents = obj->parents;
+ struct commit_list *parents = commit->parents;
for (; parents; parents = parents->next) {
if (has_sha1_file(parents->item->object.sha1))
continue;
- if (make_sure_we_have_it(NULL,
- parents->item->object.sha1)) {
- /* The server might not have it, and
- * we don't mind.
- */
- continue;
- }
- if (process_commit(parents->item->object.sha1))
+ if (process(parents->item->object.sha1,
+ commit_type))
return -1;
- memcpy(current_commit_sha1, sha1, 20);
}
}
return 0;
}
-static int process_tag(unsigned char *sha1)
+static int process_tag(struct tag *tag)
{
- struct tag *obj = lookup_tag(sha1);
-
- if (parse_tag(obj))
+ if (parse_tag(tag))
return -1;
- return process_unknown(obj->tagged->sha1);
+ return process(tag->tagged->sha1, NULL);
}
-static int process_unknown(unsigned char *sha1)
+static struct object_list *process_queue = NULL;
+static struct object_list **process_queue_end = &process_queue;
+
+static int process(unsigned char *sha1, const char *type)
{
struct object *obj;
- if (make_sure_we_have_it("object", sha1))
- return -1;
- obj = parse_object(sha1);
- if (!obj)
- return error("Unable to parse object %s", sha1_to_hex(sha1));
- if (obj->type == commit_type)
- return process_commit(sha1);
- if (obj->type == tree_type)
- return process_tree(sha1);
- if (obj->type == blob_type)
+ if (has_sha1_file(sha1))
return 0;
- if (obj->type == tag_type)
- return process_tag(sha1);
- return error("Unable to determine requirement of type %s for %s",
- obj->type, sha1_to_hex(sha1));
+ obj = lookup_object_type(sha1, type);
+ if (object_list_contains(process_queue, obj))
+ return 0;
+ object_list_insert(obj, process_queue_end);
+ process_queue_end = &(*process_queue_end)->next;
+
+ //fprintf(stderr, "prefetch %s\n", sha1_to_hex(sha1));
+ prefetch(sha1);
+
+ return 0;
+}
+
+static int loop(void)
+{
+ while (process_queue) {
+ struct object *obj = process_queue->item;
+ /*
+ fprintf(stderr, "%d objects to pull\n",
+ object_list_length(process_queue));
+ */
+ process_queue = process_queue->next;
+ if (!process_queue)
+ process_queue_end = &process_queue;
+
+ //fprintf(stderr, "fetch %s\n", sha1_to_hex(obj->sha1));
+
+ if (make_sure_we_have_it(obj->type ?: "object",
+ obj->sha1))
+ return -1;
+ if (!obj->type)
+ parse_object(obj->sha1);
+ if (obj->type == commit_type) {
+ if (process_commit((struct commit *)obj))
+ return -1;
+ continue;
+ }
+ if (obj->type == tree_type) {
+ if (process_tree((struct tree *)obj))
+ return -1;
+ continue;
+ }
+ if (obj->type == blob_type) {
+ continue;
+ }
+ if (obj->type == tag_type) {
+ if (process_tag((struct tag *)obj))
+ return -1;
+ continue;
+ }
+ return error("Unable to determine requirements "
+ "of type %s for %s",
+ obj->type, sha1_to_hex(obj->sha1));
+ }
+ return 0;
}
static int interpret_target(char *target, unsigned char *sha1)
@@ -164,7 +186,9 @@ int pull(char *target)
if (interpret_target(target, sha1))
return error("Could not interpret %s as something to pull",
target);
- if (process_unknown(sha1))
+ if (process(sha1, NULL))
+ return -1;
+ if (loop())
return -1;
if (write_ref) {
diff --git a/pull.h b/pull.h
--- a/pull.h
+++ b/pull.h
@@ -9,6 +9,13 @@
extern int fetch(unsigned char *sha1);
/*
+ * Fetch the specified object and store it locally; fetch() will be
+ * called later to determine success. To be provided by the particular
+ * implementation.
+ */
+extern void prefetch(unsigned char *sha1);
+
+/*
* Fetch ref (relative to $GIT_DIR/refs) from the remote, and store
* the 20-byte SHA1 in sha1. Return 0 on success, -1 on failure. To
* be provided by the particular implementation.
diff --git a/ssh-pull.c b/ssh-pull.c
--- a/ssh-pull.c
+++ b/ssh-pull.c
@@ -10,6 +10,10 @@ static int fd_out;
static unsigned char remote_version = 0;
static unsigned char local_version = 1;
+void prefetch(unsigned char *sha1)
+{
+}
+
int fetch(unsigned char *sha1)
{
int ret;
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH 3/3] Parallelize pulling by ssh
2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
2005-08-02 23:46 ` [PATCH 2/3] Parallelize the pull algorithm barkalow
@ 2005-08-02 23:46 ` barkalow
2 siblings, 0 replies; 4+ messages in thread
From: barkalow @ 2005-08-02 23:46 UTC (permalink / raw
To: Junio C Hamano; +Cc: git
This causes ssh-pull to request objects in prefetch() and read them in
fetch(), such that it reduces the unpipelined round-trip time.
This also makes write_sha1_from_fd() support having a buffer of data
which it accidentally read from the fd after the object; this was
formerly not a problem, because it would always get a short read at
the end of an object, because the next object had not been
requested. This is no longer true.
Signed-off-by: Daniel Barkalow <barkalow@iabervon.org>
---
cache.h | 3 ++-
sha1_file.c | 37 ++++++++++++++++++++++---------------
ssh-pull.c | 44 ++++++++++++++++++++++++++++++++++++--------
3 files changed, 60 insertions(+), 24 deletions(-)
9bd15230cb65acc78a97550c9467f98a04720ee8
diff --git a/cache.h b/cache.h
--- a/cache.h
+++ b/cache.h
@@ -198,7 +198,8 @@ extern int check_sha1_signature(const un
/* Read a tree into the cache */
extern int read_tree(void *buffer, unsigned long size, int stage, const char **paths);
-extern int write_sha1_from_fd(const unsigned char *sha1, int fd);
+extern int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
+ size_t bufsize, size_t *bufposn);
extern int write_sha1_to_fd(int fd, const unsigned char *sha1);
extern int has_sha1_pack(const unsigned char *sha1);
diff --git a/sha1_file.c b/sha1_file.c
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1389,14 +1389,14 @@ int write_sha1_to_fd(int fd, const unsig
return 0;
}
-int write_sha1_from_fd(const unsigned char *sha1, int fd)
+int write_sha1_from_fd(const unsigned char *sha1, int fd, char *buffer,
+ size_t bufsize, size_t *bufposn)
{
char *filename = sha1_file_name(sha1);
int local;
z_stream stream;
unsigned char real_sha1[20];
- unsigned char buf[4096];
unsigned char discard[4096];
int ret;
SHA_CTX c;
@@ -1414,7 +1414,24 @@ int write_sha1_from_fd(const unsigned ch
do {
ssize_t size;
- size = read(fd, buf, 4096);
+ if (*bufposn) {
+ stream.avail_in = *bufposn;
+ stream.next_in = buffer;
+ do {
+ stream.next_out = discard;
+ stream.avail_out = sizeof(discard);
+ ret = inflate(&stream, Z_SYNC_FLUSH);
+ SHA1_Update(&c, discard, sizeof(discard) -
+ stream.avail_out);
+ } while (stream.avail_in && ret == Z_OK);
+ write(local, buffer, *bufposn - stream.avail_in);
+ memmove(buffer, buffer + *bufposn - stream.avail_in,
+ stream.avail_in);
+ *bufposn = stream.avail_in;
+ if (ret != Z_OK)
+ break;
+ }
+ size = read(fd, buffer + *bufposn, bufsize - *bufposn);
if (size <= 0) {
close(local);
unlink(filename);
@@ -1423,18 +1440,8 @@ int write_sha1_from_fd(const unsigned ch
perror("Reading from connection");
return -1;
}
- write(local, buf, size);
- stream.avail_in = size;
- stream.next_in = buf;
- do {
- stream.next_out = discard;
- stream.avail_out = sizeof(discard);
- ret = inflate(&stream, Z_SYNC_FLUSH);
- SHA1_Update(&c, discard, sizeof(discard) -
- stream.avail_out);
- } while (stream.avail_in && ret == Z_OK);
-
- } while (ret == Z_OK);
+ *bufposn += size;
+ } while (1);
inflateEnd(&stream);
close(local);
diff --git a/ssh-pull.c b/ssh-pull.c
--- a/ssh-pull.c
+++ b/ssh-pull.c
@@ -10,24 +10,49 @@ static int fd_out;
static unsigned char remote_version = 0;
static unsigned char local_version = 1;
+ssize_t force_write(int fd, void *buffer, size_t length)
+{
+ ssize_t ret = 0;
+ while (ret < length) {
+ ssize_t size = write(fd, buffer + ret, length - ret);
+ if (size < 0) {
+ return size;
+ }
+ if (size == 0) {
+ return ret;
+ }
+ ret += size;
+ }
+ return ret;
+}
+
void prefetch(unsigned char *sha1)
{
+ char type = 'o';
+ force_write(fd_out, &type, 1);
+ force_write(fd_out, sha1, 20);
+ //memcpy(requested + 20 * prefetches++, sha1, 20);
}
+static char conn_buf[4096];
+static size_t conn_buf_posn = 0;
+
int fetch(unsigned char *sha1)
{
int ret;
signed char remote;
- char type = 'o';
- if (has_sha1_file(sha1))
- return 0;
- write(fd_out, &type, 1);
- write(fd_out, sha1, 20);
- if (read(fd_in, &remote, 1) < 1)
- return -1;
+
+ if (conn_buf_posn) {
+ remote = conn_buf[0];
+ memmove(conn_buf, conn_buf + 1, --conn_buf_posn);
+ } else {
+ if (read(fd_in, &remote, 1) < 1)
+ return -1;
+ }
+ //fprintf(stderr, "Got %d\n", remote);
if (remote < 0)
return remote;
- ret = write_sha1_from_fd(sha1, fd_in);
+ ret = write_sha1_from_fd(sha1, fd_in, conn_buf, 4096, &conn_buf_posn);
if (!ret)
pull_say("got %s\n", sha1_to_hex(sha1));
return ret;
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2005-08-02 23:43 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-08-02 23:45 [PATCH 0/3] Parallel pull for SSH barkalow
2005-08-02 23:45 ` [PATCH 1/3] Object library enhancements barkalow
2005-08-02 23:46 ` [PATCH 2/3] Parallelize the pull algorithm barkalow
2005-08-02 23:46 ` [PATCH 3/3] Parallelize pulling by ssh barkalow
Code repositories for project(s) associated with this public inbox
https://80x24.org/mirrors/git.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).