git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [RFC 0/4]
@ 2012-06-04 17:20 Florian Achleitner
  2012-06-04 17:20 ` [RFC 1/4] Implement a basic remote helper vor svn in C Florian Achleitner
  2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
  0 siblings, 2 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-04 17:20 UTC (permalink / raw)
  To: git

Series of patches creating a very basic remote helper in C.

[RFC 1/4] Implement a basic remote helper vor svn in C.
[RFC 2/4] Integrate remote-svn into svn-fe/Makefile.
[RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary
[RFC 4/4] Add cat-blob report pipe from fast-import to

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [RFC 1/4] Implement a basic remote helper vor svn in C.
  2012-06-04 17:20 [RFC 0/4] Florian Achleitner
@ 2012-06-04 17:20 ` Florian Achleitner
  2012-06-04 17:20   ` [RFC 2/4] Integrate remote-svn into svn-fe/Makefile Florian Achleitner
  2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
  1 sibling, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-06-04 17:20 UTC (permalink / raw)
  To: git; +Cc: Florian Achleitner

Experimental implementation.
Inspired by the existing shell script at divanorama/remote-svn-alpha.
It doesn't use marks or notes yet, always imports the full history.

svnrdump is started as a subprocess while svn-fe's functions
are called directly.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
 contrib/svn-fe/remote-svn.c |  188 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 contrib/svn-fe/remote-svn.c

diff --git a/contrib/svn-fe/remote-svn.c b/contrib/svn-fe/remote-svn.c
new file mode 100644
index 0000000..ac09cc5
--- /dev/null
+++ b/contrib/svn-fe/remote-svn.c
@@ -0,0 +1,188 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "cache.h"
+#include "remote.h"
+#include "strbuf.h"
+#include "url.h"
+#include "exec_cmd.h"
+#include "run-command.h"
+#include "svndump.h"
+
+static int debug = 0;
+
+static inline void printd(const char* fmt, ...)
+{
+	if(debug) {
+		va_list vargs;
+		va_start(vargs, fmt);
+		fprintf(stderr, "rhsvn debug: ");
+		vfprintf(stderr, fmt, vargs);
+		fprintf(stderr, "\n");
+		va_end(vargs);
+	}
+}
+
+static struct remote* remote;
+static const char* url;
+const char* private_refs = "refs/remote-svn/";		/* + remote->name. */
+const char* remote_ref = "refs/heads/master";
+
+enum cmd_result cmd_capabilities(struct strbuf* line);
+enum cmd_result cmd_import(struct strbuf* line);
+enum cmd_result cmd_list(struct strbuf* line);
+
+enum cmd_result { SUCCESS, NOT_HANDLED, ERROR };
+typedef enum cmd_result (*command)(struct strbuf*);
+
+const command command_list[] = {
+		cmd_capabilities, cmd_import, cmd_list, NULL
+};
+
+enum cmd_result cmd_capabilities(struct strbuf* line)
+{
+	if(strcmp(line->buf, "capabilities"))
+		return NOT_HANDLED;
+
+	printf("import\n");
+	printf("\n");
+	fflush(stdout);
+	return SUCCESS;
+}
+
+enum cmd_result cmd_import(struct strbuf* line)
+{
+	const char* revs = "-r0:HEAD";
+	int code;
+	struct child_process svndump_proc = {
+			.argv = NULL,		/* comes later .. */
+			/* we want a pipe to the child's stdout, but stdin, stderr inherited.
+			 The user can be asked for e.g. a password */
+			.in = 0, .out = -1, .err = 0,
+			.no_stdin = 0, .no_stdout = 0, .no_stderr = 0,
+			.git_cmd = 0,
+			.silent_exec_failure = 0,
+			.stdout_to_stderr = 0,
+			.use_shell = 0,
+			.clean_on_exit = 0,
+			.preexec_cb = NULL,
+			.env = NULL,
+			.dir = NULL
+	};
+
+	if(prefixcmp(line->buf, "import"))
+		return NOT_HANDLED;
+
+	svndump_proc.argv = xcalloc(5, sizeof(char*));
+	svndump_proc.argv[0] = "svnrdump";
+	svndump_proc.argv[1] = "dump";
+	svndump_proc.argv[2] = url;
+	svndump_proc.argv[3] = revs;
+
+	code = start_command(&svndump_proc);
+	if(code)
+		die("Unable to start %s, code %d", svndump_proc.argv[0], code);
+
+	svndump_init_fd(svndump_proc.out);
+	svndump_read(url);
+	svndump_deinit();
+	svndump_reset();
+
+	close(svndump_proc.out);
+
+	code = finish_command(&svndump_proc);
+	if(code)
+		warning("Something went wrong with termination of %s, code %d", svndump_proc.argv[0], code);
+	free(svndump_proc.argv);
+
+	printf("done\n");
+	return SUCCESS;
+
+
+
+}
+
+enum cmd_result cmd_list(struct strbuf* line)
+{
+	if(strcmp(line->buf, "list"))
+		return NOT_HANDLED;
+
+	printf("? HEAD\n");
+	printf("? %s\n", remote_ref);
+	printf("\n");
+	fflush(stdout);
+	return SUCCESS;
+}
+
+enum cmd_result do_command(struct strbuf* line)
+{
+	const command* p = command_list;
+	enum cmd_result ret;
+	printd("command line '%s'", line->buf);
+	while(*p) {
+		ret = (*p)(line);
+		if(ret != NOT_HANDLED)
+			return ret;
+		p++;
+	}
+	warning("Unknown command '%s'\n", line->buf);
+	return ret;
+}
+
+int main(int argc, const char **argv)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int nongit;
+
+	if (getenv("GIT_TRANSPORT_HELPER_DEBUG"))
+		debug = 1;
+
+	git_extract_argv0_path(argv[0]);
+	setup_git_directory_gently(&nongit);
+	if (argc < 2) {
+		fprintf(stderr, "Remote needed\n");
+		return 1;
+	}
+
+	remote = remote_get(argv[1]);
+	if (argc == 3) {
+		end_url_with_slash(&buf, argv[2]);
+	} else if (argc == 2) {
+		end_url_with_slash(&buf, remote->url[0]);
+	} else {
+		warning("Excess arguments!");
+	}
+
+	url = strbuf_detach(&buf, NULL);
+
+	printd("remote-svn starting with url %s", url);
+
+	/* build private ref namespace path for this svn remote. */
+	strbuf_init(&buf, 0);
+	strbuf_addstr(&buf, private_refs);
+	strbuf_addstr(&buf, remote->name);
+	strbuf_addch(&buf, '/');
+	private_refs = strbuf_detach(&buf, NULL);
+
+	while(1) {
+		if (strbuf_getline(&buf, stdin, '\n') == EOF) {
+			if (ferror(stdin))
+				fprintf(stderr, "Error reading command stream\n");
+			else
+				fprintf(stderr, "Unexpected end of command stream\n");
+			return 1;
+		}
+		/* an empty line terminates the command stream */
+		if(buf.len == 0)
+			break;
+
+		do_command(&buf);
+		strbuf_reset(&buf);
+	}
+
+	strbuf_release(&buf);
+	free((void*)url);
+	free((void*)private_refs);
+	return 0;
+}
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [RFC 2/4] Integrate remote-svn into svn-fe/Makefile.
  2012-06-04 17:20 ` [RFC 1/4] Implement a basic remote helper vor svn in C Florian Achleitner
@ 2012-06-04 17:20   ` Florian Achleitner
  2012-06-04 17:20     ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-06-04 17:20 UTC (permalink / raw)
  To: git; +Cc: Florian Achleitner

Requires some sha.h to be used and the libraries
to be linked, this is currently hardcoded.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
 contrib/svn-fe/Makefile |   14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/contrib/svn-fe/Makefile b/contrib/svn-fe/Makefile
index 360d8da..253324f 100644
--- a/contrib/svn-fe/Makefile
+++ b/contrib/svn-fe/Makefile
@@ -1,14 +1,14 @@
-all:: svn-fe$X
+all:: svn-fe$X remote-svn$X
 
 CC = gcc
 RM = rm -f
 MV = mv
 
-CFLAGS = -g -O2 -Wall
+CFLAGS = -g -O2 -Wall -DSHA1_HEADER='<openssl/sha.h>'
 LDFLAGS =
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
-EXTLIBS =
+EXTLIBS = -lssl -lcrypto -lpthread ../../xdiff/lib.a
 
 GIT_LIB = ../../libgit.a
 VCSSVN_LIB = ../../vcs-svn/lib.a
@@ -37,8 +37,12 @@ svn-fe$X: svn-fe.o $(VCSSVN_LIB) $(GIT_LIB)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ svn-fe.o \
 		$(ALL_LDFLAGS) $(LIBS)
 
-svn-fe.o: svn-fe.c ../../vcs-svn/svndump.h
-	$(QUIET_CC)$(CC) -I../../vcs-svn -o $*.o -c $(ALL_CFLAGS) $<
+remote-svn$X: remote-svn.o $(VCSSVN_LIB) $(GIT_LIB)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ remote-svn.o \
+		$(ALL_LDFLAGS) $(LIBS)
+		
+%.o: %.c ../../vcs-svn/svndump.h
+	$(QUIET_CC)$(CC) -I../../vcs-svn -I../../ -o $*.o -c $(ALL_CFLAGS) $<
 
 svn-fe.html: svn-fe.txt
 	$(QUIET_SUBDIR0)../../Documentation $(QUIET_SUBDIR1) \
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs.
  2012-06-04 17:20   ` [RFC 2/4] Integrate remote-svn into svn-fe/Makefile Florian Achleitner
@ 2012-06-04 17:20     ` Florian Achleitner
  2012-06-04 17:20       ` [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper Florian Achleitner
  2012-06-05  1:21       ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs David Michael Barr
  0 siblings, 2 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-04 17:20 UTC (permalink / raw)
  To: git; +Cc: Florian Achleitner

The existing function only allowed reading from a filename or
from stdin.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
 vcs-svn/svndump.c |   20 +++++++++++++++++---
 vcs-svn/svndump.h |    1 +
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 0899790..2f0089f 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -465,10 +465,8 @@ void svndump_read(const char *url)
 		end_revision();
 }
 
-int svndump_init(const char *filename)
+static void init()
 {
-	if (buffer_init(&input, filename))
-		return error("cannot open %s: %s", filename, strerror(errno));
 	fast_export_init(REPORT_FILENO);
 	strbuf_init(&dump_ctx.uuid, 4096);
 	strbuf_init(&dump_ctx.url, 4096);
@@ -479,6 +477,22 @@ int svndump_init(const char *filename)
 	reset_dump_ctx(NULL);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
+	return;
+}
+
+int svndump_init(const char *filename)
+{
+	if (buffer_init(&input, filename))
+		return error("cannot open %s: %s", filename, strerror(errno));
+	init();
+	return 0;
+}
+
+int svndump_init_fd(int in_fd)
+{
+	if(buffer_fdinit(&input, in_fd))
+		return error("cannot open fd %d: %s", in_fd, strerror(errno));
+	init();
 	return 0;
 }
 
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
index df9ceb0..24e7beb 100644
--- a/vcs-svn/svndump.h
+++ b/vcs-svn/svndump.h
@@ -2,6 +2,7 @@
 #define SVNDUMP_H_
 
 int svndump_init(const char *filename);
+int svndump_init_fd(int in_fd);
 void svndump_read(const char *url);
 void svndump_deinit(void);
 void svndump_reset(void);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-04 17:20     ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
@ 2012-06-04 17:20       ` Florian Achleitner
  2012-06-05  1:33         ` David Michael Barr
  2012-06-05  6:56         ` Jeff King
  2012-06-05  1:21       ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs David Michael Barr
  1 sibling, 2 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-04 17:20 UTC (permalink / raw)
  To: git; +Cc: Florian Achleitner

On invocation of a helper a new pipe is opened.
To close the other end after fork, the prexec_cb feature
of the run_command api is used.
If the helper is not used with fast-import later the pipe
is unused.
The FD is passed to the remote-helper via it's environment,
helpers that don't use fast-import can simply ignore it.
fast-import has an argv for that.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
 transport-helper.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.c  |    9 ++++++++-
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/transport-helper.c b/transport-helper.c
index 61c928f..b438040 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -17,6 +17,7 @@ struct helper_data {
 	const char *name;
 	struct child_process *helper;
 	FILE *out;
+	int fast_import_backchannel_pipe[2];
 	unsigned fetch : 1,
 		import : 1,
 		export : 1,
@@ -98,19 +99,30 @@ static void do_take_over(struct transport *transport)
 	free(data);
 }
 
+static int fd_to_close;
+void close_fd_prexec_cb(void)
+{
+	if(debug)
+		fprintf(stderr, "close_fd_prexec_cb closing %d\n", fd_to_close);
+	close(fd_to_close);
+}
+
 static struct child_process *get_helper(struct transport *transport)
 {
 	struct helper_data *data = transport->data;
 	struct strbuf buf = STRBUF_INIT;
+	struct strbuf envbuf = STRBUF_INIT;
 	struct child_process *helper;
 	const char **refspecs = NULL;
 	int refspec_nr = 0;
 	int refspec_alloc = 0;
 	int duped;
 	int code;
+	int err;
 	char git_dir_buf[sizeof(GIT_DIR_ENVIRONMENT) + PATH_MAX + 1];
 	const char *helper_env[] = {
 		git_dir_buf,
+		NULL,	/* placeholder */
 		NULL
 	};
 
@@ -133,6 +145,24 @@ static struct child_process *get_helper(struct transport *transport)
 	snprintf(git_dir_buf, sizeof(git_dir_buf), "%s=%s", GIT_DIR_ENVIRONMENT, get_git_dir());
 	helper->env = helper_env;
 
+
+	/* create an additional pipe from fast-import to the helper */
+	err = pipe(data->fast_import_backchannel_pipe);
+	if(err)
+		die("cannot create fast_import_backchannel_pipe: %s", strerror(errno));
+
+	if(debug)
+		fprintf(stderr, "Remote helper created fast_import_backchannel_pipe { %d, %d }\n",
+				data->fast_import_backchannel_pipe[0], data->fast_import_backchannel_pipe[1]);
+
+	strbuf_addf(&envbuf, "GIT_REPORT_FILENO=%d", data->fast_import_backchannel_pipe[0]);
+	helper_env[1] = strbuf_detach(&envbuf, NULL);
+
+	/* after the fork, we need to close the write end in the helper */
+	fd_to_close = data->fast_import_backchannel_pipe[1];
+	/* the prexec callback is run just before exec */
+	helper->preexec_cb = close_fd_prexec_cb;
+
 	code = start_command(helper);
 	if (code < 0 && errno == ENOENT)
 		die("Unable to find remote helper for '%s'", data->name);
@@ -141,6 +171,7 @@ static struct child_process *get_helper(struct transport *transport)
 
 	data->helper = helper;
 	data->no_disconnect_req = 0;
+	free((void*)helper_env[1]);
 
 	/*
 	 * Open the output as FILE* so strbuf_getline() can be used.
@@ -237,6 +268,10 @@ static int disconnect_helper(struct transport *transport)
 			xwrite(data->helper->in, "\n", 1);
 			sigchain_pop(SIGPIPE);
 		}
+		/* close the pipe, it is still open if it wasn't used for fast-import. */
+		close(data->fast_import_backchannel_pipe[0]);
+		close(data->fast_import_backchannel_pipe[1]);
+
 		close(data->helper->in);
 		close(data->helper->out);
 		fclose(data->out);
@@ -376,13 +411,20 @@ static int fetch_with_fetch(struct transport *transport,
 static int get_importer(struct transport *transport, struct child_process *fastimport)
 {
 	struct child_process *helper = get_helper(transport);
+	struct helper_data *data = transport->data;
+	struct strbuf buf = STRBUF_INIT;
 	memset(fastimport, 0, sizeof(*fastimport));
 	fastimport->in = helper->out;
 	fastimport->argv = xcalloc(5, sizeof(*fastimport->argv));
 	fastimport->argv[0] = "fast-import";
 	fastimport->argv[1] = "--quiet";
+	strbuf_addf(&buf, "--cat-blob-fd=%d", data->fast_import_backchannel_pipe[1]);
+	fastimport->argv[2] = strbuf_detach(&buf, NULL);
 
 	fastimport->git_cmd = 1;
+
+	fd_to_close = data->fast_import_backchannel_pipe[0];
+	fastimport->preexec_cb = close_fd_prexec_cb;
 	return start_command(fastimport);
 }
 
@@ -427,6 +469,11 @@ static int fetch_with_import(struct transport *transport,
 	if (get_importer(transport, &fastimport))
 		die("Couldn't run fast-import");
 
+
+	/* in the parent process we close both pipe ends. */
+	close(data->fast_import_backchannel_pipe[0]);
+	close(data->fast_import_backchannel_pipe[1]);
+
 	for (i = 0; i < nr_heads; i++) {
 		posn = to_fetch[i];
 		if (posn->status & REF_STATUS_UPTODATE)
@@ -441,6 +488,7 @@ static int fetch_with_import(struct transport *transport,
 
 	if (finish_command(&fastimport))
 		die("Error while running fast-import");
+	free((void*)fastimport.argv[2]);
 	free(fastimport.argv);
 	fastimport.argv = NULL;
 
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 2f0089f..b1fe03f 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -467,7 +467,14 @@ void svndump_read(const char *url)
 
 static void init()
 {
-	fast_export_init(REPORT_FILENO);
+	int report_fd;
+	char* back_fd_env = getenv("GIT_REPORT_FILENO");
+	if(!back_fd_env || sscanf(back_fd_env, "%d", &report_fd) != 1) {
+		warning("Cannot get cat-blob fd from environment, using default!");
+		report_fd = REPORT_FILENO;
+	}
+
+	fast_export_init(report_fd);
 	strbuf_init(&dump_ctx.uuid, 4096);
 	strbuf_init(&dump_ctx.url, 4096);
 	strbuf_init(&rev_ctx.log, 4096);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs.
  2012-06-04 17:20     ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
  2012-06-04 17:20       ` [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper Florian Achleitner
@ 2012-06-05  1:21       ` David Michael Barr
  1 sibling, 0 replies; 57+ messages in thread
From: David Michael Barr @ 2012-06-05  1:21 UTC (permalink / raw)
  To: Florian Achleitner; +Cc: git

On Tue, Jun 5, 2012 at 3:20 AM, Florian Achleitner
<florian.achleitner.2.6.31@gmail.com> wrote:
> The existing function only allowed reading from a filename or
> from stdin.
>
> Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
> ---
>  vcs-svn/svndump.c |   20 +++++++++++++++++---
>  vcs-svn/svndump.h |    1 +
>  2 files changed, 18 insertions(+), 3 deletions(-)
>
> diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
> index 0899790..2f0089f 100644
> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
> @@ -465,10 +465,8 @@ void svndump_read(const char *url)
>                end_revision();
>  }
>
> -int svndump_init(const char *filename)
> +static void init()
>  {
> -       if (buffer_init(&input, filename))
> -               return error("cannot open %s: %s", filename, strerror(errno));
>        fast_export_init(REPORT_FILENO);
>        strbuf_init(&dump_ctx.uuid, 4096);
>        strbuf_init(&dump_ctx.url, 4096);
> @@ -479,6 +477,22 @@ int svndump_init(const char *filename)
>        reset_dump_ctx(NULL);
>        reset_rev_ctx(0);
>        reset_node_ctx(NULL);
> +       return;
> +}
> +
> +int svndump_init(const char *filename)
> +{
> +       if (buffer_init(&input, filename))
> +               return error("cannot open %s: %s", filename, strerror(errno));

Note: filename is allowed to be NULL here.
This is a bug in the existing code that you just moved.

I suggest moving error printing into buffer_init().
This way the basis for the message is clearer.

For bonus points, we should split buffer_init().
Plain buffer_init() should use stdin.
The new buff_init_path() should take a filename.

> +       init();
> +       return 0;
> +}
> +
> +int svndump_init_fd(int in_fd)
> +{
> +       if(buffer_fdinit(&input, in_fd))
> +               return error("cannot open fd %d: %s", in_fd, strerror(errno));
> +       init();
>        return 0;
>  }
>
> diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
> index df9ceb0..24e7beb 100644
> --- a/vcs-svn/svndump.h
> +++ b/vcs-svn/svndump.h
> @@ -2,6 +2,7 @@
>  #define SVNDUMP_H_
>
>  int svndump_init(const char *filename);
> +int svndump_init_fd(int in_fd);
>  void svndump_read(const char *url);
>  void svndump_deinit(void);
>  void svndump_reset(void);
> --
> 1.7.9.5

Otherwise, I like the direction of this patch.

--
David Barr

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-04 17:20       ` [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper Florian Achleitner
@ 2012-06-05  1:33         ` David Michael Barr
  2012-06-05  6:56         ` Jeff King
  1 sibling, 0 replies; 57+ messages in thread
From: David Michael Barr @ 2012-06-05  1:33 UTC (permalink / raw)
  To: Florian Achleitner; +Cc: git, Sverre Rabbelier, Jeff King

On Tue, Jun 5, 2012 at 3:20 AM, Florian Achleitner
<florian.achleitner.2.6.31@gmail.com> wrote:
> On invocation of a helper a new pipe is opened.
> To close the other end after fork, the prexec_cb feature
> of the run_command api is used.
> If the helper is not used with fast-import later the pipe
> is unused.
> The FD is passed to the remote-helper via it's environment,
> helpers that don't use fast-import can simply ignore it.
> fast-import has an argv for that.
>
> Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
> ---
>  transport-helper.c |   48 ++++++++++++++++++++++++++++++++++++++++++++++++
>  vcs-svn/svndump.c  |    9 ++++++++-
>  2 files changed, 56 insertions(+), 1 deletion(-)
>
> diff --git a/transport-helper.c b/transport-helper.c
> index 61c928f..b438040 100644
> --- a/transport-helper.c
> +++ b/transport-helper.c
> @@ -17,6 +17,7 @@ struct helper_data {
>        const char *name;
>        struct child_process *helper;
>        FILE *out;
> +       int fast_import_backchannel_pipe[2];
>        unsigned fetch : 1,
>                import : 1,
>                export : 1,
> @@ -98,19 +99,30 @@ static void do_take_over(struct transport *transport)
>        free(data);
>  }
>
> +static int fd_to_close;
> +void close_fd_prexec_cb(void)
> +{
> +       if(debug)
> +               fprintf(stderr, "close_fd_prexec_cb closing %d\n", fd_to_close);
> +       close(fd_to_close);
> +}
> +
>  static struct child_process *get_helper(struct transport *transport)
>  {
>        struct helper_data *data = transport->data;
>        struct strbuf buf = STRBUF_INIT;
> +       struct strbuf envbuf = STRBUF_INIT;
>        struct child_process *helper;
>        const char **refspecs = NULL;
>        int refspec_nr = 0;
>        int refspec_alloc = 0;
>        int duped;
>        int code;
> +       int err;
>        char git_dir_buf[sizeof(GIT_DIR_ENVIRONMENT) + PATH_MAX + 1];
>        const char *helper_env[] = {
>                git_dir_buf,
> +               NULL,   /* placeholder */
>                NULL
>        };
>
> @@ -133,6 +145,24 @@ static struct child_process *get_helper(struct transport *transport)
>        snprintf(git_dir_buf, sizeof(git_dir_buf), "%s=%s", GIT_DIR_ENVIRONMENT, get_git_dir());
>        helper->env = helper_env;
>
> +
> +       /* create an additional pipe from fast-import to the helper */
> +       err = pipe(data->fast_import_backchannel_pipe);
> +       if(err)
> +               die("cannot create fast_import_backchannel_pipe: %s", strerror(errno));
> +
> +       if(debug)
> +               fprintf(stderr, "Remote helper created fast_import_backchannel_pipe { %d, %d }\n",
> +                               data->fast_import_backchannel_pipe[0], data->fast_import_backchannel_pipe[1]);
> +
> +       strbuf_addf(&envbuf, "GIT_REPORT_FILENO=%d", data->fast_import_backchannel_pipe[0]);
> +       helper_env[1] = strbuf_detach(&envbuf, NULL);
> +
> +       /* after the fork, we need to close the write end in the helper */
> +       fd_to_close = data->fast_import_backchannel_pipe[1];
> +       /* the prexec callback is run just before exec */
> +       helper->preexec_cb = close_fd_prexec_cb;
> +
>        code = start_command(helper);
>        if (code < 0 && errno == ENOENT)
>                die("Unable to find remote helper for '%s'", data->name);
> @@ -141,6 +171,7 @@ static struct child_process *get_helper(struct transport *transport)
>
>        data->helper = helper;
>        data->no_disconnect_req = 0;
> +       free((void*)helper_env[1]);
>
>        /*
>         * Open the output as FILE* so strbuf_getline() can be used.
> @@ -237,6 +268,10 @@ static int disconnect_helper(struct transport *transport)
>                        xwrite(data->helper->in, "\n", 1);
>                        sigchain_pop(SIGPIPE);
>                }
> +               /* close the pipe, it is still open if it wasn't used for fast-import. */
> +               close(data->fast_import_backchannel_pipe[0]);
> +               close(data->fast_import_backchannel_pipe[1]);
> +
>                close(data->helper->in);
>                close(data->helper->out);
>                fclose(data->out);
> @@ -376,13 +411,20 @@ static int fetch_with_fetch(struct transport *transport,
>  static int get_importer(struct transport *transport, struct child_process *fastimport)
>  {
>        struct child_process *helper = get_helper(transport);
> +       struct helper_data *data = transport->data;
> +       struct strbuf buf = STRBUF_INIT;
>        memset(fastimport, 0, sizeof(*fastimport));
>        fastimport->in = helper->out;
>        fastimport->argv = xcalloc(5, sizeof(*fastimport->argv));
>        fastimport->argv[0] = "fast-import";
>        fastimport->argv[1] = "--quiet";
> +       strbuf_addf(&buf, "--cat-blob-fd=%d", data->fast_import_backchannel_pipe[1]);
> +       fastimport->argv[2] = strbuf_detach(&buf, NULL);
>
>        fastimport->git_cmd = 1;
> +
> +       fd_to_close = data->fast_import_backchannel_pipe[0];
> +       fastimport->preexec_cb = close_fd_prexec_cb;
>        return start_command(fastimport);
>  }
>
> @@ -427,6 +469,11 @@ static int fetch_with_import(struct transport *transport,
>        if (get_importer(transport, &fastimport))
>                die("Couldn't run fast-import");
>
> +
> +       /* in the parent process we close both pipe ends. */
> +       close(data->fast_import_backchannel_pipe[0]);
> +       close(data->fast_import_backchannel_pipe[1]);
> +
>        for (i = 0; i < nr_heads; i++) {
>                posn = to_fetch[i];
>                if (posn->status & REF_STATUS_UPTODATE)
> @@ -441,6 +488,7 @@ static int fetch_with_import(struct transport *transport,
>
>        if (finish_command(&fastimport))
>                die("Error while running fast-import");
> +       free((void*)fastimport.argv[2]);
>        free(fastimport.argv);
>        fastimport.argv = NULL;
>
> diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
> index 2f0089f..b1fe03f 100644
> --- a/vcs-svn/svndump.c
> +++ b/vcs-svn/svndump.c
> @@ -467,7 +467,14 @@ void svndump_read(const char *url)
>
>  static void init()
>  {
> -       fast_export_init(REPORT_FILENO);
> +       int report_fd;
> +       char* back_fd_env = getenv("GIT_REPORT_FILENO");
> +       if(!back_fd_env || sscanf(back_fd_env, "%d", &report_fd) != 1) {
> +               warning("Cannot get cat-blob fd from environment, using default!");
> +               report_fd = REPORT_FILENO;
> +       }
> +
> +       fast_export_init(report_fd);
>        strbuf_init(&dump_ctx.uuid, 4096);
>        strbuf_init(&dump_ctx.url, 4096);
>        strbuf_init(&rev_ctx.log, 4096);
> --
> 1.7.9.5

+cc Sverre Rabbelier and Jeff King, as they have been active in
transport-helper.c.

--
David Barr

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-04 17:20       ` [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper Florian Achleitner
  2012-06-05  1:33         ` David Michael Barr
@ 2012-06-05  6:56         ` Jeff King
  2012-06-05  7:07           ` David Michael Barr
  2012-06-05  8:51           ` Johannes Sixt
  1 sibling, 2 replies; 57+ messages in thread
From: Jeff King @ 2012-06-05  6:56 UTC (permalink / raw)
  To: Florian Achleitner; +Cc: git

On Mon, Jun 04, 2012 at 07:20:55PM +0200, Florian Achleitner wrote:

> On invocation of a helper a new pipe is opened.
> To close the other end after fork, the prexec_cb feature
> of the run_command api is used.
> If the helper is not used with fast-import later the pipe
> is unused.
> The FD is passed to the remote-helper via it's environment,
> helpers that don't use fast-import can simply ignore it.
> fast-import has an argv for that.

I don't keep up on fast-import development, so I have no clue how useful
this extra pipe is, or whether this patch is a good idea overall. But a
few comments on the transport.c half of things:

> +static int fd_to_close;
> +void close_fd_prexec_cb(void)
> +{
> +	if(debug)
> +		fprintf(stderr, "close_fd_prexec_cb closing %d\n", fd_to_close);
> +	close(fd_to_close);
> +}

Note that preexec_cb does not work at all on Windows, as it assumes a
forking model (rather than a spawn, which leaves no room to execute
arbitrary code in the child). If all you want to do is open an extra
pipe, then probably run-command should be extended to handle this
(though I have no idea how complex that would be for the Windows side of
things, it is at least _possible_, as opposed to preexec_cb, which will
never be possible).

> @@ -376,13 +411,20 @@ static int fetch_with_fetch(struct transport *transport,
>  static int get_importer(struct transport *transport, struct child_process *fastimport)
>  {
>  	struct child_process *helper = get_helper(transport);
> +	struct helper_data *data = transport->data;
> +	struct strbuf buf = STRBUF_INIT;
>  	memset(fastimport, 0, sizeof(*fastimport));
>  	fastimport->in = helper->out;
>  	fastimport->argv = xcalloc(5, sizeof(*fastimport->argv));
>  	fastimport->argv[0] = "fast-import";
>  	fastimport->argv[1] = "--quiet";
> +	strbuf_addf(&buf, "--cat-blob-fd=%d", data->fast_import_backchannel_pipe[1]);
> +	fastimport->argv[2] = strbuf_detach(&buf, NULL);

Consider converting this to use argv_array. You can drop the magic
numbers, and "argv_array_pushf" handles the strbuf bits for you
automatically.

And this grossness can go away:

> @@ -441,6 +488,7 @@ static int fetch_with_import(struct transport *transport,
>  
>  	if (finish_command(&fastimport))
>  		die("Error while running fast-import");
> +	free((void*)fastimport.argv[2]);
>  	free(fastimport.argv);
>  	fastimport.argv = NULL;

(you'd instead want to free everything; it would probably make sense to
add an argv_array_free_detached() function to do so).

> @@ -427,6 +469,11 @@ static int fetch_with_import(struct transport *transport,
>  	if (get_importer(transport, &fastimport))
>  		die("Couldn't run fast-import");
>  
> +
> +	/* in the parent process we close both pipe ends. */
> +	close(data->fast_import_backchannel_pipe[0]);
> +	close(data->fast_import_backchannel_pipe[1]);

I'm confused. We close both ends? Who is actually reading and writing to
this pipe, then?

-Peff

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  6:56         ` Jeff King
@ 2012-06-05  7:07           ` David Michael Barr
  2012-06-05  8:14             ` Jeff King
  2012-06-05  8:51           ` Johannes Sixt
  1 sibling, 1 reply; 57+ messages in thread
From: David Michael Barr @ 2012-06-05  7:07 UTC (permalink / raw)
  To: Jeff King; +Cc: Florian Achleitner, Sverre Rabbelier, git

On Tue, Jun 5, 2012 at 4:56 PM, Jeff King <peff@peff.net> wrote:
> On Mon, Jun 04, 2012 at 07:20:55PM +0200, Florian Achleitner wrote:
>> @@ -427,6 +469,11 @@ static int fetch_with_import(struct transport *transport,
>>       if (get_importer(transport, &fastimport))
>>               die("Couldn't run fast-import");
>>
>> +
>> +     /* in the parent process we close both pipe ends. */
>> +     close(data->fast_import_backchannel_pipe[0]);
>> +     close(data->fast_import_backchannel_pipe[1]);
>
> I'm confused. We close both ends? Who is actually reading and writing to
> this pipe, then?

One child, git-fast-import writes to one end.
The other child, git-remote-* reads from the other end.

--
David Barr

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  7:07           ` David Michael Barr
@ 2012-06-05  8:14             ` Jeff King
  2012-06-05 22:16               ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jeff King @ 2012-06-05  8:14 UTC (permalink / raw)
  To: David Michael Barr; +Cc: Florian Achleitner, Sverre Rabbelier, git

On Tue, Jun 05, 2012 at 05:07:53PM +1000, David Michael Barr wrote:

> On Tue, Jun 5, 2012 at 4:56 PM, Jeff King <peff@peff.net> wrote:
> > On Mon, Jun 04, 2012 at 07:20:55PM +0200, Florian Achleitner wrote:
> >> @@ -427,6 +469,11 @@ static int fetch_with_import(struct transport *transport,
> >>       if (get_importer(transport, &fastimport))
> >>               die("Couldn't run fast-import");
> >>
> >> +
> >> +     /* in the parent process we close both pipe ends. */
> >> +     close(data->fast_import_backchannel_pipe[0]);
> >> +     close(data->fast_import_backchannel_pipe[1]);
> >
> > I'm confused. We close both ends? Who is actually reading and writing to
> > this pipe, then?
> 
> One child, git-fast-import writes to one end.
> The other child, git-remote-* reads from the other end.

Ah, thanks. I missed where the write end was going, but now I see it.
Overall, the point of the patch makes sense to me (it would have been
nice if the commit message described the rationale a bit more
completely).

Is there a reason that the patch unconditionally creates the pipe in
get_helper? I.e., isn't it specific to the get_importer code path? It
feels a little hacky to have it infect the other code paths.

-Peff

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  6:56         ` Jeff King
  2012-06-05  7:07           ` David Michael Barr
@ 2012-06-05  8:51           ` Johannes Sixt
  2012-06-05  9:07             ` Jeff King
  2012-06-05  9:09             ` Johannes Sixt
  1 sibling, 2 replies; 57+ messages in thread
From: Johannes Sixt @ 2012-06-05  8:51 UTC (permalink / raw)
  To: Jeff King; +Cc: Florian Achleitner, git

Am 6/5/2012 8:56, schrieb Jeff King:
> On Mon, Jun 04, 2012 at 07:20:55PM +0200, Florian Achleitner wrote:
>> +static int fd_to_close;
>> +void close_fd_prexec_cb(void)
>> +{
>> +	if(debug)
>> +		fprintf(stderr, "close_fd_prexec_cb closing %d\n", fd_to_close);
>> +	close(fd_to_close);
>> +}
> 
> Note that preexec_cb does not work at all on Windows, as it assumes a
> forking model (rather than a spawn, which leaves no room to execute
> arbitrary code in the child). If all you want to do is open an extra
> pipe, then probably run-command should be extended to handle this
> (though I have no idea how complex that would be for the Windows side of
> things, it is at least _possible_, as opposed to preexec_cb, which will
> never be possible).

The lack of support for preexec_cb on Windows is actually not the problem
in this case. Our emulation of pipe() actually creates file handles that
are not inherited by child processes. (For the standard channels 0,1,2 we
rely on that dup() creates duplicates that *can* be inherited; so they
still work.)

The first problem with the new infrastructure in this patch is that dup()
is not called anywhere after pipe(). To solve this, we would have to
extend run-command in some way to allow passing along arbitrary pipes and
handles.

The second problem is more severe and is at the lowest level of our
infrastructure: We set up our child processes so that they know only about
file descriptors other than 0,1,2 to the child process. Even if the first
problem were solved, the child process does not receive sufficient
information to know that there are open file descriptors other than 0,1,2.
There is a facility to pass along this information from the parent to the
child, but we simply do not implement it.

IOW: Everything that uses --cat-blob-fd or a similar facility cannot work
on Windows without considerable additional effort.

-- Hannes

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  8:51           ` Johannes Sixt
@ 2012-06-05  9:07             ` Jeff King
  2012-06-05 22:17               ` Florian Achleitner
  2012-06-05  9:09             ` Johannes Sixt
  1 sibling, 1 reply; 57+ messages in thread
From: Jeff King @ 2012-06-05  9:07 UTC (permalink / raw)
  To: Florian Achleitner; +Cc: Johannes Sixt, git

On Tue, Jun 05, 2012 at 10:51:32AM +0200, Johannes Sixt wrote:

> > Note that preexec_cb does not work at all on Windows, as it assumes a
> [... a very nice explanation of the pipe issues ...]
> 
> IOW: Everything that uses --cat-blob-fd or a similar facility cannot work
> on Windows without considerable additional effort.

Thanks, Johannes, that makes sense to me.

Florian, does that mean that making the svn helper start to use
--cat-blob-fd at all is a potential regression for Windows?  The
fast-import documentation says that the cat-blob output will go to
stdout now. Does it even work at all now? I don't really know or
understand all of the reasons for cat-blob-fd to exist in the first
place.

I expect one answer might be "well, the svn remote helper does not work
at all on Windows already, so there's no regression". But this affects
_all_ fast-import calls that git's transport-helper makes. Are there
other ones that use import, and would they be affected by this? 

For that matter, isn't this a backwards-incompatible change for other
third-party helpers? Won't they need to respect the new
GIT_REPORT_FILENO environment variable? Do we need the helper to specify
"yes, I am ready to handle cat-blob-fd" in its capabilities list?

-Peff

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  8:51           ` Johannes Sixt
  2012-06-05  9:07             ` Jeff King
@ 2012-06-05  9:09             ` Johannes Sixt
  1 sibling, 0 replies; 57+ messages in thread
From: Johannes Sixt @ 2012-06-05  9:09 UTC (permalink / raw)
  To: Jeff King; +Cc: Florian Achleitner, git

Am 6/5/2012 10:51, schrieb Johannes Sixt:
> The second problem is more severe and is at the lowest level of our
> infrastructure: We set up our child processes so that they know only about
> file descriptors other than 0,1,2 to the child process.

That should read:

We set up our child processes so that they know only about file
descriptors 0,1,2.

> Even if the first
> problem were solved, the child process does not receive sufficient
> information to know that there are open file descriptors other than 0,1,2.
> There is a facility to pass along this information from the parent to the
> child, but we simply do not implement it.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  8:14             ` Jeff King
@ 2012-06-05 22:16               ` Florian Achleitner
  2012-06-06 13:43                 ` Jeff King
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-06-05 22:16 UTC (permalink / raw)
  To: Jeff King; +Cc: David Michael Barr, Florian Achleitner, Sverre Rabbelier, git

On Tuesday 05 June 2012 04:14:02 Jeff King wrote:
> Is there a reason that the patch unconditionally creates the pipe in
> get_helper? I.e., isn't it specific to the get_importer code path? It
> feels a little hacky to have it infect the other code paths.

I agree, it's a bit hacky. For me as a newbee, it was just a way to make fast-
import have the pipe it needs. I didn't know about the history of the 
preexec_cb as a fix for a bug in less.

The pipe is created unconditionally, because at the fork-time of the remote-
helper it is not known whether the import command will be used later together 
with fast-import, or not. (and later, there's no way, I think).
Helpers that don't use the pipe could simply ignore it.

--
Florian Achleitner

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05  9:07             ` Jeff King
@ 2012-06-05 22:17               ` Florian Achleitner
  0 siblings, 0 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-05 22:17 UTC (permalink / raw)
  To: Jeff King; +Cc: Florian Achleitner, Johannes Sixt, git

On Tuesday 05 June 2012 05:07:02 Jeff King wrote:
> Florian, does that mean that making the svn helper start to use
> --cat-blob-fd at all is a potential regression for Windows? 

Actually I don't know. I'm don't know a lot about Windows support in git at 
all. 
I created the pipe, because the current vcs-svn/* code requires it for 
importing svn dumps. I can't explain yet why it requires it.

> The
> fast-import documentation says that the cat-blob output will go to
> stdout now. Does it even work at all now? 

stdout is connected to the parent, while the new pipe connects it's two 
childs..
It works.

> I don't really know or
> understand all of the reasons for cat-blob-fd to exist in the first
> place.

Good point.

> 
> I expect one answer might be "well, the svn remote helper does not work
> at all on Windows already, so there's no regression". But this affects
> all fast-import calls that git's transport-helper makes. Are there
> other ones that use import, and would they be affected by this? 
> 
> For that matter, isn't this a backwards-incompatible change for other
> third-party helpers? Won't they need to respect the new
> GIT_REPORT_FILENO environment variable? Do we need the helper to specify
> "yes, I am ready to handle cat-blob-fd" in its capabilities list?

I think whenever the remote helper uses some specific commands of fast-import 
it needs the cat-blob-fd to read feedback, but I haven't digged into that 
yet..

--
Florian Achleitner

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-05 22:16               ` Florian Achleitner
@ 2012-06-06 13:43                 ` Jeff King
  2012-06-06 21:04                   ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jeff King @ 2012-06-06 13:43 UTC (permalink / raw)
  To: Florian Achleitner; +Cc: David Michael Barr, Sverre Rabbelier, git

On Wed, Jun 06, 2012 at 12:16:25AM +0200, Florian Achleitner wrote:

> On Tuesday 05 June 2012 04:14:02 Jeff King wrote:
> > Is there a reason that the patch unconditionally creates the pipe in
> > get_helper? I.e., isn't it specific to the get_importer code path? It
> > feels a little hacky to have it infect the other code paths.
> 
> I agree, it's a bit hacky. For me as a newbee, it was just a way to make fast-
> import have the pipe it needs. I didn't know about the history of the 
> preexec_cb as a fix for a bug in less.
> 
> The pipe is created unconditionally, because at the fork-time of the remote-
> helper it is not known whether the import command will be used later together 
> with fast-import, or not. (and later, there's no way, I think).
> Helpers that don't use the pipe could simply ignore it.

Good point. I think we really are stuck with doing it in every case,
unless we want to turn to something that can be opened after the fact
(like a fifo).

-Peff

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper.
  2012-06-06 13:43                 ` Jeff King
@ 2012-06-06 21:04                   ` Florian Achleitner
  0 siblings, 0 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-06 21:04 UTC (permalink / raw)
  To: Jeff King; +Cc: Florian Achleitner, David Michael Barr, Sverre Rabbelier, git

On Wednesday 06 June 2012 09:43:21 you wrote:
> Good point. I think we really are stuck with doing it in every case,
> unless we want to turn to something that can be opened after the fact
> (like a fifo).

A fifo could be clever. There's also something similar on windows, maybe we can 
use named pipes there too.

> 
> -Peff

-- Flo

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [RFC 0/4 v2]
  2012-06-04 17:20 [RFC 0/4] Florian Achleitner
  2012-06-04 17:20 ` [RFC 1/4] Implement a basic remote helper vor svn in C Florian Achleitner
@ 2012-06-29  7:49 ` Florian Achleitner
  2012-06-29  7:54   ` [RFC 1/4 v2] Implement a basic remote helper for svn in C Florian Achleitner
                     ` (3 more replies)
  1 sibling, 4 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-29  7:49 UTC (permalink / raw)
  To: David Michael Barr, Sverre Rabbelier, git, Jeff King,
	Johannes Sixt, jrnieder

This new version uses a fifo instead of a pipe and addresses other issues 
discussed in this thread.
To pass the name of the fifo to fast-import, it gets a new cmd-line-arg.
It no longer requires the prexec_cb and should be more portable, as there 
exist pipes on windows too.



On Monday 04 June 2012 19:20:51 Florian Achleitner wrote:
> Series of patches creating a very basic remote helper in C.
> 
> [RFC 1/4] Implement a basic remote helper vor svn in C.
> [RFC 2/4] Integrate remote-svn into svn-fe/Makefile.
> [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary
> [RFC 4/4] Add cat-blob report pipe from fast-import to

^ permalink raw reply	[flat|nested] 57+ messages in thread

* [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
@ 2012-06-29  7:54   ` Florian Achleitner
  2012-07-02 11:07     ` Jonathan Nieder
  2012-06-29  7:58   ` [RFC 2/4 v2] Integrate remote-svn into svn-fe/Makefile Florian Achleitner
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-06-29  7:54 UTC (permalink / raw)
  To: git
  Cc: David Michael Barr, Sverre Rabbelier, Jeff King, Johannes Sixt,
	jrnieder

Experimental implementation.
Inspired by the existing shell script at divanorama/remote-svn-alpha.
It doesn't use marks or notes yet, always imports the full history.

svnrdump is started as a subprocess while svn-fe's functions
are called directly.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
diff: Use fifo instead of pipe: Retrieve the name of the pipe from env and open it
for svndump.

 contrib/svn-fe/remote-svn.c |  207 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 207 insertions(+)
 create mode 100644 contrib/svn-fe/remote-svn.c

diff --git a/contrib/svn-fe/remote-svn.c b/contrib/svn-fe/remote-svn.c
new file mode 100644
index 0000000..5ec7fbb
--- /dev/null
+++ b/contrib/svn-fe/remote-svn.c
@@ -0,0 +1,207 @@
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "cache.h"
+#include "remote.h"
+#include "strbuf.h"
+#include "url.h"
+#include "exec_cmd.h"
+#include "run-command.h"
+#include "svndump.h"
+
+static int debug = 0;
+
+static inline void printd(const char* fmt, ...)
+{
+	if(debug) {
+		va_list vargs;
+		va_start(vargs, fmt);
+		fprintf(stderr, "rhsvn debug: ");
+		vfprintf(stderr, fmt, vargs);
+		fprintf(stderr, "\n");
+		va_end(vargs);
+	}
+}
+
+static struct remote* remote;
+static const char* url;
+const char* private_refs = "refs/remote-svn/";		/* + remote->name. */
+const char* remote_ref = "refs/heads/master";
+
+enum cmd_result cmd_capabilities(struct strbuf* line);
+enum cmd_result cmd_import(struct strbuf* line);
+enum cmd_result cmd_list(struct strbuf* line);
+
+enum cmd_result { SUCCESS, NOT_HANDLED, ERROR };
+typedef enum cmd_result (*command)(struct strbuf*);
+
+const command command_list[] = {
+		cmd_capabilities, cmd_import, cmd_list, NULL
+};
+
+enum cmd_result cmd_capabilities(struct strbuf* line)
+{
+	if(strcmp(line->buf, "capabilities"))
+		return NOT_HANDLED;
+
+	printf("import\n");
+	printf("\n");
+	fflush(stdout);
+	return SUCCESS;
+}
+
+enum cmd_result cmd_import(struct strbuf* line)
+{
+	const char* revs = "-r0:HEAD";
+	int code, report_fd;
+	char* back_pipe_env;
+	struct child_process svndump_proc = {
+			.argv = NULL,		/* comes later .. */
+			/* we want a pipe to the child's stdout, but stdin, stderr inherited.
+			 The user can be asked for e.g. a password */
+			.in = 0, .out = -1, .err = 0,
+			.no_stdin = 0, .no_stdout = 0, .no_stderr = 0,
+			.git_cmd = 0,
+			.silent_exec_failure = 0,
+			.stdout_to_stderr = 0,
+			.use_shell = 0,
+			.clean_on_exit = 0,
+			.preexec_cb = NULL,
+			.env = NULL,
+			.dir = NULL
+	};
+
+	if(prefixcmp(line->buf, "import"))
+		return NOT_HANDLED;
+
+	back_pipe_env = getenv("GIT_REPORT_FIFO");
+	if(!back_pipe_env) {
+		die("Cannot get cat-blob-pipe from environment!");
+	}
+
+	/* opening a fifo for usually reading blocks until a writer has opened it too.
+	 * Therefore, we open with RDWR.
+	 */
+	report_fd = open(back_pipe_env, O_RDWR);
+	if(report_fd < 0) {
+		die("Unable to open fast-import back-pipe! %s", strerror(errno));
+	}
+
+	printd("Opened fast-import back-pipe %s for reading.", back_pipe_env);
+
+	svndump_proc.argv = xcalloc(5, sizeof(char*));
+	svndump_proc.argv[0] = "svnrdump";
+	svndump_proc.argv[1] = "dump";
+	svndump_proc.argv[2] = url;
+	svndump_proc.argv[3] = revs;
+
+	code = start_command(&svndump_proc);
+	if(code)
+		die("Unable to start %s, code %d", svndump_proc.argv[0], code);
+
+
+
+	svndump_init_fd(svndump_proc.out, report_fd);
+	svndump_read(url);
+	svndump_deinit();
+	svndump_reset();
+
+	close(svndump_proc.out);
+	close(report_fd);
+
+	code = finish_command(&svndump_proc);
+	if(code)
+		warning("Something went wrong with termination of %s, code %d", svndump_proc.argv[0], code);
+	free(svndump_proc.argv);
+
+	printf("done\n");
+	return SUCCESS;
+
+
+
+}
+
+enum cmd_result cmd_list(struct strbuf* line)
+{
+	if(strcmp(line->buf, "list"))
+		return NOT_HANDLED;
+
+	printf("? HEAD\n");
+	printf("? %s\n", remote_ref);
+	printf("\n");
+	fflush(stdout);
+	return SUCCESS;
+}
+
+enum cmd_result do_command(struct strbuf* line)
+{
+	const command* p = command_list;
+	enum cmd_result ret;
+	printd("command line '%s'", line->buf);
+	while(*p) {
+		ret = (*p)(line);
+		if(ret != NOT_HANDLED)
+			return ret;
+		p++;
+	}
+	warning("Unknown command '%s'\n", line->buf);
+	return ret;
+}
+
+int main(int argc, const char **argv)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int nongit;
+
+	if (getenv("GIT_TRANSPORT_HELPER_DEBUG"))
+		debug = 1;
+
+	git_extract_argv0_path(argv[0]);
+	setup_git_directory_gently(&nongit);
+	if (argc < 2) {
+		fprintf(stderr, "Remote needed\n");
+		return 1;
+	}
+
+	remote = remote_get(argv[1]);
+	if (argc == 3) {
+		end_url_with_slash(&buf, argv[2]);
+	} else if (argc == 2) {
+		end_url_with_slash(&buf, remote->url[0]);
+	} else {
+		warning("Excess arguments!");
+	}
+
+	url = strbuf_detach(&buf, NULL);
+
+	printd("remote-svn starting with url %s", url);
+
+	/* build private ref namespace path for this svn remote. */
+	strbuf_init(&buf, 0);
+	strbuf_addstr(&buf, private_refs);
+	strbuf_addstr(&buf, remote->name);
+	strbuf_addch(&buf, '/');
+	private_refs = strbuf_detach(&buf, NULL);
+
+	while(1) {
+		if (strbuf_getline(&buf, stdin, '\n') == EOF) {
+			if (ferror(stdin))
+				fprintf(stderr, "Error reading command stream\n");
+			else
+				fprintf(stderr, "Unexpected end of command stream\n");
+			return 1;
+		}
+		/* an empty line terminates the command stream */
+		if(buf.len == 0)
+			break;
+
+		do_command(&buf);
+		strbuf_reset(&buf);
+	}
+
+	strbuf_release(&buf);
+	free((void*)url);
+	free((void*)private_refs);
+	return 0;
+}
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [RFC 2/4 v2] Integrate remote-svn into svn-fe/Makefile.
  2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
  2012-06-29  7:54   ` [RFC 1/4 v2] Implement a basic remote helper for svn in C Florian Achleitner
@ 2012-06-29  7:58   ` Florian Achleitner
  2012-06-29  7:59   ` [RFC 3/4 v2] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
  2012-06-29  8:00   ` [RFC 4/4 v2] Add cat-blob report fifo from fast-import to remote-helper Florian Achleitner
  3 siblings, 0 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-29  7:58 UTC (permalink / raw)
  To: git
  Cc: David Michael Barr, Sverre Rabbelier, Jeff King, Johannes Sixt,
	jrnieder

Requires some sha.h to be used and the libraries
to be linked, this is currently hardcoded.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
diff: add  -Wdeclaration-after-statement to CFLAGS

 contrib/svn-fe/Makefile |   14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/contrib/svn-fe/Makefile b/contrib/svn-fe/Makefile
index 360d8da..49b91e6 100644
--- a/contrib/svn-fe/Makefile
+++ b/contrib/svn-fe/Makefile
@@ -1,14 +1,14 @@
-all:: svn-fe$X
+all:: svn-fe$X remote-svn$X
 
 CC = gcc
 RM = rm -f
 MV = mv
 
-CFLAGS = -g -O2 -Wall
+CFLAGS = -g -O2 -Wall -DSHA1_HEADER='<openssl/sha.h>' -Wdeclaration-after-statement
 LDFLAGS =
 ALL_CFLAGS = $(CFLAGS)
 ALL_LDFLAGS = $(LDFLAGS)
-EXTLIBS =
+EXTLIBS = -lssl -lcrypto -lpthread ../../xdiff/lib.a
 
 GIT_LIB = ../../libgit.a
 VCSSVN_LIB = ../../vcs-svn/lib.a
@@ -37,8 +37,12 @@ svn-fe$X: svn-fe.o $(VCSSVN_LIB) $(GIT_LIB)
 	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ svn-fe.o \
 		$(ALL_LDFLAGS) $(LIBS)
 
-svn-fe.o: svn-fe.c ../../vcs-svn/svndump.h
-	$(QUIET_CC)$(CC) -I../../vcs-svn -o $*.o -c $(ALL_CFLAGS) $<
+remote-svn$X: remote-svn.o $(VCSSVN_LIB) $(GIT_LIB)
+	$(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ remote-svn.o \
+		$(ALL_LDFLAGS) $(LIBS)
+		
+%.o: %.c ../../vcs-svn/svndump.h
+	$(QUIET_CC)$(CC) -I../../vcs-svn -I../../ -o $*.o -c $(ALL_CFLAGS) $<
 
 svn-fe.html: svn-fe.txt
 	$(QUIET_SUBDIR0)../../Documentation $(QUIET_SUBDIR1) \
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [RFC 3/4 v2] Add svndump_init_fd to allow reading dumps from arbitrary FDs.
  2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
  2012-06-29  7:54   ` [RFC 1/4 v2] Implement a basic remote helper for svn in C Florian Achleitner
  2012-06-29  7:58   ` [RFC 2/4 v2] Integrate remote-svn into svn-fe/Makefile Florian Achleitner
@ 2012-06-29  7:59   ` Florian Achleitner
  2012-06-29  8:00   ` [RFC 4/4 v2] Add cat-blob report fifo from fast-import to remote-helper Florian Achleitner
  3 siblings, 0 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-06-29  7:59 UTC (permalink / raw)
  To: git
  Cc: David Michael Barr, Sverre Rabbelier, Jeff King, Johannes Sixt,
	jrnieder

The existing function only allowed reading from a filename or
from stdin. Allow passing of a FD and an additional FD for
the back report pipe. This allows us to retrieve the name of
the pipe in the caller.

Fixes the filename could be NULL bug.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
 vcs-svn/svndump.c |   22 ++++++++++++++++++----
 vcs-svn/svndump.h |    1 +
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 0899790..eb76bf8 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -465,11 +465,9 @@ void svndump_read(const char *url)
 		end_revision();
 }
 
-int svndump_init(const char *filename)
+static void init(int report_fd)
 {
-	if (buffer_init(&input, filename))
-		return error("cannot open %s: %s", filename, strerror(errno));
-	fast_export_init(REPORT_FILENO);
+	fast_export_init(report_fd);
 	strbuf_init(&dump_ctx.uuid, 4096);
 	strbuf_init(&dump_ctx.url, 4096);
 	strbuf_init(&rev_ctx.log, 4096);
@@ -479,6 +477,22 @@ int svndump_init(const char *filename)
 	reset_dump_ctx(NULL);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);
+	return;
+}
+
+int svndump_init(const char *filename)
+{
+	if (buffer_init(&input, filename))
+		return error("cannot open %s: %s", filename ? filename : "NULL", strerror(errno));
+	init(REPORT_FILENO);
+	return 0;
+}
+
+int svndump_init_fd(int in_fd, int back_fd)
+{
+	if(buffer_fdinit(&input, in_fd))
+		return error("cannot open fd %d: %s", in_fd, strerror(errno));
+	init(back_fd);
 	return 0;
 }
 
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
index df9ceb0..acb5b47 100644
--- a/vcs-svn/svndump.h
+++ b/vcs-svn/svndump.h
@@ -2,6 +2,7 @@
 #define SVNDUMP_H_
 
 int svndump_init(const char *filename);
+int svndump_init_fd(int in_fd, int back_fd);
 void svndump_read(const char *url);
 void svndump_deinit(void);
 void svndump_reset(void);
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* [RFC 4/4 v2] Add cat-blob report fifo from fast-import to remote-helper.
  2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
                     ` (2 preceding siblings ...)
  2012-06-29  7:59   ` [RFC 3/4 v2] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
@ 2012-06-29  8:00   ` Florian Achleitner
  2012-07-21 12:45     ` [RFC 4/4 v3] " Florian Achleitner
  3 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-06-29  8:00 UTC (permalink / raw)
  To: git
  Cc: David Michael Barr, Sverre Rabbelier, Jeff King, Johannes Sixt,
	jrnieder

For some fast-import commands (e.g. cat-blob) an answer-channel
is required. For this purpose a fifo (aka named pipe) (mkfifo)
is created (.git/fast-import-report-fifo) by the transport-helper
when fetch via import is requested. The remote-helper and
fast-import open the ends of the pipe.

The filename of the fifo is passed to the remote-helper via
it's environment, helpers that don't use fast-import can
simply ignore it.
Add a new command line option --cat-blob-pipe to fast-import,
for this purpose.

Use argv_arrays in get_helper and get_importer.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
 fast-import.c      |   15 ++++++++++++
 transport-helper.c |   64 ++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index eed97c8..44cb124 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -3180,6 +3180,16 @@ static void option_cat_blob_fd(const char *fd)
 	cat_blob_fd = (int) n;
 }
 
+static void option_cat_blob_pipe(const char *name)
+{
+	int report_fd = open(name, O_WRONLY);
+	warning("Opened pipe %s.", name);
+	if(report_fd < 0) {
+		die("Unable to open fast-import back-pipe! %s", strerror(errno));
+	}
+	cat_blob_fd = report_fd;
+}
+
 static void option_export_pack_edges(const char *edges)
 {
 	if (pack_edges)
@@ -3337,6 +3347,11 @@ static void parse_argv(void)
 			continue;
 		}
 
+		if(!prefixcmp(a + 2, "cat-blob-pipe=")) {
+			option_cat_blob_pipe(a + 2 + strlen("cat-blob-pipe="));
+			continue;
+		}
+
 		die("unknown option %s", a);
 	}
 	if (i != global_argc)
diff --git a/transport-helper.c b/transport-helper.c
index 61c928f..616db91 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -10,6 +10,7 @@
 #include "string-list.h"
 #include "thread-utils.h"
 #include "sigchain.h"
+#include "argv-array.h"
 
 static int debug;
 
@@ -17,6 +18,7 @@ struct helper_data {
 	const char *name;
 	struct child_process *helper;
 	FILE *out;
+	char *report_fifo;
 	unsigned fetch : 1,
 		import : 1,
 		export : 1,
@@ -101,6 +103,7 @@ static void do_take_over(struct transport *transport)
 static struct child_process *get_helper(struct transport *transport)
 {
 	struct helper_data *data = transport->data;
+	struct argv_array argv = ARGV_ARRAY_INIT;
 	struct strbuf buf = STRBUF_INIT;
 	struct child_process *helper;
 	const char **refspecs = NULL;
@@ -111,6 +114,7 @@ static struct child_process *get_helper(struct transport *transport)
 	char git_dir_buf[sizeof(GIT_DIR_ENVIRONMENT) + PATH_MAX + 1];
 	const char *helper_env[] = {
 		git_dir_buf,
+		NULL,	/* placeholder */
 		NULL
 	};
 
@@ -122,17 +126,23 @@ static struct child_process *get_helper(struct transport *transport)
 	helper->in = -1;
 	helper->out = -1;
 	helper->err = 0;
-	helper->argv = xcalloc(4, sizeof(*helper->argv));
-	strbuf_addf(&buf, "git-remote-%s", data->name);
-	helper->argv[0] = strbuf_detach(&buf, NULL);
-	helper->argv[1] = transport->remote->name;
-	helper->argv[2] = remove_ext_force(transport->url);
+	argv_array_pushf(&argv, "git-remote-%s", data->name);
+	argv_array_push(&argv, transport->remote->name);
+	argv_array_push(&argv, remove_ext_force(transport->url));
+	helper->argv = argv.argv;
 	helper->git_cmd = 0;
 	helper->silent_exec_failure = 1;
 
 	snprintf(git_dir_buf, sizeof(git_dir_buf), "%s=%s", GIT_DIR_ENVIRONMENT, get_git_dir());
 	helper->env = helper_env;
 
+	strbuf_init(&buf, 0);
+	strbuf_addf(&buf, "%s/fast-import-report-fifo", get_git_dir());
+	data->report_fifo = strbuf_detach(&buf, NULL);
+	strbuf_init(&buf, 0);
+	strbuf_addf(&buf, "GIT_REPORT_FIFO=%s", data->report_fifo);
+	helper_env[1] = strbuf_detach(&buf, NULL);
+
 	code = start_command(helper);
 	if (code < 0 && errno == ENOENT)
 		die("Unable to find remote helper for '%s'", data->name);
@@ -141,6 +151,8 @@ static struct child_process *get_helper(struct transport *transport)
 
 	data->helper = helper;
 	data->no_disconnect_req = 0;
+	free((void*) helper_env[1]);
+	argv_array_clear(&argv);
 
 	/*
 	 * Open the output as FILE* so strbuf_getline() can be used.
@@ -237,13 +249,13 @@ static int disconnect_helper(struct transport *transport)
 			xwrite(data->helper->in, "\n", 1);
 			sigchain_pop(SIGPIPE);
 		}
+
 		close(data->helper->in);
 		close(data->helper->out);
 		fclose(data->out);
 		res = finish_command(data->helper);
-		free((char *)data->helper->argv[0]);
-		free(data->helper->argv);
 		free(data->helper);
+		free(data->report_fifo);
 		data->helper = NULL;
 	}
 	return res;
@@ -373,16 +385,18 @@ static int fetch_with_fetch(struct transport *transport,
 	return 0;
 }
 
-static int get_importer(struct transport *transport, struct child_process *fastimport)
+static int get_importer(struct transport *transport, struct child_process *fastimport, struct argv_array *argv)
 {
 	struct child_process *helper = get_helper(transport);
+	struct helper_data *data = transport->data;
 	memset(fastimport, 0, sizeof(*fastimport));
 	fastimport->in = helper->out;
-	fastimport->argv = xcalloc(5, sizeof(*fastimport->argv));
-	fastimport->argv[0] = "fast-import";
-	fastimport->argv[1] = "--quiet";
-
+	argv_array_push(argv, "fast-import");
+	argv_array_push(argv, "--quiet");
+	argv_array_pushf(argv, "--cat-blob-pipe=%s", data->report_fifo);
+	fastimport->argv = argv->argv;
 	fastimport->git_cmd = 1;
+
 	return start_command(fastimport);
 }
 
@@ -421,10 +435,30 @@ static int fetch_with_import(struct transport *transport,
 	int i;
 	struct ref *posn;
 	struct strbuf buf = STRBUF_INIT;
+	struct argv_array importer_argv = ARGV_ARRAY_INIT;
+	struct stat fifostat;
+
+	/* create a fifo for back-reporting of fast-import to the remote helper,
+	 * if it doesn't exist. */
+	if(!stat(data->report_fifo, &fifostat)) {
+		if(S_ISFIFO(fifostat.st_mode)) {		/* exists and is fifo, unlink and recreate, to be sure that permissions are ok.. */
+			if (debug)
+				fprintf(stderr, "Debug: Remote helper: Unlinked existing fifo.\n");
+			if(unlink(data->report_fifo))
+				die_errno("Couldn't unlink fifo %s", data->report_fifo);
+		}
+		else
+			die("Fifo %s used by some other file.", data->report_fifo);
+	}
+	if(mkfifo(data->report_fifo, 0660))
+		die_errno("Couldn't create fifo %s", data->report_fifo);
+	if (debug)
+		fprintf(stderr, "Debug: Remote helper: Mkfifo %s\n", data->report_fifo);
+
 
 	get_helper(transport);
 
-	if (get_importer(transport, &fastimport))
+	if (get_importer(transport, &fastimport, &importer_argv))
 		die("Couldn't run fast-import");
 
 	for (i = 0; i < nr_heads; i++) {
@@ -441,8 +475,8 @@ static int fetch_with_import(struct transport *transport,
 
 	if (finish_command(&fastimport))
 		die("Error while running fast-import");
-	free(fastimport.argv);
-	fastimport.argv = NULL;
+
+	argv_array_clear(&importer_argv);
 
 	for (i = 0; i < nr_heads; i++) {
 		char *private;
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-06-29  7:54   ` [RFC 1/4 v2] Implement a basic remote helper for svn in C Florian Achleitner
@ 2012-07-02 11:07     ` Jonathan Nieder
  2012-07-06  0:30       ` Jonathan Nieder
                         ` (3 more replies)
  0 siblings, 4 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-02 11:07 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Hi,

Florian Achleitner wrote:

> Experimental implementation.

Ok, so this adds a new program named "remote-svn".  How do I build it?
What does it do?  Will it make my life better?

[...]
> diff: Use fifo instead of pipe: Retrieve the name of the pipe from env and open it
> for svndump.

I'd prefer to avoid this if possible, since it means having to decide
where the pipe goes on the filesystem.  Can you summarize the
discussion in the commit message so future readers understand why
we're doing it?

[...]
> --- /dev/null
> +++ b/contrib/svn-fe/remote-svn.c
> @@ -0,0 +1,207 @@
> +
> +#include <stdlib.h>
> +#include <string.h>
> +#include <stdio.h>

git-compat-util.h (or some header that includes it) must be the first
header included so the appropriate feature test macros can be defined.
See Documentation/CodingGuidelines for more on that.

> +#include "cache.h"
> +#include "remote.h"
> +#include "strbuf.h"
> +#include "url.h"
> +#include "exec_cmd.h"
> +#include "run-command.h"
> +#include "svndump.h"
> +
> +static int debug = 0;

Small nit: please drop the redundant "= 0" here.  Or:

> +
> +static inline void printd(const char* fmt, ...)
> +{
> +	if(debug) {
> +		va_list vargs;
> +		va_start(vargs, fmt);
> +		fprintf(stderr, "rhsvn debug: ");
> +		vfprintf(stderr, fmt, vargs);
> +		fprintf(stderr, "\n");
> +		va_end(vargs);
> +	}
> +}

Why not use trace_printf and avoid the complication?

[...]

> +
> +static struct remote* remote;
> +static const char* url;
> +const char* private_refs = "refs/remote-svn/";		/* + remote->name. */
> +const char* remote_ref = "refs/heads/master";

Style: '*' attaches to the variable name, to avoid making declarations
like

	char *p, c;

confusing.

> +
> +enum cmd_result cmd_capabilities(struct strbuf* line);
> +enum cmd_result cmd_import(struct strbuf* line);
> +enum cmd_result cmd_list(struct strbuf* line);

What's a cmd_result?  '*' sticks to variable name.

> +
> +enum cmd_result { SUCCESS, NOT_HANDLED, ERROR };

Oh, that's what a cmd_result is. :)  Why not define the type before
using it to avoid keeping the reader in suspense?

What does each result represent?  If this is a convention like

 1: handled
 0: not handled
 -1: error, callee takes care of printing the error message

then please document it in a comment near the caller so the reader can
understand what is happening without too much confusion.  Given such a
comment, does the enum add clarity?

> +typedef enum cmd_result (*command)(struct strbuf*);

When I first read this, I wonder what is being commanded.  Are these
commands passed on the remote helper's standard input, commands passed
on its output, or commands run at some point in the process?  What is
the effect and return value of associated function?  Does the function
always return some success/failure value, or does it sometimes exit?

Maybe a more specific type name would be clearer?

[...]
> +
> +const command command_list[] = {
> +		cmd_capabilities, cmd_import, cmd_list, NULL
> +};

First association is to functions like cmd_fetch() which implement git
subcommands.  So I thought these were going to implement subcommands
like "git remote-svn capabilities", "git remote-svn import" and would
use the same cmd_foo(argc, argv, prefix) calling convention that git
subcommands do.  Maybe a different naming convention could avoid
confusion.

[...]
> +enum cmd_result cmd_capabilities(struct strbuf* line)
> +{
> +	if(strcmp(line->buf, "capabilities"))
> +		return NOT_HANDLED;

Style: missing SP after keyword.

> +
> +	printf("import\n");
> +	printf("\n");
> +	fflush(stdout);
> +	return SUCCESS;
> +}

Why the multiple printf?  Is the flush needed?

[...]
> +
> +enum cmd_result cmd_import(struct strbuf* line)
> +{
> +	const char* revs = "-r0:HEAD";

Style: * goes with ... (I won't point out the rest of these.

> +	int code, report_fd;
> +	char* back_pipe_env;
> +	struct child_process svndump_proc = {
> +			.argv = NULL,		/* comes later .. */

I don't understand this comment.

> +			/* we want a pipe to the child's stdout, but stdin, stderr inherited.
> +			 The user can be asked for e.g. a password */
> +			.in = 0, .out = -1, .err = 0,

Style: comments in git are spelled like this:

			/*
			 * Here I put a sentence or two explaining some
			 * relevant design decision or fact about the world
			 * that will provide useful context for
			 * understanding the following code.
			 */
> +			.no_stdin = 0, .no_stdout = 0, .no_stderr = 0,

I couldn't parse the above comment, so I'm skipping it for now.

[...]
> +			.git_cmd = 0,
> +			.silent_exec_failure = 0,
> +			.stdout_to_stderr = 0,
> +			.use_shell = 0,
> +			.clean_on_exit = 0,
> +			.preexec_cb = NULL,
> +			.env = NULL,
> +			.dir = NULL

Style: C99-style initializers are (unfortunately) not supported in
some compilers we want to support.

No need to initialize all fields --- any trailing unlisted fields
are automatically initialized to zero.

> +	};
> +
> +	if(prefixcmp(line->buf, "import"))

Style: missing SP after keyword (I won't point out the rest of these).

> +		return NOT_HANDLED;
> +
> +	back_pipe_env = getenv("GIT_REPORT_FIFO");
> +	if(!back_pipe_env) {
> +		die("Cannot get cat-blob-pipe from environment!");
> +	}

Does this mean that expected usage is something like

	GIT_REPORT_FIFO=/tmp/foo/bar git clone svn::foo/bar/baz

?  And if I don't do that, I get

	fatal: Cannot get cat-blob-pipe from environment!

and am somehow supposed to understand what to do?

> +
> +	/* opening a fifo for usually reading blocks until a writer has opened it too.
> +	 * Therefore, we open with RDWR.
> +	 */
> +	report_fd = open(back_pipe_env, O_RDWR);
> +	if(report_fd < 0) {
> +		die("Unable to open fast-import back-pipe! %s", strerror(errno));
> +	}

Is this necessary?  Why shouldn't we fork the writer first and wait
for it here?

> +
> +	printd("Opened fast-import back-pipe %s for reading.", back_pipe_env);
> +
> +	svndump_proc.argv = xcalloc(5, sizeof(char*));
> +	svndump_proc.argv[0] = "svnrdump";
> +	svndump_proc.argv[1] = "dump";
> +	svndump_proc.argv[2] = url;
> +	svndump_proc.argv[3] = revs;

Style: could simplify by using struct argv_array.

> +
> +	code = start_command(&svndump_proc);
> +	if(code)
> +		die("Unable to start %s, code %d", svndump_proc.argv[0], code);

start_command() is supposed to have printed a message already when it
fails, unless errno == ENOENT and silent_exec_failure was set.

> +
> +
> +

Style: looks like some stray carriage returns snuck in.

> +	svndump_init_fd(svndump_proc.out, report_fd);
> +	svndump_read(url);
> +	svndump_deinit();
> +	svndump_reset();

Not your fault: this API looks a little overcomplicated.

> +
> +	close(svndump_proc.out);

Important?  Wouldn't finish_command do this?

> +	close(report_fd);

What is the purpose of this step?

> +
> +	code = finish_command(&svndump_proc);
> +	if(code)
> +		warning("Something went wrong with termination of %s, code %d", svndump_proc.argv[0], code);

finish_command() is supposed to print a message when it fails.

> +	free(svndump_proc.argv);
> +
> +	printf("done\n");
> +	return SUCCESS;

Success even if it failed?

> +
> +
> +

Blank lines seem to have snuck in.

> +}
> +
> +enum cmd_result cmd_list(struct strbuf* line)
> +{
> +	if(strcmp(line->buf, "list"))
> +		return NOT_HANDLED;
> +
> +	printf("? HEAD\n");
> +	printf("? %s\n", remote_ref);

Why is this variable?

> +	printf("\n");
> +	fflush(stdout);

Why the flush?

> +	return SUCCESS;
> +}
> +
> +enum cmd_result do_command(struct strbuf* line)
> +{
> +	const command* p = command_list;
> +	enum cmd_result ret;
> +	printd("command line '%s'", line->buf);
> +	while(*p) {
> +		ret = (*p)(line);
> +		if(ret != NOT_HANDLED)
> +			return ret;
> +		p++;
> +	}

If possible, matching commands by name (like git.c does) would make
the behavior easier to predict.

[...]
> +	if (argc < 2) {
> +		fprintf(stderr, "Remote needed\n");
> +		return 1;
> +	}

usage() can be used to write a clearer error message.

[...]
> +
> +	remote = remote_get(argv[1]);
> +	if (argc == 3) {
> +		end_url_with_slash(&buf, argv[2]);
> +	} else if (argc == 2) {
> +		end_url_with_slash(&buf, remote->url[0]);
> +	} else {
> +		warning("Excess arguments!");
> +	}

Style: no need for these braces.  usage() could be used to make it
clearer to the user what she can do next.

[...]
> +	/* build private ref namespace path for this svn remote. */
> +	strbuf_init(&buf, 0);
> +	strbuf_addstr(&buf, private_refs);
> +	strbuf_addstr(&buf, remote->name);
> +	strbuf_addch(&buf, '/');
> +	private_refs = strbuf_detach(&buf, NULL);

What is a private ref namespace path?  An example would make the comment
clearer.

> +
> +	while(1) {
> +		if (strbuf_getline(&buf, stdin, '\n') == EOF) {
> +			if (ferror(stdin))
> +				fprintf(stderr, "Error reading command stream\n");

errno will be meaningful; the message can be made clearer by using it.

Maybe this could use error() or die().

[...]
> +	free((void*)url);
> +	free((void*)private_refs);

Won't this crash?

It would also be nice to add a test case to the t/ directory to make others
changing this code do not accidentally break your new functionality.

Hope that helps,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-02 11:07     ` Jonathan Nieder
@ 2012-07-06  0:30       ` Jonathan Nieder
  2012-07-06 10:39         ` Florian Achleitner
  2012-07-26  8:31       ` Florian Achleitner
                         ` (2 subsequent siblings)
  3 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-06  0:30 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Jonathan Nieder wrote:
> Florian Achleitner wrote:

>> Experimental implementation.
>
> Ok, so this adds a new program named "remote-svn".  How do I build it?
> What does it do?  Will it make my life better?
[...]

I forgot to say: thanks for working on this!  I hope my comments are
not demoralizing.  They are meant in the opposite vein --- if I
expected you to be a one-time contributor then I would just take what
is useful from the patch and let it be, but I would be happy to see
more changes from you in the future so I gave some hints to explain
how.

The next step is to work with the list to figure out what a second
version of the patch should do, and then to send that with something
like "RFC/PATCH v2" in the subject line to clarify that it supersedes
this one.

If you have any questions about the review or the codebase in general,
please don't hesitate to ask.  Especially, if you get stuck on
something and documentation is unhelpful, please do complain. :)

Hope that helps,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-06  0:30       ` Jonathan Nieder
@ 2012-07-06 10:39         ` Florian Achleitner
  0 siblings, 0 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-07-06 10:39 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

Hi Jonathan!

Thanks for your review! I will come up with a new version later after finishing 
some other feature.. 
Now I've done all exams (last 2 yesterday :) ) and submissions and commit 
myself to git and gsoc much more.

On Thursday 05 July 2012 19:30:24 Jonathan Nieder wrote:
> Jonathan Nieder wrote:
> > Florian Achleitner wrote:
> >> Experimental implementation.
> > 
> > Ok, so this adds a new program named "remote-svn".  How do I build it?
> > What does it do?  Will it make my life better?
> 
> [...]

I need to emphasize that it is in no way something complete and truely useful.
It's a starting point for the rest of my project. It will change.
Your review will help to make it a good basis!

> 
> I forgot to say: thanks for working on this!  I hope my comments are
> not demoralizing.  They are meant in the opposite vein --- if I
> expected you to be a one-time contributor then I would just take what
> is useful from the patch and let it be, but I would be happy to see
> more changes from you in the future so I gave some hints to explain
> how.
> 
> The next step is to work with the list to figure out what a second
> version of the patch should do, and then to send that with something
> like "RFC/PATCH v2" in the subject line to clarify that it supersedes
> this one.

I will come back to that soon..

> 
> If you have any questions about the review or the codebase in general,
> please don't hesitate to ask.  Especially, if you get stuck on
> something and documentation is unhelpful, please do complain. :)
> 
> Hope that helps,
> Jonathan

Florian

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-06-29  8:00   ` [RFC 4/4 v2] Add cat-blob report fifo from fast-import to remote-helper Florian Achleitner
@ 2012-07-21 12:45     ` Florian Achleitner
  2012-07-21 14:48       ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-21 12:45 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt, jrnieder

For some fast-import commands (e.g. cat-blob) an answer-channel
is required. For this purpose a fifo (aka named pipe) (mkfifo)
is created (.git/fast-import-report-fifo) by the transport-helper
when fetch via import is requested. The remote-helper and
fast-import open the ends of the pipe.

The filename of the fifo is passed to the remote-helper via
it's environment, helpers that don't use fast-import can
simply ignore it.
Add a new command line option --cat-blob-pipe to fast-import,
for this purpose.

Use argv_arrays in get_helper and get_importer.

Opening the pipe with O_RDWR prevents blocking open calls on both ends.

Signed-off-by: Florian Achleitner <florian.achleitner.2.6.31@gmail.com>
---
diff: Opening the pipe with O_RDWR prevents blocking open calls on both ends.
 fast-import.c      |   15 ++++++++++++
 transport-helper.c |   64 ++++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 64 insertions(+), 15 deletions(-)

diff --git a/fast-import.c b/fast-import.c
index eed97c8..65a9341 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -3180,6 +3180,16 @@ static void option_cat_blob_fd(const char *fd)
 	cat_blob_fd = (int) n;
 }
 
+static void option_cat_blob_pipe(const char *name)
+{
+	int report_fd = open(name, O_RDWR);
+	warning("Opened pipe %s.", name);
+	if(report_fd < 0) {
+		die("Unable to open fast-import back-pipe! %s", strerror(errno));
+	}
+	cat_blob_fd = report_fd;
+}
+
 static void option_export_pack_edges(const char *edges)
 {
 	if (pack_edges)
@@ -3337,6 +3347,11 @@ static void parse_argv(void)
 			continue;
 		}
 
+		if(!prefixcmp(a + 2, "cat-blob-pipe=")) {
+			option_cat_blob_pipe(a + 2 + strlen("cat-blob-pipe="));
+			continue;
+		}
+
 		die("unknown option %s", a);
 	}
 	if (i != global_argc)
diff --git a/transport-helper.c b/transport-helper.c
index 61c928f..616db91 100644
--- a/transport-helper.c
+++ b/transport-helper.c
@@ -10,6 +10,7 @@
 #include "string-list.h"
 #include "thread-utils.h"
 #include "sigchain.h"
+#include "argv-array.h"
 
 static int debug;
 
@@ -17,6 +18,7 @@ struct helper_data {
 	const char *name;
 	struct child_process *helper;
 	FILE *out;
+	char *report_fifo;
 	unsigned fetch : 1,
 		import : 1,
 		export : 1,
@@ -101,6 +103,7 @@ static void do_take_over(struct transport *transport)
 static struct child_process *get_helper(struct transport *transport)
 {
 	struct helper_data *data = transport->data;
+	struct argv_array argv = ARGV_ARRAY_INIT;
 	struct strbuf buf = STRBUF_INIT;
 	struct child_process *helper;
 	const char **refspecs = NULL;
@@ -111,6 +114,7 @@ static struct child_process *get_helper(struct transport *transport)
 	char git_dir_buf[sizeof(GIT_DIR_ENVIRONMENT) + PATH_MAX + 1];
 	const char *helper_env[] = {
 		git_dir_buf,
+		NULL,	/* placeholder */
 		NULL
 	};
 
@@ -122,17 +126,23 @@ static struct child_process *get_helper(struct transport *transport)
 	helper->in = -1;
 	helper->out = -1;
 	helper->err = 0;
-	helper->argv = xcalloc(4, sizeof(*helper->argv));
-	strbuf_addf(&buf, "git-remote-%s", data->name);
-	helper->argv[0] = strbuf_detach(&buf, NULL);
-	helper->argv[1] = transport->remote->name;
-	helper->argv[2] = remove_ext_force(transport->url);
+	argv_array_pushf(&argv, "git-remote-%s", data->name);
+	argv_array_push(&argv, transport->remote->name);
+	argv_array_push(&argv, remove_ext_force(transport->url));
+	helper->argv = argv.argv;
 	helper->git_cmd = 0;
 	helper->silent_exec_failure = 1;
 
 	snprintf(git_dir_buf, sizeof(git_dir_buf), "%s=%s", GIT_DIR_ENVIRONMENT, get_git_dir());
 	helper->env = helper_env;
 
+	strbuf_init(&buf, 0);
+	strbuf_addf(&buf, "%s/fast-import-report-fifo", get_git_dir());
+	data->report_fifo = strbuf_detach(&buf, NULL);
+	strbuf_init(&buf, 0);
+	strbuf_addf(&buf, "GIT_REPORT_FIFO=%s", data->report_fifo);
+	helper_env[1] = strbuf_detach(&buf, NULL);
+
 	code = start_command(helper);
 	if (code < 0 && errno == ENOENT)
 		die("Unable to find remote helper for '%s'", data->name);
@@ -141,6 +151,8 @@ static struct child_process *get_helper(struct transport *transport)
 
 	data->helper = helper;
 	data->no_disconnect_req = 0;
+	free((void*) helper_env[1]);
+	argv_array_clear(&argv);
 
 	/*
 	 * Open the output as FILE* so strbuf_getline() can be used.
@@ -237,13 +249,13 @@ static int disconnect_helper(struct transport *transport)
 			xwrite(data->helper->in, "\n", 1);
 			sigchain_pop(SIGPIPE);
 		}
+
 		close(data->helper->in);
 		close(data->helper->out);
 		fclose(data->out);
 		res = finish_command(data->helper);
-		free((char *)data->helper->argv[0]);
-		free(data->helper->argv);
 		free(data->helper);
+		free(data->report_fifo);
 		data->helper = NULL;
 	}
 	return res;
@@ -373,16 +385,18 @@ static int fetch_with_fetch(struct transport *transport,
 	return 0;
 }
 
-static int get_importer(struct transport *transport, struct child_process *fastimport)
+static int get_importer(struct transport *transport, struct child_process *fastimport, struct argv_array *argv)
 {
 	struct child_process *helper = get_helper(transport);
+	struct helper_data *data = transport->data;
 	memset(fastimport, 0, sizeof(*fastimport));
 	fastimport->in = helper->out;
-	fastimport->argv = xcalloc(5, sizeof(*fastimport->argv));
-	fastimport->argv[0] = "fast-import";
-	fastimport->argv[1] = "--quiet";
-
+	argv_array_push(argv, "fast-import");
+	argv_array_push(argv, "--quiet");
+	argv_array_pushf(argv, "--cat-blob-pipe=%s", data->report_fifo);
+	fastimport->argv = argv->argv;
 	fastimport->git_cmd = 1;
+
 	return start_command(fastimport);
 }
 
@@ -421,10 +435,30 @@ static int fetch_with_import(struct transport *transport,
 	int i;
 	struct ref *posn;
 	struct strbuf buf = STRBUF_INIT;
+	struct argv_array importer_argv = ARGV_ARRAY_INIT;
+	struct stat fifostat;
+
+	/* create a fifo for back-reporting of fast-import to the remote helper,
+	 * if it doesn't exist. */
+	if(!stat(data->report_fifo, &fifostat)) {
+		if(S_ISFIFO(fifostat.st_mode)) {		/* exists and is fifo, unlink and recreate, to be sure that permissions are ok.. */
+			if (debug)
+				fprintf(stderr, "Debug: Remote helper: Unlinked existing fifo.\n");
+			if(unlink(data->report_fifo))
+				die_errno("Couldn't unlink fifo %s", data->report_fifo);
+		}
+		else
+			die("Fifo %s used by some other file.", data->report_fifo);
+	}
+	if(mkfifo(data->report_fifo, 0660))
+		die_errno("Couldn't create fifo %s", data->report_fifo);
+	if (debug)
+		fprintf(stderr, "Debug: Remote helper: Mkfifo %s\n", data->report_fifo);
+
 
 	get_helper(transport);
 
-	if (get_importer(transport, &fastimport))
+	if (get_importer(transport, &fastimport, &importer_argv))
 		die("Couldn't run fast-import");
 
 	for (i = 0; i < nr_heads; i++) {
@@ -441,8 +475,8 @@ static int fetch_with_import(struct transport *transport,
 
 	if (finish_command(&fastimport))
 		die("Error while running fast-import");
-	free(fastimport.argv);
-	fastimport.argv = NULL;
+
+	argv_array_clear(&importer_argv);
 
 	for (i = 0; i < nr_heads; i++) {
 		char *private;
-- 
1.7.9.5

^ permalink raw reply related	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-07-21 12:45     ` [RFC 4/4 v3] " Florian Achleitner
@ 2012-07-21 14:48       ` Jonathan Nieder
  2012-07-21 15:24         ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-21 14:48 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Hi,

Florian Achleitner wrote:

> [Subject: Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to
> remote-helper.]

Is this on top of patches 1, 2, and 3 from v2 of the series?

*checks* Looks like it doesn't overlap with any of the files from
those patches, so I don't have to understand them first.  Phew.  My
suggestion for next time would be to submit patches that can be
understood on their own independently instead of as part of a series.

> For some fast-import commands (e.g. cat-blob) an answer-channel
> is required. For this purpose a fifo (aka named pipe) (mkfifo)
> is created (.git/fast-import-report-fifo) by the transport-helper
> when fetch via import is requested. The remote-helper and
> fast-import open the ends of the pipe.

Motivation described!  But it's odd --- it seems like this is
doing at least two things:

 1) adding to the fast-import interface
 2) using the new fast-import feature in some in-tree callers

Those really want to be separate patches.  That way, the fast-import
change can be studied by other implementers of the fast-import
interface (hg-fast-import, bzr-fast-import).  As a side-benefit, it
gives an easy check that any changes to fast-import were at least
roughly backward-compatible ("did all the in-tree users still work?").

I'll focus on the new fast-import change below, since it's the most
important part.

> The filename of the fifo is passed to the remote-helper via
> it's environment, helpers that don't use fast-import can
> simply ignore it.

My first impression is that I'd rather there be a command to request
the filename instead of using the environment for the first time,
since when debugging people would already be monitoring the command
stream and responses.

> Add a new command line option --cat-blob-pipe to fast-import,
> for this purpose.

This is completely redundant next to --cat-blob-fd, right?  That's
really problematic --- adding new interfaces means new code and
gratuitous incompatibility with all existing fast-import backends,
with no benefit in return.

I imagine that there was some portability reason you were thinking
about, but the above doesn't mention it at all.  Future readers
scratching their heads at the changelog can't read your mind!  Please
please please explain what you're trying to do.

Since if we're lucky fixing that could mean not having to change
fast-import at all, I'm stopping here.

Another quick thought: any finished patch adding a new fast-import
feature should also include

 - documentation in the manpage (Documentation/fast-import.txt)
 - testcases to make sure your careful work does not get broken
   by later changes (somewhere in t/*fast-import*.sh)

But don't worry too much about that now --- sending incomplete patches
for review before then to make sure the direction is sane is a very
good idea, as long as they are marked as such (as you've already done
by marking this as RFC).

To sum up: I think we should just stick to pipes --- why all this fifo
complication?

Hope that helps,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-07-21 14:48       ` Jonathan Nieder
@ 2012-07-21 15:24         ` Florian Achleitner
  2012-07-21 15:44           ` Jonathan Nieder
  2012-07-21 15:58           ` Jonathan Nieder
  0 siblings, 2 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-07-21 15:24 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Saturday 21 July 2012 09:48:34 Jonathan Nieder wrote:
> To sum up: I think we should just stick to pipes --- why all this fifo
> complication?

People didn't like pipe variant (prexec_cb not being compatible to windows' 
process creation model), so I learned about fifos and implemented a (basic) fifo 
variant. *shrug*

-- 
Florian

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-07-21 15:24         ` Florian Achleitner
@ 2012-07-21 15:44           ` Jonathan Nieder
  2012-07-22 21:03             ` Florian Achleitner
  2012-07-21 15:58           ` Jonathan Nieder
  1 sibling, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-21 15:44 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:
> On Saturday 21 July 2012 09:48:34 Jonathan Nieder wrote:

>> To sum up: I think we should just stick to pipes --- why all this fifo
>> complication?
>
> People didn't like pipe variant (prexec_cb not being compatible to windows' 
> process creation model), so I learned about fifos and implemented a (basic) fifo 
> variant. *shrug*

Ok, can you elaborate on that?  What does it mean that preexec_cb is
not compatible to windows' process creation model?  Don't the people
of the future working on this code deserve to know about that, too, so
they don't break it?

Come on --- I'm not asking these questions just to make your life
difficult.  Please make it easy to understand your code changes and to
keep them maintained.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-07-21 15:24         ` Florian Achleitner
  2012-07-21 15:44           ` Jonathan Nieder
@ 2012-07-21 15:58           ` Jonathan Nieder
  1 sibling, 0 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-21 15:58 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt, msysgit

(adding msysgit list to cc for a Windows question)
Hi,

(regarding bidirectional communication between git and fast-import for
remote helpers)
Florian Achleitner wrote:

> People didn't like pipe variant (prexec_cb not being compatible to windows' 
> process creation model), so I learned about fifos and implemented a (basic) fifo 
> variant. *shrug*

Is this meant as a summary of [1]?  Hannes wrote:

| The second problem is more severe and is at the lowest level of our
| infrastructure: We set up our child processes so that they know only about
| file descriptors other than 0,1,2 to the child process. Even if the first
| problem were solved, the child process does not receive sufficient
| information to know that there are open file descriptors other than 0,1,2.
| There is a facility to pass along this information from the parent to the
| child, but we simply do not implement it.

It sounds to me like the pipe model would work fine on Windows, and it
would just require some porting work (out of scope for this summer of
code project).  Am I misunderstanding?

Thanks,
Jonathan

[1] http://thread.gmane.org/gmane.comp.version-control.git/199216

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-07-21 15:44           ` Jonathan Nieder
@ 2012-07-22 21:03             ` Florian Achleitner
  2012-07-22 21:24               ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-22 21:03 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Saturday 21 July 2012 10:44:37 Jonathan Nieder wrote:
> Florian Achleitner wrote:
> > On Saturday 21 July 2012 09:48:34 Jonathan Nieder wrote:
> >> To sum up: I think we should just stick to pipes --- why all this fifo
> >> complication?
> > 
> > People didn't like pipe variant (prexec_cb not being compatible to
> > windows'
> > process creation model), so I learned about fifos and implemented a
> > (basic) fifo variant. *shrug*
> 
> Ok, can you elaborate on that?  What does it mean that preexec_cb is
> not compatible to windows' process creation model?  Don't the people
> of the future working on this code deserve to know about that, too, so
> they don't break it?

Let's discuss how to describe the solution after we decide which of the two 
variants we choose.

Summarizing the earlier discussion (in this thread as replies to version 1 of 
the patch) more verbosely:
I used the prexec_cb feature of start_command which allows to install a 
callback function that is called just before exec.
It's purpose is to close the other pipe end in each of the processes that 
inherited both ends after fork.
The pipe is created in transport-helper.c, because it forks fast-import as 
well as the remote-helper, which inherit the pipe's file descriptors.

On Windows, processses are not forked but spawned from new and therefore can't 
inherit pipe file descriptors. So we had the idea to use a fifo, which can be 
opened after the creation of the process. Also there are named pipes on 
Windows which are similar to fifos (though I've never used them).

Mostly out of curiosity I played around with fifos and implemented this 
additional cat-blob-pipe feature, like a proof-of-concept.

The first version used the pipes because this feature already exists.

> 
> Come on --- I'm not asking these questions just to make your life
> difficult.  Please make it easy to understand your code changes and to
> keep them maintained.

The reason why I resent the patch in version 3 these days is that I the 
problem of blocking open calls leading to potential deadlocks.
Of course I should tell future maintainers better what its all about, if we 
really want that feature.

Hope that helps,
Florian

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 4/4 v3] Add cat-blob report fifo from fast-import to remote-helper.
  2012-07-22 21:03             ` Florian Achleitner
@ 2012-07-22 21:24               ` Jonathan Nieder
  0 siblings, 0 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-22 21:24 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Hi,

Florian Achleitner wrote:

> On Windows, processses are not forked but spawned from new and therefore can't 
> inherit pipe file descriptors.

Ok, this is what I didn't understand (and still don't understand).

Yes, on Windows processes are spawned instead of forked.  But does
that mean they can't inherit pipes?  How do pipelines in the shell
work at all, then?

I thought what Hannes said was rather that the current C runtime
used by Git for Windows doesn't take care of inherited file
descriptors, period, other than 0, 1, and 2.  This has nothing to do
with the fork/spawn distinction.  So while --cat-blob-fd or some other
mechanism could be made to work on Windows in the future, *relying* on
--cat-blob-fd on Windows today is a no-go, since it rely on file
descriptors >2 and break the remote helper infrastructure completely
there.

That does not force us into a corner at all.  We have multiple
options.  Here is what I propose:

Remote helpers declare using a capability (e.g., "bidi-import") that
they would like a bidirectional communication channel with
fast-import.  Git tells the remote helper that its request has been
granted by using a different command (e.g., "bidi-import") instead of
"import" to start the import process.

The responses from fast-import come on stdin (file descriptor 0), so
in principle it should be possible to implement this interface on
Windows.  The interface is portable, even if the initial
implementation isn't.

Existing remote helpers using the "import" capability would be
unaffected and would work as before.

On Windows, Git would not take advantage of the bidi-import capability
for now.  Windows support is an added complication and we have enough
to do this summer.

Then interested people using Windows would be able to experiment using
whatever mechanism they please (CRT support for inherited file
descriptors >2, fifos, sockets, some other Windows-specific thing) to
implement the bidi-import capability.  Once that is implemented, they
automatically get support for remote helpers that rely on the
bidirectional communication functionality.

What do you think?

Thanks for explaining,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-02 11:07     ` Jonathan Nieder
  2012-07-06  0:30       ` Jonathan Nieder
@ 2012-07-26  8:31       ` Florian Achleitner
  2012-07-26  9:08         ` Jonathan Nieder
  2012-07-26  9:45       ` Steven Michalske
       [not found]       ` <358E6F1E-8BAD-4F82-B270-0233AB86EF66@gmail.com>
  3 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-26  8:31 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

Hi!

Most of this review went into the new version.. 
For the remaining points, some comments follow.

On Monday 02 July 2012 06:07:41 Jonathan Nieder wrote:
> Hi,
> 
> Florian Achleitner wrote:

> 
> > --- /dev/null
> > +++ b/contrib/svn-fe/remote-svn.c
> > @@ -0,0 +1,207 @@
> > +
> > +#include <stdlib.h>
> > +#include <string.h>
> > +#include <stdio.h>
> 
> git-compat-util.h (or some header that includes it) must be the first
> header included so the appropriate feature test macros can be defined.
> See Documentation/CodingGuidelines for more on that.

check.

> 
> > +#include "cache.h"
> > +#include "remote.h"
> > +#include "strbuf.h"
> > +#include "url.h"
> > +#include "exec_cmd.h"
> > +#include "run-command.h"
> > +#include "svndump.h"
> > +
> > +static int debug = 0;
> 
> Small nit: please drop the redundant "= 0" here.  Or:

check.

> > +
> > +static inline void printd(const char* fmt, ...)
> > +{
> > +	if(debug) {
> > +		va_list vargs;
> > +		va_start(vargs, fmt);
> > +		fprintf(stderr, "rhsvn debug: ");
> > +		vfprintf(stderr, fmt, vargs);
> > +		fprintf(stderr, "\n");
> > +		va_end(vargs);
> > +	}
> > +}
> 
> Why not use trace_printf and avoid the complication?

Hm.. I tried. It wasn't exactly what I wanted. When I use trace_printf, it's 
activated together with all other traces. I can use trace_vprintf and specify 
a key, but I would always have to print the header "rhsvn debug: " and the key 
by hand. So I could replace vfprintf in this function by trace_vprintf to do 
that. But then there's not much simplification. (?)


> > +
> > +enum cmd_result cmd_capabilities(struct strbuf* line);
> > +enum cmd_result cmd_import(struct strbuf* line);
> > +enum cmd_result cmd_list(struct strbuf* line);
> 
> What's a cmd_result?  '*' sticks to variable name.
> 
> > +
> > +enum cmd_result { SUCCESS, NOT_HANDLED, ERROR };
> 
> Oh, that's what a cmd_result is. :)  Why not define the type before
> using it to avoid keeping the reader in suspense?
> 
> What does each result represent?  If this is a convention like
> 
>  1: handled
>  0: not handled
>  -1: error, callee takes care of printing the error message
> 
> then please document it in a comment near the caller so the reader can
> understand what is happening without too much confusion.  Given such a
> comment, does the enum add clarity?

Hm.. the enum now has SUCCESS, NOT_HANDLED, TERMINATE.
It gives the numbers a name, thats it.

> 
> > +typedef enum cmd_result (*command)(struct strbuf*);
> 
> When I first read this, I wonder what is being commanded.  Are these
> commands passed on the remote helper's standard input, commands passed
> on its output, or commands run at some point in the process?  What is
> the effect and return value of associated function?  Does the function
> always return some success/failure value, or does it sometimes exit?
> 
> Maybe a more specific type name would be clearer?

I renamed it to input_command_handler. Unfortunately the remote-helper spec 
calls what is sent to the helper a 'command'.

> 
> [...]
> 
> > +
> > +const command command_list[] = {
> > +		cmd_capabilities, cmd_import, cmd_list, NULL
> > +};
> 
> First association is to functions like cmd_fetch() which implement git
> subcommands.  So I thought these were going to implement subcommands
> like "git remote-svn capabilities", "git remote-svn import" and would
> use the same cmd_foo(argc, argv, prefix) calling convention that git
> subcommands do.  Maybe a different naming convention could avoid
> confusion.

Ok.. same as above, they are kind of commands. Of course I can change the 
names. For me it's not too confusing, because I don't know the git subcommands 
convention very well. You can choose a name.

> 
> [...]
> 
> > +enum cmd_result cmd_capabilities(struct strbuf* line)
> > +{
> > +	if(strcmp(line->buf, "capabilities"))
> > +		return NOT_HANDLED;
> 
> Style: missing SP after keyword.
> 
> > +
> > +	printf("import\n");
> > +	printf("\n");
> > +	fflush(stdout);
> > +	return SUCCESS;
> > +}
> 
> Why the multiple printf?  Is the flush needed?

Excess printf gone.
Flush is needed. Otherwise it doesn't flush and the other end waits forever.
Don't know exactly why. Some pipe-buffer ..

> > +
> > +	/* opening a fifo for usually reading blocks until a writer has opened
> > it too. +	 * Therefore, we open with RDWR.
> > +	 */
> > +	report_fd = open(back_pipe_env, O_RDWR);
> > +	if(report_fd < 0) {
> > +		die("Unable to open fast-import back-pipe! %s", strerror(errno));
> > +	}
> 
> Is this necessary?  Why shouldn't we fork the writer first and wait
> for it here?

Yes, necessary. Blocking on this open call prevents fast-import as well as the 
remote helper from reading and writing on their normal command streams.
This leads to deadlocks.

E.g. If there's have nothing to import, the helper sends only 'done' to fast-
import and quits. That might happen before fast-import opened this pipe.
Then it waits forever because the reader has already closed it.


> > +
> > +	code = start_command(&svndump_proc);
> > +	if(code)
> > +		die("Unable to start %s, code %d", svndump_proc.argv[0], code);
> 
> start_command() is supposed to have printed a message already when it
> fails, unless errno == ENOENT and silent_exec_failure was set.
> 

Yes, but it doesn't die, right?

> > +
> > +	close(svndump_proc.out);
> 
> Important?  Wouldn't finish_command do this?
> 

As far as I understood it, it doesn't close extra created pipes. Probably I 
just didn't find it in the code ..

> > +	close(report_fd);
> 
> What is the purpose of this step?

Close the back-report pipe end of the remote-helper.

> 
> > +
> > +	code = finish_command(&svndump_proc);
> > +	if(code)
> > +		warning("Something went wrong with termination of %s, code %d",
> > svndump_proc.argv[0], code);
> finish_command() is supposed to print a message when it fails.

I changed the message text. It should tell us if svnrdump exited with non-
zero.

> 
> > +	free(svndump_proc.argv);
> > +
> > +	printf("done\n");
> > +	return SUCCESS;
> 
> Success even if it failed?

On fatal errors it dies.

> > +enum cmd_result do_command(struct strbuf* line)
> > +{
> > +	const command* p = command_list;
> > +	enum cmd_result ret;
> > +	printd("command line '%s'", line->buf);
> > +	while(*p) {
> > +		ret = (*p)(line);
> > +		if(ret != NOT_HANDLED)
> > +			return ret;
> > +		p++;
> > +	}
> 
> If possible, matching commands by name (like git.c does) would make
> the behavior easier to predict.
> 

There is some usecase for this. The intention was, that command handlers 
should be able to process more than one 'name'. E.g. an import batch is 
terminated by a newline. This newline is handled by the import handler if it 
is a batch. (This feature wasn't implemented in the version reviewed here.)

So I decided to let the handler functions tell if they handle this line.

> [...]
> 
> > +	if (argc < 2) {
> > +		fprintf(stderr, "Remote needed\n");
> > +		return 1;
> > +	}
> 
> usage() can be used to write a clearer error message.

> [...]
> 
> > +	free((void*)url);
> > +	free((void*)private_refs);
> 
> Won't this crash?

Crash? It frees detached strbuf buffers.

> 
> It would also be nice to add a test case to the t/ directory to make others
> changing this code do not accidentally break your new functionality.

check.

> 
> Hope that helps,
> Jonathan

It helped ;)

thx, Florian

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26  8:31       ` Florian Achleitner
@ 2012-07-26  9:08         ` Jonathan Nieder
  2012-07-26 16:16           ` Florian Achleitner
  2012-07-26 17:29           ` Junio C Hamano
  0 siblings, 2 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-26  9:08 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:

> Most of this review went into the new version.. 
> For the remaining points, some comments follow.

Thanks for this.

> On Monday 02 July 2012 06:07:41 Jonathan Nieder wrote:

[...]
>>> +
>>> +static inline void printd(const char* fmt, ...)
[...]
>> Why not use trace_printf and avoid the complication?
>
> Hm.. I tried. It wasn't exactly what I wanted. When I use trace_printf, it's 
> activated together with all other traces. I can use trace_vprintf and specify 
> a key, but I would always have to print the header "rhsvn debug: " and the key 
> by hand. So I could replace vfprintf in this function by trace_vprintf to do 
> that. But then there's not much simplification. (?)

Hmm.  There's no trace_printf_with_key() but that's presumably because
no one has needed it.  If it existed, you could use

	#define printd(msg) trace_printf_with_key("GIT_TRACE_REMOTE_SVN", "%s", msg)

But now that I check, I don't see how the current printd() calls would
be useful to other people.  Why announce these moments and not others?
They're just temporary debugging cruft, right?

For that, plain trace_printf() works great.

[...]
>>> +
>>> +enum cmd_result { SUCCESS, NOT_HANDLED, ERROR };
[...]
> Hm.. the enum now has SUCCESS, NOT_HANDLED, TERMINATE.

Much nicer.

I think this tristate return value could be avoided entirely because...
[continued at (*) below]

[...]
>>> +
>>> +	printf("import\n");
>>> +	printf("\n");
>>> +	fflush(stdout);
>>> +	return SUCCESS;
>>> +}
>>
>> Why the multiple printf?  Is the flush needed?
>
> Excess printf gone.
> Flush is needed. Otherwise it doesn't flush and the other end waits forever.

Ah, fast-import is ready, remote helper is ready, no one initiates
pumping of data between them.  Maybe the purpose of the flush would
be more obvious if it were moved to the caller.

[...]
>>> +	/* opening a fifo for usually reading blocks until a writer has opened
>>> it too. +	 * Therefore, we open with RDWR.
>>> +	 */
>>> +	report_fd = open(back_pipe_env, O_RDWR);
>>> +	if(report_fd < 0) {
>>> +		die("Unable to open fast-import back-pipe! %s", strerror(errno));
>>> +	}
>>
>> Is this necessary?  Why shouldn't we fork the writer first and wait
>> for it here?
>
> Yes, necessary.

Oh, dear.  I hope not.  E.g., Cygwin doesn't support opening fifos
RDWR (out of scope for the gsoc project, but still).

[...]
> E.g. If there's have nothing to import, the helper sends only 'done' to fast-
> import and quits.

Won't the writer open the pipe and wait for us to open our end before
doing that?

[...]
>>> +
>>> +	code = start_command(&svndump_proc);
>>> +	if(code)
>>> +		die("Unable to start %s, code %d", svndump_proc.argv[0], code);
>>
>> start_command() is supposed to have printed a message already when it
>> fails, unless errno == ENOENT and silent_exec_failure was set.
>
> Yes, but it doesn't die, right?

You can exit without writing a message with exit(), e.g. like so:

	if (code)
		exit(code);

or like so:

	if (code)
		exit(128);

[...]
>>> +
>>> +	close(svndump_proc.out);
>>
>> Important?  Wouldn't finish_command do this?
>
> As far as I understood it, it doesn't close extra created pipes. Probably I 
> just didn't find it in the code ..

So this is to work around a bug in the run-command interface?

[...]
>>> +	close(report_fd);
>>
>> What is the purpose of this step?
>
> Close the back-report pipe end of the remote-helper.

That's just repeating the question. :)  Perhaps it's supposed to
trigger some action on the other end of the pipe?  It would just be
useful to add a comment documenting why one shouldn't remove this
close() call, or else will probably end up removing it and needlessly
suffering.

[...]
>>> +
>>> +	code = finish_command(&svndump_proc);
>>> +	if(code)
>>> +		warning("Something went wrong with termination of %s, code %d",
>>> svndump_proc.argv[0], code);
>> finish_command() is supposed to print a message when it fails.
>
> I changed the message text. It should tell us if svnrdump exited with non-
> zero.

I'd suggest looking at other finish_command() callers for examples.

[...]
>>> +enum cmd_result do_command(struct strbuf* line)
>>> +{
>>> +	const command* p = command_list;
>>> +	enum cmd_result ret;
>>> +	printd("command line '%s'", line->buf);
>>> +	while(*p) {
>>> +		ret = (*p)(line);
>>> +		if(ret != NOT_HANDLED)
>>> +			return ret;
>>> +		p++;
>>> +	}
>>
>> If possible, matching commands by name (like git.c does) would make
>> the behavior easier to predict.
>
> There is some usecase for this. The intention was, that command handlers 
> should be able to process more than one 'name'. E.g. an import batch is 
> terminated by a newline. This newline is handled by the import handler if it 
> is a batch. (This feature wasn't implemented in the version reviewed here.)
>
> So I decided to let the handler functions tell if they handle this line.

[continued from (*) above]
... it isn't needed at the moment.

See http://c2.com/xp/YouArentGonnaNeedIt.html

[...]
>>> +	free((void*)url);
>>> +	free((void*)private_refs);
>>
>> Won't this crash?
>
> Crash? It frees detached strbuf buffers.

I see "url = argv[2];" a few lines above, but it looks like the variable
was reused for two different purposes. :(

I'm not sure why you detach url, by the way.  If the goal is to free it,
why not use strbuf_release()?

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-02 11:07     ` Jonathan Nieder
  2012-07-06  0:30       ` Jonathan Nieder
  2012-07-26  8:31       ` Florian Achleitner
@ 2012-07-26  9:45       ` Steven Michalske
       [not found]       ` <358E6F1E-8BAD-4F82-B270-0233AB86EF66@gmail.com>
  3 siblings, 0 replies; 57+ messages in thread
From: Steven Michalske @ 2012-07-26  9:45 UTC (permalink / raw)
  To: git@vger.kernel.org List


On Jul 2, 2012, at 4:07 AM, Jonathan Nieder <jrnieder@gmail.com> wrote:

> [...]
>> diff: Use fifo instead of pipe: Retrieve the name of the pipe from env and open it
>> for svndump.
> 
> I'd prefer to avoid this if possible, since it means having to decide
> where the pipe goes on the filesystem.  Can you summarize the
> discussion in the commit message so future readers understand why
> we're doing it?

Crazy thought here but would a socket not be a bad choice here?

Imagine being able to ssh tunnel into the SVN server and run the helper with filesystem access to the SVN repo.

Akin to the pushy project use case.
http://packages.python.org/pushy/

SSH into the machine, copy the required components to the machine, and use the RPC.
Nothing needed but SSH and python.  In this case SSH, SVN, and the helper would be needed.

This also would work just fine with the local host too.

Steve

Note: Resent, Sorry it was signed, and rejected before.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
       [not found]       ` <358E6F1E-8BAD-4F82-B270-0233AB86EF66@gmail.com>
@ 2012-07-26 11:40         ` Jonathan Nieder
  2012-07-26 14:28           ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-26 11:40 UTC (permalink / raw)
  To: Steven Michalske
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

Steven Michalske wrote:
> On Jul 2, 2012, at 4:07 AM, Jonathan Nieder <jrnieder@gmail.com> wrote:

>> [...]
>>> diff: Use fifo instead of pipe: Retrieve the name of the pipe from env and open it
>>> for svndump.
>>
>> I'd prefer to avoid this if possible, since it means having to decide
>> where the pipe goes on the filesystem.  Can you summarize the
>> discussion in the commit message so future readers understand why
>> we're doing it?
>
> Crazy thought here but would a socket not be a bad choice here?

Not crazy --- it was already mentioned.  It could probably allow using
--cat-blob-fd even on the platforms that don't inherit file
descriptors >2, though it wuld take some tweaking.  Though I still
think the way forward is to keep using plain pipes internally for now
and to make the bidirectional communication optional, since it
wouldn't close any doors to whatever is most convenient on each
platform.  Hopefully I'll hear more from Florian about this in time.

> Imagine being able to ssh tunnel into the SVN server and run the helper with
> filesystem access to the SVN repo.

We're talking about what communicates between the SVN dump parser the
version control system-specific backend (git fast-import) that reads
the converted result, so that particular socket wouldn't help much.

Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26 11:40         ` Jonathan Nieder
@ 2012-07-26 14:28           ` Florian Achleitner
  2012-07-26 14:54             ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-26 14:28 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Steven Michalske, Florian Achleitner, git, David Michael Barr,
	Sverre Rabbelier, Jeff King, Johannes Sixt

On Thursday 26 July 2012 06:40:39 Jonathan Nieder wrote:
> Steven Michalske wrote:
> > On Jul 2, 2012, at 4:07 AM, Jonathan Nieder <jrnieder@gmail.com> wrote:
> >> [...]
> >> 
> >>> diff: Use fifo instead of pipe: Retrieve the name of the pipe from env
> >>> and open it for svndump.
> >> 
> >> I'd prefer to avoid this if possible, since it means having to decide
> >> where the pipe goes on the filesystem.  Can you summarize the
> >> discussion in the commit message so future readers understand why
> >> we're doing it?
> > 
> > Crazy thought here but would a socket not be a bad choice here?
> 
> Not crazy --- it was already mentioned.  It could probably allow using
> --cat-blob-fd even on the platforms that don't inherit file
> descriptors >2, though it wuld take some tweaking.  Though I still
> think the way forward is to keep using plain pipes internally for now
> and to make the bidirectional communication optional, since it
> wouldn't close any doors to whatever is most convenient on each
> platform.  Hopefully I'll hear more from Florian about this in time.

Would you like to see a new pipe patch?

> 
> > Imagine being able to ssh tunnel into the SVN server and run the helper
> > with filesystem access to the SVN repo.
> 
> We're talking about what communicates between the SVN dump parser the
> version control system-specific backend (git fast-import) that reads
> the converted result, so that particular socket wouldn't help much.
>

Yes .. the network part is already handled quite well by svnrdump.
 

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26 14:28           ` Florian Achleitner
@ 2012-07-26 14:54             ` Jonathan Nieder
  2012-07-27  7:23               ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-26 14:54 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: Steven Michalske, Florian Achleitner, git, David Michael Barr,
	Sverre Rabbelier, Jeff King, Johannes Sixt, Ramkumar Ramachandra

(cc-ing Ram since he's also knowledgeable about remote-helper protocol)
Florian Achleitner wrote:
> On Thursday 26 July 2012 06:40:39 Jonathan Nieder wrote:

>>                                                     Though I still
>> think the way forward is to keep using plain pipes internally for now
>> and to make the bidirectional communication optional, since it
>> wouldn't close any doors to whatever is most convenient on each
>> platform.  Hopefully I'll hear more from Florian about this in time.
>
> Would you like to see a new pipe patch?

Since the svn remote helper relies on this, it seems worth working on,
yeah.  As for how to spend your time (and whether to beg someone else
to work on it instead :)): I'm not sure what's on your plate or where
you are with respect to the original plan for the summer at the
moment, so it would be hard for me to give useful advice about how to
balance things.

What did you think of the suggestion of adding a new bidi-import
capability and command to the remote helper protocol?  I think this
would be clean and avoid causing a regression on Windows, but it's
easily possible I am missing something fundamental.

Thanks,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26  9:08         ` Jonathan Nieder
@ 2012-07-26 16:16           ` Florian Achleitner
  2012-07-28  7:00             ` Jonathan Nieder
  2012-07-26 17:29           ` Junio C Hamano
  1 sibling, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-26 16:16 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Thursday 26 July 2012 04:08:42 Jonathan Nieder wrote:
> Florian Achleitner wrote:

> > On Monday 02 July 2012 06:07:41 Jonathan Nieder wrote:
> [...]
> 
> >>> +
> >>> +static inline void printd(const char* fmt, ...)
> 
> [...]
> 
> >> Why not use trace_printf and avoid the complication?
> > 
> > Hm.. I tried. It wasn't exactly what I wanted. When I use trace_printf,
> > it's activated together with all other traces. I can use trace_vprintf
> > and specify a key, but I would always have to print the header "rhsvn
> > debug: " and the key by hand. So I could replace vfprintf in this
> > function by trace_vprintf to do that. But then there's not much
> > simplification. (?)
> 
> Hmm.  There's no trace_printf_with_key() but that's presumably because
> no one has needed it.  If it existed, you could use
> 
> 	#define printd(msg) trace_printf_with_key("GIT_TRACE_REMOTE_SVN", "%s",
> msg)
> 
> But now that I check, I don't see how the current printd() calls would
> be useful to other people.  Why announce these moments and not others?
> They're just temporary debugging cruft, right?
> 
> For that, plain trace_printf() works great.

Yes, it's for debugging only, I could just delete it all. It's inspired by 
transport-helper.c. The env var GIT_TRANSPORT_HELPER_DEBUG enables it. While 
transport-helper has a lot of if (debug) fprintf(..), I encapsulated it in 
printd.
So I should kick printd out?

> >>> +
> >>> +	printf("import\n");
> >>> +	printf("\n");
> >>> +	fflush(stdout);
> >>> +	return SUCCESS;
> >>> +}
> >> 
> >> Why the multiple printf?  Is the flush needed?
> > 
> > Excess printf gone.
> > Flush is needed. Otherwise it doesn't flush and the other end waits
> > forever.
> Ah, fast-import is ready, remote helper is ready, no one initiates
> pumping of data between them.  Maybe the purpose of the flush would
> be more obvious if it were moved to the caller.

Acutally this goes to the git parent process (not fast-import), waiting for a 
reply to the command. I think I have to call flush on this side of the pipe. 
Can you flush it from the reader? This wouldn't have the desired effect, it 
drops buffered data.

> [...]
> 
> >>> +	/* opening a fifo for usually reading blocks until a writer has opened
> >>> it too. +	 * Therefore, we open with RDWR.
> >>> +	 */
> >>> +	report_fd = open(back_pipe_env, O_RDWR);
> >>> +	if(report_fd < 0) {
> >>> +		die("Unable to open fast-import back-pipe! %s", strerror(errno));
> >>> +	}
> >> 
> >> Is this necessary?  Why shouldn't we fork the writer first and wait
> >> for it here?
> > 
> > Yes, necessary.
> 
> Oh, dear.  I hope not.  E.g., Cygwin doesn't support opening fifos
> RDWR (out of scope for the gsoc project, but still).

I believe it can be solved using RDONLY and WRONLY too. Probably we solve it 
by not using the fifo at all.
Currently the blocking comes from the fact, that fast-import doesn't parse 
it's command line at startup. It rather reads an input line first and decides 
whether to parse the argv after reading the first input line or at the end of 
the input. (don't know why)
remote-svn opens the pipe before sending the first command to fast-import and 
blocks on the open, while fast-import waits for input --> deadlock.
with remote-svn: RDWR, fast-import: WRONLY, this works.

Other scenario: Nothing to import, remote-svn only sends 'done' and closes the 
pipe again. After fast-import reads the first line it parses it's command line 
and tries to open the fifo which is already closed on the other side --> 
blocks.
This is solved by using RDWR on both sides.

If we change the points where the pipes are openend and closed, this could be 
circumvented.

> 
> [...]
> 
> > E.g. If there's have nothing to import, the helper sends only 'done' to
> > fast- import and quits.
> 
> Won't the writer open the pipe and wait for us to open our end before
> doing that?
> 
> [...]
> 
> >>> +
> >>> +	code = start_command(&svndump_proc);
> >>> +	if(code)
> >>> +		die("Unable to start %s, code %d", svndump_proc.argv[0], code);
> >> 
> >> start_command() is supposed to have printed a message already when it
> >> fails, unless errno == ENOENT and silent_exec_failure was set.
> > 
> > Yes, but it doesn't die, right?
> 
> You can exit without writing a message with exit(), e.g. like so:
> 
> 	if (code)
> 		exit(code);
> 
> or like so:
> 
> 	if (code)
> 		exit(128);

ok, why not..

> 
> [...]
> 
> >>> +
> >>> +	close(svndump_proc.out);
> >> 
> >> Important?  Wouldn't finish_command do this?
> > 
> > As far as I understood it, it doesn't close extra created pipes. Probably
> > I
> > just didn't find it in the code ..
> 
> So this is to work around a bug in the run-command interface?

Good question. 

> 
> [...]
> 
> >>> +	close(report_fd);
> >> 
> >> What is the purpose of this step?
> > 
> > Close the back-report pipe end of the remote-helper.
> 
> That's just repeating the question. :)  Perhaps it's supposed to
> trigger some action on the other end of the pipe?  It would just be
> useful to add a comment documenting why one shouldn't remove this
> close() call, or else will probably end up removing it and needlessly
> suffering.

It's just closing files I've openend before. I usually do that, when i no 
longer need them. Isn't that common?
I believe it makes no difference if you only call import once. On subsequent 
imports the pipe is still open when it tries to open it again, that could be a 
problem. It would have to statically store report_fd, but what for??

> 
> [...]
> 
> >>> +
> >>> +	code = finish_command(&svndump_proc);
> >>> +	if(code)
> >>> +		warning("Something went wrong with termination of %s, code %d",
> >>> svndump_proc.argv[0], code);
> >> 
> >> finish_command() is supposed to print a message when it fails.
> > 
> > I changed the message text. It should tell us if svnrdump exited with non-
> > zero.
> 
> I'd suggest looking at other finish_command() callers for examples.
> 
> [...]
> 
> >>> +enum cmd_result do_command(struct strbuf* line)
> >>> +{
> >>> +	const command* p = command_list;
> >>> +	enum cmd_result ret;
> >>> +	printd("command line '%s'", line->buf);
> >>> +	while(*p) {
> >>> +		ret = (*p)(line);
> >>> +		if(ret != NOT_HANDLED)
> >>> +			return ret;
> >>> +		p++;
> >>> +	}
> >> 
> >> If possible, matching commands by name (like git.c does) would make
> >> the behavior easier to predict.
> > 
> > There is some usecase for this. The intention was, that command handlers
> > should be able to process more than one 'name'. E.g. an import batch is
> > terminated by a newline. This newline is handled by the import handler if
> > it is a batch. (This feature wasn't implemented in the version reviewed
> > here.)
> > 
> > So I decided to let the handler functions tell if they handle this line.
> 
> [continued from (*) above]
> ... it isn't needed at the moment.
> 
> See http://c2.com/xp/YouArentGonnaNeedIt.html

Hm.. all three values are used, they're not for the future but for now.
Of course it could be done somehow else.

> 
> [...]
> 
> >>> +	free((void*)url);
> >>> +	free((void*)private_refs);
> >> 
> >> Won't this crash?
> > 
> > Crash? It frees detached strbuf buffers.
> 
> I see "url = argv[2];" a few lines above, but it looks like the variable
> was reused for two different purposes. :(

Ooops. That was introduced by splitting out "[RFC 07/16] Allow reading svn 
dumps from files via file:// urls." It makes url malloced unconditionally. Will 
fix.
> 
> I'm not sure why you detach url, by the way.  If the goal is to free it,
> why not use strbuf_release()?

I used the strbuf because of it's nice formatting functions. But on the other 
hand, e.g. url_decode returns a char *, so I stored it in this type.
It doesn't make a big difference if I use three strbufs or one and detach and 
free the char * at the end. In fact, char * will consume some bytes less than 
a strbuf ;)
Another question is, if calling free just before exit makes sense anyways.
(But you learn it in programming courses, and it pleases valgrind :D )

> 
> Thanks,
> Jonathan

Florian

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26  9:08         ` Jonathan Nieder
  2012-07-26 16:16           ` Florian Achleitner
@ 2012-07-26 17:29           ` Junio C Hamano
  2012-07-30  8:12             ` Florian Achleitner
  1 sibling, 1 reply; 57+ messages in thread
From: Junio C Hamano @ 2012-07-26 17:29 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

Jonathan Nieder <jrnieder@gmail.com> writes:

> [...]
>>>> +
>>>> +enum cmd_result { SUCCESS, NOT_HANDLED, ERROR };
> [...]
>> Hm.. the enum now has SUCCESS, NOT_HANDLED, TERMINATE.
>
> Much nicer.
>
> I think this tristate return value could be avoided entirely because...
> ... it isn't needed at the moment.

I am not sure what you mean by that.

The command dispatcher loop in [Patch v2 1/16] seems to call every
possible input handler with the first line of the input and expect
them to answer "This is not for me", so NOT_HANDLED is needed.

An alternative dispatcher could be written in such a way that the
dispatcher inspects the first line and decide what to call, and in
such a scheme, you do not need NOT_HANDLED. My intuition tells me
that such an arrangement is in general a better organization.

Looking at what cmd_import() does, however, I think the approach the
patch takes might make sense for this application.  Unlike other
handlers like "capabilities" that do not want to handle anything
other than "capabilities", it wants to handle two:

 - "import" that starts an import batch;
 - "" (an empty line), but only when an import batch is in effect.

A centralized dispatcher that does not use NOT_HANDLED could be
written for such an input stream, but then the state information
(i.e. "are we in an import batch?") needs to be global, which may or
may not be desirable (I haven't thought things through on this).

In any case, if you are going to use dispatching based on
NOT_HANDLED, the result may have to be (at least) quadri-state.  In
addition to "I am done successfully, please go back and dispatch
another command" (SUCCESS), "This is not for me" (NOT_HANDLED), and
"I am done successfully, and there is no need to dispatch and
process another command further" (TERMINATE), you may want to be
able to say "This was for me, but I found an error" (ERROR).

Of course, if the dispatch loop has to be rewritten so that a
central dispatcher decides what to call, individual input handlers
do not need to say NOT_HANDLED nor TERMINATE, as the central
dispatcher should keep track of the overall state of the system, and
the usual "0 on success, negative on error" may be sufficient.

One thing I wondered was how an input "capability" (or "list")
should be handled after "import" was issued (hence batch_active
becomes true).  The dispatcher loop in the patch based on
NOT_HANDLED convention will happily call cmd_capabilities(), which
does not have any notion of the batch_active state (because it is a
function scope static inside cmd_import()), and will say "Ah, that
is mine, and let me do my thing."  If we want to diagnose such an
input stream as an error, the dispatch loop needs to become aware of
the overall state of the system _anyway_, so that may be an argument
against the NOT_HANDLED based dispatch system the patch series uses.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26 14:54             ` Jonathan Nieder
@ 2012-07-27  7:23               ` Florian Achleitner
  2012-07-28  6:54                 ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-27  7:23 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Steven Michalske, Florian Achleitner, git, David Michael Barr,
	Sverre Rabbelier, Jeff King, Johannes Sixt, Ramkumar Ramachandra

On Thursday 26 July 2012 09:54:26 Jonathan Nieder wrote:
> 
> Since the svn remote helper relies on this, it seems worth working on,
> yeah.  As for how to spend your time (and whether to beg someone else
> to work on it instead :)): I'm not sure what's on your plate or where
> you are with respect to the original plan for the summer at the
> moment, so it would be hard for me to give useful advice about how to
> balance things.

Btw, the pipe version did already exist before I started, it was added with 
the cat-blob command and already used by Dmitry's remote-svn-alpha.
I didn't search for design discussions in the past ..

> 
> What did you think of the suggestion of adding a new bidi-import
> capability and command to the remote helper protocol?  I think this
> would be clean and avoid causing a regression on Windows, but it's
> easily possible I am missing something fundamental.

I don't have much overview over this topic besides the part I'm working on, 
like other users of fast-import. 
The bidi-import capability/command would have the advantage, that we don't 
have to bother with the pipe/fifo at all, if the remote-helper doesn't use it.

When I implemented the two variants I had the idea to pass it to the 'option' 
command, that fast-import already has. Anyways, specifying cat-blob-fd is not 
allowed via the 'option' command (see Documentation and 85c62395).
It wouldn't make too much sense, because the file descriptor must be set up by 
the parent.

But for the fifo, it would, probably. The backward channel is only used by the 
commands 'cat-blob' and 'ls' of fast-import. If a remote helper wants to use 
them, it would could make fast-import open the pipe by sending an 'option' 
command with the fifo filename, otherwise it defaults to stdout (like now) and 
is rather useless.
This would take the fifo setup out of transport-helper. The remote-helper would 
have to create it, if it needs it.

Apropos stdout. That leads to another idea. You already suggested that it 
would be easiest to only use FDs 0..2. Currently stdout and stderr of fast-
import go to the shell. We could connect stdout to the remote-helper and don't 
need the additional channel at all.
(Probably there's a good reason why they haven't done that ..)
Maybe this requires many changes to fast-import and breaks existing frontends.

--
Florian

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-27  7:23               ` Florian Achleitner
@ 2012-07-28  6:54                 ` Jonathan Nieder
  0 siblings, 0 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-28  6:54 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: Steven Michalske, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt, Ramkumar Ramachandra

Hi,

Some quick details.

Florian Achleitner wrote:

>                                        Anyways, specifying cat-blob-fd is not 
> allowed via the 'option' command (see Documentation and 85c62395).
> It wouldn't make too much sense, because the file descriptor must be set up by 
> the parent.
>
> But for the fifo, it would, probably.

More precisely, requiring that the cat-blob fd go on the command line
instead of in the stream matches a model where fast-import's three
standard streams are abstract:

 - its input, using the INPUT FORMAT described in git-fast-import(1)
 - its standard output, which echoes "progress" commands
 - its backflow stream, where responses to "cat-blob" and "ls" commands go

The stream format is not necessarily pinned to a Unix model where
input and output are based on filesystems and file descriptors.  We
can imagine a fast-import backend that runs on a remote server and the
transport used for these streams is sockets; for fast-import frontends
to be usable in such a context, the streams they produce must not rely
on particular fd numbers, nor on pathnames (except for mark state
saved to relative paths with --relative-marks) representing anything
in particular.

This goes just as much for a fifo set up on the filesystem where the
fast-import backend runs as for an inherited file descriptor.  In the
current model, such backend-specific details of setup go on the
command line.

>                                       The backward channel is only used by the 
> commands 'cat-blob' and 'ls' of fast-import. If a remote helper wants to use 
> them, it would could make fast-import open the pipe by sending an 'option' 
> command with the fifo filename, otherwise it defaults to stdout (like now) and 
> is rather useless.

I'm getting confused by terminology again.  Let's see if I have the cast
of characters straight:

 - the fast-import backend (e.g., "git fast-import" or "hg-fastimport")
 - the fast-import frontend (e.g., "git fast-export" or svn-fe)
 - git's generic foreign VCS support plumbing, also known as
   transport-helper.c
 - the remote helper (e.g., "git remote-svn" or "git remote-testgit")

Why would the fast-import backend ever need to open a pipe?  If I want
it to write backflow to a fifo, I can use

	mkfifo my-favorite-fifo
	git fast-import --cat-blob-fd=3 3>my-favorite-fifo

If I want it to write backflow to a pipe, I can use (using ksh syntax)

	cat |&
	git fast-import --cat-blob-fd=3 3>&p

> This would take the fifo setup out of transport-helper. The remote-helper would 
> have to create it, if it needs it.

We can imagine transport-helper.c learning the name of a fifo set up by
the remote helper by sending it the "capabilities" command:

	git> capabilities
	helper> option
	helper> import
	helper> cat-blob-file my-favorite-fifo
	helper> refspec refs/heads/*:refs/helper/remotename/*
	helper>

transport-helper.c could then use that information to invoke
fast-import appropriately:

	git fast-import --cat-blob-fd=3 3>my-favorite-fifo

But this seems like pushing complication onto the remote helper; since
there is expected to be one remote helper per foreign VCS,
implementing the appropriate logic correctly once and for all in
transport-helper.c for all interested remote helpers to take advantage
of seems to me like a better policy.

> Apropos stdout. That leads to another idea. You already suggested that it 
> would be easiest to only use FDs 0..2. Currently stdout and stderr of fast-
> import go to the shell. We could connect stdout to the remote-helper and don't 
> need the additional channel at all.

The complication that makes this strategy not so easy is "progress"
commands in the fast-import input stream.  (Incidentally, it would be
nice to teach transport-helper.c to display specially formatted
"progress" commands using a progress bar some day.)

Hoping that clarifies,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26 16:16           ` Florian Achleitner
@ 2012-07-28  7:00             ` Jonathan Nieder
  2012-07-30  8:12               ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-28  7:00 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:

> So I should kick printd out?

I think so, yes.

"git log -SGIT_TRANSPORT_HELPER_DEBUG transport-helper.c" tells me
that that option was added to make the transport-helper machinery make
noise to make it obvious at what stage a remote helper has deadlocked.

GIT_TRANSPORT_HELPER_DEBUG already takes care of that, so there would
not be need for an imitation of that in remote-svn, unless I am
missing something (and even if I am missing something, it seems
complicated enough to be worth moving to another patch where it can be
explained more easily).

[...]
>>>>> +
>>>>> +	printf("import\n");
>>>>> +	printf("\n");
>>>>> +	fflush(stdout);
>>>>> +	return SUCCESS;
>>>>> +}
[...]
>>                                Maybe the purpose of the flush would
>> be more obvious if it were moved to the caller.
>
> Acutally this goes to the git parent process (not fast-import), waiting for a 
> reply to the command. I think I have to call flush on this side of the pipe. 
> Can you flush it from the reader? This wouldn't have the desired effect, it 
> drops buffered data.

*slaps head*  This is the "capabilities" command, and it needs to
flush because the reader needs to know what commands it's allowed to
use next before it starts using them.  My brain turned off and I
thought you were emitting an "import" command rather than advertising
that you support it for some reason.

And 'printf("\n")' was a separate printf because that way, patches
like

	 	printf("import\n");
	+	printf("bidi-import\n");
	 	printf("\n");
	 	fflush(stdout);

become simpler.

I'm tempted to suggest a structure like

		const char * const capabilities[] = {"import"};
		int i;

		for (i = 0; i < ARRAY_SIZE(capabilities); i++)
			puts(capabilities[i]);
		puts("");	/* blank line */

		fflush(stdout);

but your original code was fine, too.

[...]
>>>>> +	/* opening a fifo for usually reading blocks until a writer has opened
>>>>> it too. +	 * Therefore, we open with RDWR.
>>>>> +	 */
>>>>> +	report_fd = open(back_pipe_env, O_RDWR);
>>>>> +	if(report_fd < 0) {
>>>>> +		die("Unable to open fast-import back-pipe! %s", strerror(errno));
[...]
> I believe it can be solved using RDONLY and WRONLY too. Probably we solve it 
> by not using the fifo at all.
> Currently the blocking comes from the fact, that fast-import doesn't parse 
> it's command line at startup. It rather reads an input line first and decides 
> whether to parse the argv after reading the first input line or at the end of 
> the input. (don't know why)
> remote-svn opens the pipe before sending the first command to fast-import and 
> blocks on the open, while fast-import waits for input --> deadlock.

Thanks for explaining.  Now we've discussed a few different approproaches,
none of which is perfect.

a. use --cat-blob-fd, no FIFO

   Doing this unconditionally would break platforms that don't support
   --cat-blob-fd=(descriptor >2), like Windows, so we'd have to:

   * Make it conditional --- only do it (1) we are not on Windows and
     (2) the remote helper requests backflow by advertising the
     import-bidi capability.

   * Let the remote helper know what's going on by using
     "import-bidi" instead of "import" in the command stream to
     initiate the import.

b. use envvars to pass around FIFO path

   This complicates the fast-import interface and makes debugging hard.
   It would be nice to avoid this if we can, but in case we can't, it's
   nice to have the option available.

c. transport-helper.c uses FIFO behind the scenes.

   Like (a), except it would require a fast-import tweak (boo) and
   would work on Windows (yea)

d. use --cat-blob-fd with FIFO

   Early scripted remote-svn prototypes did this to fulfill "fetch"
   requests.

   It has no advantage over "use --cat-blob-fd, no FIFO" except being
   easier to implement as a shell script.  I'm listing this just for
   comparison; since (a) looks better in every way, I don't see any
   reason to pursue this one.

Since avoiding deadlocks with bidirectional communication is always a
little subtle, it would be nice for this to be implemented once in
transport-helper.c rather than each remote helper author having to
reimplement it again.  As a result, my knee-jerk ranking is a > c >
b > d.

Sane?
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-28  7:00             ` Jonathan Nieder
@ 2012-07-30  8:12               ` Florian Achleitner
  2012-07-30  8:29                 ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-30  8:12 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Saturday 28 July 2012 02:00:31 Jonathan Nieder wrote:
> Thanks for explaining.  Now we've discussed a few different approproaches,
> none of which is perfect.
> 
> a. use --cat-blob-fd, no FIFO
> 
>    Doing this unconditionally would break platforms that don't support
>    --cat-blob-fd=(descriptor >2), like Windows, so we'd have to:
> 
>    * Make it conditional --- only do it (1) we are not on Windows and
>      (2) the remote helper requests backflow by advertising the
>      import-bidi capability.
> 
>    * Let the remote helper know what's going on by using
>      "import-bidi" instead of "import" in the command stream to
>      initiate the import.

Generally I like your prefered solution.
I think there's one problem:
The pipe needs to be created before the fork, so that the fd can be inherited. 
There is no way of creating it if the remote-helper advertises a capability, 
because it is already forked then. This would work with fifos, though.

We could:
- add a capability: bidi-import. 
- make transport-helper create a fifo if the helper advertises it.
- add a command for remote-helpers, like 'bidi-import <pipename>' that makes 
the remote helper open the fifo at <pipename> and use it.
- fast-import is forked after the helper, so we do already know if there will 
be a back-pipe. If yes, open it in transport-helper and pass the fd as command 
line argument cat-blob-fd. 

--> fast-import wouldn't need to be changed, but we'd use a fifo, and we get 
rid of the env-vars.
(I guess it could work on windows too).

What do you think?

> 
> b. use envvars to pass around FIFO path
> 
>    This complicates the fast-import interface and makes debugging hard.
>    It would be nice to avoid this if we can, but in case we can't, it's
>    nice to have the option available.
> 
> c. transport-helper.c uses FIFO behind the scenes.
> 
>    Like (a), except it would require a fast-import tweak (boo) and
>    would work on Windows (yea)
> 
> d. use --cat-blob-fd with FIFO
> 
>    Early scripted remote-svn prototypes did this to fulfill "fetch"
>    requests.
> 
>    It has no advantage over "use --cat-blob-fd, no FIFO" except being
>    easier to implement as a shell script.  I'm listing this just for
>    comparison; since (a) looks better in every way, I don't see any
>    reason to pursue this one.
> 
> Since avoiding deadlocks with bidirectional communication is always a
> little subtle, it would be nice for this to be implemented once in
> transport-helper.c rather than each remote helper author having to
> reimplement it again.  As a result, my knee-jerk ranking is a > c >
> b > d.
> 
> Sane?
> Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-26 17:29           ` Junio C Hamano
@ 2012-07-30  8:12             ` Florian Achleitner
  0 siblings, 0 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-07-30  8:12 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Jonathan Nieder, Florian Achleitner, git, David Michael Barr,
	Sverre Rabbelier, Jeff King, Johannes Sixt

On Thursday 26 July 2012 10:29:51 Junio C Hamano wrote:
> Of course, if the dispatch loop has to be rewritten so that a
> central dispatcher decides what to call, individual input handlers
> do not need to say NOT_HANDLED nor TERMINATE, as the central
> dispatcher should keep track of the overall state of the system, and
> the usual "0 on success, negative on error" may be sufficient.
> 
> One thing I wondered was how an input "capability" (or "list")
> should be handled after "import" was issued (hence batch_active
> becomes true).  The dispatcher loop in the patch based on
> NOT_HANDLED convention will happily call cmd_capabilities(), which
> does not have any notion of the batch_active state (because it is a
> function scope static inside cmd_import()), and will say "Ah, that
> is mine, and let me do my thing."  If we want to diagnose such an
> input stream as an error, the dispatch loop needs to become aware of
> the overall state of the system _anyway_, so that may be an argument
> against the NOT_HANDLED based dispatch system the patch series uses.

That's a good point. The current implementation allows other commands to 
appear during import batches. This shouldn't be possible according to the 
protocol, I think. But it doesn't do harm. Solving it will require a global 
state and go towards a global displatcher.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-30  8:12               ` Florian Achleitner
@ 2012-07-30  8:29                 ` Jonathan Nieder
  2012-07-30 13:55                   ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-30  8:29 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:
> On Saturday 28 July 2012 02:00:31 Jonathan Nieder wrote:

>> a. use --cat-blob-fd, no FIFO
[...]
>>    * Make it conditional --- only do it (1) we are not on Windows and
>>      (2) the remote helper requests backflow by advertising the
>>      import-bidi capability.
>>
>>    * Let the remote helper know what's going on by using
>>      "import-bidi" instead of "import" in the command stream to
>>      initiate the import.
>
> Generally I like your prefered solution.
> I think there's one problem:
> The pipe needs to be created before the fork, so that the fd can be inherited. 

The relevant pipe already exists at that point: the remote helper's
stdin.

In other words, it could work like this (just like the existing demo
code, except adding a conditional based on the "capabilities"
response):

	0. transport-helper.c invokes the remote helper.  This requires
	   a pipe used to send commands to the remote helper
	   (helper->in) and a pipe used to receive responses from the
	   remote helper (helper->out)

	1. transport-helper.c sends the "capabilities" command to decide
	   what to do.  The remote helper replies that it would like
	   some feedback from fast-import.

	2. transport-helper.c forks and execs git fast-import with input
	   redirected from helper->out and the cat-blob fd redirected
	   to helper->in

	3. transport-helper.c tells the remote helper to start the
	   import

	4. wait for fast-import to exit

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-30  8:29                 ` Jonathan Nieder
@ 2012-07-30 13:55                   ` Florian Achleitner
  2012-07-30 16:55                     ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-30 13:55 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Monday 30 July 2012 03:29:52 Jonathan Nieder wrote:
> > Generally I like your prefered solution.
> > I think there's one problem:
> > The pipe needs to be created before the fork, so that the fd can be
> > inherited. 
> The relevant pipe already exists at that point: the remote helper's
> stdin.
> 
> In other words, it could work like this (just like the existing demo
> code, except adding a conditional based on the "capabilities"
> response):
> 
>         0. transport-helper.c invokes the remote helper.  This requires
>            a pipe used to send commands to the remote helper
>            (helper->in) and a pipe used to receive responses from the
>            remote helper (helper->out)
> 
>         1. transport-helper.c sends the "capabilities" command to decide
>            what to do.  The remote helper replies that it would like
>            some feedback from fast-import.
> 
>         2. transport-helper.c forks and execs git fast-import with input
>            redirected from helper->out and the cat-blob fd redirected
>            to helper->in

fast-import writes to the helpers stdin..

>         3. transport-helper.c tells the remote helper to start the
>            import

transport-helper writes commands to the helper's stdin.

> 
>         4. wait for fast-import to exit

Hm .. that would mean, that both fast-import and git (transport-helper) would 
write to the remote-helper's stdin, right?

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-30 13:55                   ` Florian Achleitner
@ 2012-07-30 16:55                     ` Jonathan Nieder
  2012-07-31 19:31                       ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-30 16:55 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:

> Hm .. that would mean, that both fast-import and git (transport-helper) would 
> write to the remote-helper's stdin, right?

Yes, first git writes the list of refs to import, and then fast-import
writes feedback during the import.  Is that a problem?

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-30 16:55                     ` Jonathan Nieder
@ 2012-07-31 19:31                       ` Florian Achleitner
  2012-07-31 22:43                         ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-07-31 19:31 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Monday 30 July 2012 11:55:02 Jonathan Nieder wrote:
> Florian Achleitner wrote:
> > Hm .. that would mean, that both fast-import and git (transport-helper)
> > would write to the remote-helper's stdin, right?
> 
> Yes, first git writes the list of refs to import, and then fast-import
> writes feedback during the import.  Is that a problem?

I haven't tried that yet, nor do I remember anything where I've already seen 
two processes writing to the same pipe.
At least it sounds cumbersome to me. Processes' lifetimes overlap, so buffering 
and flushing could mix data.
We have to use it for both purposes interchangably  because there can be more 
than one import command to the remote-helper, of course.

Will try that in test-program..

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-31 19:31                       ` Florian Achleitner
@ 2012-07-31 22:43                         ` Jonathan Nieder
  2012-08-01  8:25                           ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-07-31 22:43 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:

> I haven't tried that yet, nor do I remember anything where I've already seen
> two processes writing to the same pipe.

It's a perfectly normal and well supported thing to do.

[...]
> Will try that in test-program..

Thanks.

Good luck,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-07-31 22:43                         ` Jonathan Nieder
@ 2012-08-01  8:25                           ` Florian Achleitner
  2012-08-01 19:42                             ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-08-01  8:25 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Tuesday 31 July 2012 15:43:57 Jonathan Nieder wrote:
> Florian Achleitner wrote:
> > I haven't tried that yet, nor do I remember anything where I've already
> > seen two processes writing to the same pipe.
> 
> It's a perfectly normal and well supported thing to do.

I played around with a little testprogram. It generally works.
I'm still not convinced that this doesn't cause more problems than it can 
solve.
The standard defines that write calls to pipe fds are atomic, i.e. data is not 
interleaved with data from other processes, if the data is less than PIPE_BUF 
[1].
We would need some kind of locking/synchronization to make it work for sure, 
while I believe it will work most of the time.

Currently it runs  like this:
transport-helper.c writes one or more 'import <ref>' lines, we don't know in 
advance how many and how long they are. Then it waits for fast-import to 
finish.

When the first line arrives at the remote-helper, it starts importing one line 
at a time, leaving the remaining lines in the pipe.
For importing it requires the data from fast-import, which would be mixed with 
import lines or queued at the end of them.

[1] 
http://pubs.opengroup.org/onlinepubs/009695399/functions/write.html

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-08-01  8:25                           ` Florian Achleitner
@ 2012-08-01 19:42                             ` Jonathan Nieder
  2012-08-12 10:06                               ` Florian Achleitner
  0 siblings, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-08-01 19:42 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Hi again,

Florian Achleitner wrote:

> When the first line arrives at the remote-helper, it starts importing one line 
> at a time, leaving the remaining lines in the pipe.
> For importing it requires the data from fast-import, which would be mixed with 
> import lines or queued at the end of them.

Oh, good catch.

The way it's supposed to work is that in a bidi-import, the remote
helper reads in the entire list of refs to be imported and only once
the newline indicating that that list is over arrives starts writing
its fast-import stream.  We could make this more obvious by not
spawning fast-import until immediately before writing that newline.

This needs to be clearly documented in the git-remote-helpers(1) page
if the bidi-import command is introduced.

If a remote helper writes commands for fast-import before that newline
comes, that is a bug in the remote helper, plain and simple.  It might
be fun to diagnose this problem:

	static void pipe_drained_or_die(int fd, const char *msg)
	{
		char buf[1];
		int flags = fcntl(fd, F_GETFL);
		if (flags < 0)
			die_errno("cannot get pipe flags");
		if (fcntl(fd, F_SETFL, flags | O_NONBLOCK))
			die_errno("cannot set up non-blocking pipe read");
		if (read(fd, buf, 1) > 0)
			die("%s", msg);
		if (fcntl(fd, F_SETFL, flags))
			die_errno("cannot restore pipe flags");
	}
	...

	for (i = 0; i < nr_heads; i++) {
		write "import %s\n", to_fetch[i]->name;
	}

	if (getenv("GIT_REMOTE_HELPERS_SLOW_SANITY_CHECK"))
		sleep(1);

	pipe_drained_or_die("unexpected output from remote helper before fast-import launch");

	if (get_importer(transport, &fastimport))
		die("couldn't run fast-import");
	write_constant(data->helper->in, "\n");

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-08-01 19:42                             ` Jonathan Nieder
@ 2012-08-12 10:06                               ` Florian Achleitner
  2012-08-12 16:12                                 ` Jonathan Nieder
  2012-08-12 19:36                                 ` Jonathan Nieder
  0 siblings, 2 replies; 57+ messages in thread
From: Florian Achleitner @ 2012-08-12 10:06 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

Hi,

back to the pipe-topic.

On Wednesday 01 August 2012 12:42:48 Jonathan Nieder wrote:
> Hi again,
> 
> Florian Achleitner wrote:
> > When the first line arrives at the remote-helper, it starts importing one
> > line at a time, leaving the remaining lines in the pipe.
> > For importing it requires the data from fast-import, which would be mixed
> > with import lines or queued at the end of them.
> 
> Oh, good catch.
> 
> The way it's supposed to work is that in a bidi-import, the remote
> helper reads in the entire list of refs to be imported and only once
> the newline indicating that that list is over arrives starts writing
> its fast-import stream.  We could make this more obvious by not
> spawning fast-import until immediately before writing that newline.
> 
> This needs to be clearly documented in the git-remote-helpers(1) page
> if the bidi-import command is introduced.
> 
> If a remote helper writes commands for fast-import before that newline
> comes, that is a bug in the remote helper, plain and simple.  It might
> be fun to diagnose this problem:

This would require all existing remote helpers that use 'import' to be ported 
to the new concept, right? Probably there is no other..

> 
> 	static void pipe_drained_or_die(int fd, const char *msg)
> 	{
> 		char buf[1];
> 		int flags = fcntl(fd, F_GETFL);
> 		if (flags < 0)
> 			die_errno("cannot get pipe flags");
> 		if (fcntl(fd, F_SETFL, flags | O_NONBLOCK))
> 			die_errno("cannot set up non-blocking pipe read");
> 		if (read(fd, buf, 1) > 0)
> 			die("%s", msg);
> 		if (fcntl(fd, F_SETFL, flags))
> 			die_errno("cannot restore pipe flags");
> 	}
> 	...
> 
> 	for (i = 0; i < nr_heads; i++) {
> 		write "import %s\n", to_fetch[i]->name;
> 	}
> 
> 	if (getenv("GIT_REMOTE_HELPERS_SLOW_SANITY_CHECK"))
> 		sleep(1);
> 
> 	pipe_drained_or_die("unexpected output from remote helper before
> fast-import launch");
> 
> 	if (get_importer(transport, &fastimport))
> 		die("couldn't run fast-import");
> 	write_constant(data->helper->in, "\n");

I still don't believe that sharing the input pipe of the remote helper is 
worth the hazzle.
It still requires an additional pipe to be setup, the one from fast-import to 
the remote-helper, sharing one FD at the remote helper.
It still requires more than just stdin, stdout, stderr.

I would suggest to use a fifo. It can be openend independently, after forking 
and on windows they have named pipes with similar semantics, so I think this 
could be easily ported. 
I would suggest the following changes:
- add a capability to the remote helper 'bidi-import', or 'bidi-pipe'. This 
signals that the remote helper requires data from fast-import.

- add a command 'bidi-import', or 'bidi-pipe' that is tells the remote helper 
which filename the fifo is at, so that it can open it and read it when it 
handles 'import' commands.

- transport-helper.c creates the fifo on demand, i.e. on seeing the capability, 
in the gitdir or in /tmp.

- fast-import gets the name of the fifo as a command-line argument. The 
alternative would be to add a command, but that's not allowed, because it 
changes the stream semantics.
Another alternative would be to use the existing --cat-pipe-fd argument. But 
that requires to open the fifo before execing fast-import and makes us 
dependent on the posix model of forking and inheriting file descriptors, while 
opening a fifo in fast-import would not.

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-08-12 10:06                               ` Florian Achleitner
@ 2012-08-12 16:12                                 ` Jonathan Nieder
  2012-08-12 19:39                                   ` Florian Achleitner
  2012-08-12 19:36                                 ` Jonathan Nieder
  1 sibling, 1 reply; 57+ messages in thread
From: Jonathan Nieder @ 2012-08-12 16:12 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Hi again,

Florian Achleitner wrote:

> back to the pipe-topic.

Ok, thanks.

[...]
>> The way it's supposed to work is that in a bidi-import, the remote
>> helper reads in the entire list of refs to be imported and only once
>> the newline indicating that that list is over arrives starts writing
>> its fast-import stream.
[...]
> This would require all existing remote helpers that use 'import' to be ported 
> to the new concept, right? Probably there is no other..

You mean all existing remote helpers that use 'bidi-import', right?
There are none.

[...]
> I still don't believe that sharing the input pipe of the remote helper is 
> worth the hazzle.
> It still requires an additional pipe to be setup, the one from fast-import to 
> the remote-helper, sharing one FD at the remote helper.

If I understand correctly, you misunderstood how sharing the input
pipe works.  Have you tried it?

It does not involve setting up an additional pipe.  Standard input for
the remote helper is already a pipe.  That pipe is what allows
transport-helper.c to communicate with the remote helper.  Letting
fast-import share that pipe involves passing that file descriptor to
git fast-import.  No additional pipe() calls.

Do you mean that it would be too much work to implement?  This
explanation just doesn't make sense to me, given that the version
using pipe() *already* *exists* and is *tested*.

I get the feeling I am missing something very basic.  I would welcome
input from others that shows what I am missing.

[...]
> Another alternative would be to use the existing --cat-pipe-fd argument. But 
> that requires to open the fifo before execing fast-import and makes us 
> dependent on the posix model of forking and inheriting file descriptors, while 
> opening a fifo in fast-import would not.

I'm getting kind of frustrated with this conversation going nowhere.  Here
is a compromise to explain why.  Suppose:

- fast-import learns a --cat-blob-file parameter, which tells it to open a
  file to write responses to "cat-blob" and "ls" commands to instead of
  using an inherited file descriptor

- transport-helper.c sets up a named pipe and passes it using --cat-blob-file.

- transport-helper.c reads from that named pipe and copies everything it sees
  to the remote helper's standard input, until fast-import exits.

This would:

 - allow us to continue to use a very simple protocol for communicating
   with the remote helper, where commands and fast-import backflow both
   come on standard input

 - work fine on both Windows and Unix

Meanwhile it would:

 - be 100% functionally equivalent to the solution where fast-import
   writes directly to the remote helper's standard input.  Two programs
   can have the same pipe open for writing at the same time for a few
   seconds and that is *perfectly fine*.  On Unix and on Windows.

   On Windows the only complication with the pipe()-based  is that we haven't
   wired up the low-level logic to pass file descriptors other than
   stdin, stdout, stderr to child processes; and if I have understood
   earlier messages correctly, the operating system *does* have a
   concept of that and this is just a todo item in msys
   implementation.

 - be more complicated than the code that already exists for this
   stuff.

So while I presented this as a compromise, I don't see the point.

Is your goal portability, a dislike of the interface, some
implementation detail I have missed, or something else?  Could you
explain the problem as concisely but clearly as possible (perhaps
using an example) so that others like Sverre, Peff, or David can help
think through it and to explain it in a way that dim people like me
understand what's going on?

Puzzled,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-08-12 10:06                               ` Florian Achleitner
  2012-08-12 16:12                                 ` Jonathan Nieder
@ 2012-08-12 19:36                                 ` Jonathan Nieder
  1 sibling, 0 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-08-12 19:36 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Hi again,

Florian Achleitner wrote:

> Another alternative would be to use the existing --cat-pipe-fd argument. But 
> that requires to open the fifo before execing fast-import and makes us 
> dependent on the posix model of forking and inheriting file descriptors

Let me elaborate on this, which I think is the real point (though I
could easily be wrong).

Probably instead of just sending feedback I should have been sending
suggested patches to compare.  You do not work for me and your time is
not my time, and I would not want to have the responsibility of
dictating how your code works, anyway.  Generally speaking, as long as
code is useful and not hurting maintainability, the best thing I can
do is to make it easy to incorporate.  That has side benefits, too,
like giving an example of what it is like to have your code
incorporated and creating momentum.  Small mistakes can be fixed
later.

Unfortunately here we are talking about two interfaces that need to
be stable.  Mistakes stay --- once there is a UI wart in interfaces
people are using, we are stuck continuing to support it.

To make life even more complicated, there are two interfaces involved
here:

 - the remote-helper protocol, which I think it is very important
   to keep sane and stable.  Despite the remote-helper protocol
   existing for a long time, it hasn't seen a lot of adoption
   outside git yet, and complicating the protocol will not help
   that.

   I am worried about what it would mean to add an additional
   stream on top of the standard input and output used in the current
   remote-helper protocol.  Receiving responses to fast-import
   commands on stdin seems very simple.  Maybe I am wrong?

 - the fast-import interface, which is also important but I'm not
   as worried about.  Adding a new command-line parameter means
   importers that call fast-import can unnecessarily depend on
   newer versions of git, so it's still something to be avoided.

Given a particular interface, there may be multiple choices of how to
implement it.  For example, on Unix the remote-helper protocol could
be fulfilled by letting fast-import inherit the file descriptor
helper->in, while on Windows it could for example be fulfilled by
using DuplicateHandle() to transmit the pipe handle before or after
fast-import has already started running.

The implementation strategy can easily be changed later.  What is
important is that the interface be pleasant and *possible* to
implement sanely.

Hoping that clarifies a little,
Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-08-12 16:12                                 ` Jonathan Nieder
@ 2012-08-12 19:39                                   ` Florian Achleitner
  2012-08-12 20:10                                     ` Jonathan Nieder
  0 siblings, 1 reply; 57+ messages in thread
From: Florian Achleitner @ 2012-08-12 19:39 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Florian Achleitner, git, David Michael Barr, Sverre Rabbelier,
	Jeff King, Johannes Sixt

On Sunday 12 August 2012 09:12:58 Jonathan Nieder wrote:
> Hi again,
> 
> Florian Achleitner wrote:
> > back to the pipe-topic.
> 
> Ok, thanks.
> 
> [...]
> 
> >> The way it's supposed to work is that in a bidi-import, the remote
> >> helper reads in the entire list of refs to be imported and only once
> >> the newline indicating that that list is over arrives starts writing
> >> its fast-import stream.
> 
> [...]
> 
> > This would require all existing remote helpers that use 'import' to be
> > ported to the new concept, right? Probably there is no other..
> 
> You mean all existing remote helpers that use 'bidi-import', right?
> There are none.

Ok, it would not affect the existing import command.
> 
> [...]
> 
> > I still don't believe that sharing the input pipe of the remote helper is
> > worth the hazzle.
> > It still requires an additional pipe to be setup, the one from fast-import
> > to the remote-helper, sharing one FD at the remote helper.
> 
> If I understand correctly, you misunderstood how sharing the input
> pipe works.  Have you tried it?

Yes wrote a test program, sharing works, that's not the problem.

> 
> It does not involve setting up an additional pipe.  Standard input for
> the remote helper is already a pipe.  That pipe is what allows
> transport-helper.c to communicate with the remote helper.  Letting
> fast-import share that pipe involves passing that file descriptor to
> git fast-import.  No additional pipe() calls.
> 
> Do you mean that it would be too much work to implement?  This
> explanation just doesn't make sense to me, given that the version
> using pipe() *already* *exists* and is *tested*.

Yes, that was the first version I wrote, and remote-svn-alpha uses.

> 
> I get the feeling I am missing something very basic.  I would welcome
> input from others that shows what I am missing.
> 

This is how I see it, probably it's all wrong:
I thought the main problem is, that we don't want processes to have *more than 
three pipes attached*, i.e. stdout, stdin, stderr, because existing APIs don't 
allow it.
When we share stdin of the remote helper, we achieve this goal for this one 
process, but fast-import still has an additional pipe:
stdout  --> shell;
stderr --> shell; 
stdin <-- remote-helper; 
additional_pipe --> remote-helper.

That's what I wanted to say: We still have more than three pipes on fast-
import.
And we need to transfer that fourth file descriptor by inheritance and it's 
number as a command line argument. 
So if we make the remote-helper have only three pipes by double-using stdin, 
but fast-import still has four pipes, what problem does it solve?

Using fifos would remove the requirement to inherit more than three pipes. 
That's my point.

[..]
> 
> Meanwhile it would:
> 
>  - be 100% functionally equivalent to the solution where fast-import
>    writes directly to the remote helper's standard input.  Two programs
>    can have the same pipe open for writing at the same time for a few
>    seconds and that is *perfectly fine*.  On Unix and on Windows.
> 
>    On Windows the only complication with the pipe()-based  is that we
> haven't wired up the low-level logic to pass file descriptors other than
> stdin, stdout, stderr to child processes; and if I have understood earlier
> messages correctly, the operating system *does* have a
>    concept of that and this is just a todo item in msys
>    implementation.

I digged into MSDN and it seems it's not a problem at all on the windows api 
layer. Pipe handles can be inherited. [1]
If the low-level logic once supports passing more than 3 fds, it will work on 
fast-import as well as remote-helper.

> 
>  - be more complicated than the code that already exists for this
>    stuff.
> 
> So while I presented this as a compromise, I don't see the point.
> 
> Is your goal portability, a dislike of the interface, some
> implementation detail I have missed, or something else?  Could you
> explain the problem as concisely but clearly as possible (perhaps
> using an example) so that others like Sverre, Peff, or David can help
> think through it and to explain it in a way that dim people like me
> understand what's going on?

It all started as portability-only discussion. On Linux, my first version would 
have worked. It created an additional pipe before forking using pipe(). Runs 
great, it did it like remote-svn-alpha.sh.

I wouldn't have started to produce something else or start a discussion on my 
own. But I was told, it's not good because of portability. This is the root of 
this endless story. (you already know the thread, I think). Since weeks nobody 
of them is interested in that except you and me.
 
So if we accept having more than three pipes on a process, we have no more 
problem.
We can dig out that first version, as well as write the one proposed by you.
While your version saves some trouble by not requiring an additional pipe() 
call and not requiring the prexec_cb, but adding a little complexity with the 
re-using of stdin.

Currently I have the implemented the original pipe version, the original fifo 
version, the fifo version described a mail ago. I'm going to implement the 
stdin-sharing version now..

[1] http://msdn.microsoft.com/en-
us/library/windows/desktop/aa365782(v=vs.85).aspx

> 
> Puzzled,
> Jonathan

Piped,
Florian ;)

^ permalink raw reply	[flat|nested] 57+ messages in thread

* Re: [RFC 1/4 v2] Implement a basic remote helper for svn in C.
  2012-08-12 19:39                                   ` Florian Achleitner
@ 2012-08-12 20:10                                     ` Jonathan Nieder
  0 siblings, 0 replies; 57+ messages in thread
From: Jonathan Nieder @ 2012-08-12 20:10 UTC (permalink / raw)
  To: Florian Achleitner
  Cc: git, David Michael Barr, Sverre Rabbelier, Jeff King,
	Johannes Sixt

Florian Achleitner wrote:

> This is how I see it, probably it's all wrong:
> I thought the main problem is, that we don't want processes to have *more than
> three pipes attached*, i.e. stdout, stdin, stderr, because existing APIs don't
> allow it.

Oh, that makes sense.  Thanks for explaining, and sorry to have been so
dense.

At the Windows API level, Set/GetStdHandle() is only advertised to
handle stdin, stdout, and stderr, so on Windows there would indeed
need to be some magic to communicate the inherited HANDLE value to
fast-import.

But I am confident in the Windows porters, and if a fast-import
interface change ends up being needed, I think we can deal with it
when the moment comes and it wouldn't necessitate changing the remote
helper interface.

You also mentioned before that passing fast-import responses to the
remote helper stdin means that the remote helper has to slurp up the
whole list of refs to import before starting to talk to fast-import.
That was good practice already anyway, but it is a real pitfall and a
real downside to the single-input approach.  Thanks for bringing it
up.

Jonathan

^ permalink raw reply	[flat|nested] 57+ messages in thread

end of thread, other threads:[~2012-08-12 20:11 UTC | newest]

Thread overview: 57+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-06-04 17:20 [RFC 0/4] Florian Achleitner
2012-06-04 17:20 ` [RFC 1/4] Implement a basic remote helper vor svn in C Florian Achleitner
2012-06-04 17:20   ` [RFC 2/4] Integrate remote-svn into svn-fe/Makefile Florian Achleitner
2012-06-04 17:20     ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
2012-06-04 17:20       ` [RFC 4/4] Add cat-blob report pipe from fast-import to remote-helper Florian Achleitner
2012-06-05  1:33         ` David Michael Barr
2012-06-05  6:56         ` Jeff King
2012-06-05  7:07           ` David Michael Barr
2012-06-05  8:14             ` Jeff King
2012-06-05 22:16               ` Florian Achleitner
2012-06-06 13:43                 ` Jeff King
2012-06-06 21:04                   ` Florian Achleitner
2012-06-05  8:51           ` Johannes Sixt
2012-06-05  9:07             ` Jeff King
2012-06-05 22:17               ` Florian Achleitner
2012-06-05  9:09             ` Johannes Sixt
2012-06-05  1:21       ` [RFC 3/4] Add svndump_init_fd to allow reading dumps from arbitrary FDs David Michael Barr
2012-06-29  7:49 ` [RFC 0/4 v2] Florian Achleitner
2012-06-29  7:54   ` [RFC 1/4 v2] Implement a basic remote helper for svn in C Florian Achleitner
2012-07-02 11:07     ` Jonathan Nieder
2012-07-06  0:30       ` Jonathan Nieder
2012-07-06 10:39         ` Florian Achleitner
2012-07-26  8:31       ` Florian Achleitner
2012-07-26  9:08         ` Jonathan Nieder
2012-07-26 16:16           ` Florian Achleitner
2012-07-28  7:00             ` Jonathan Nieder
2012-07-30  8:12               ` Florian Achleitner
2012-07-30  8:29                 ` Jonathan Nieder
2012-07-30 13:55                   ` Florian Achleitner
2012-07-30 16:55                     ` Jonathan Nieder
2012-07-31 19:31                       ` Florian Achleitner
2012-07-31 22:43                         ` Jonathan Nieder
2012-08-01  8:25                           ` Florian Achleitner
2012-08-01 19:42                             ` Jonathan Nieder
2012-08-12 10:06                               ` Florian Achleitner
2012-08-12 16:12                                 ` Jonathan Nieder
2012-08-12 19:39                                   ` Florian Achleitner
2012-08-12 20:10                                     ` Jonathan Nieder
2012-08-12 19:36                                 ` Jonathan Nieder
2012-07-26 17:29           ` Junio C Hamano
2012-07-30  8:12             ` Florian Achleitner
2012-07-26  9:45       ` Steven Michalske
     [not found]       ` <358E6F1E-8BAD-4F82-B270-0233AB86EF66@gmail.com>
2012-07-26 11:40         ` Jonathan Nieder
2012-07-26 14:28           ` Florian Achleitner
2012-07-26 14:54             ` Jonathan Nieder
2012-07-27  7:23               ` Florian Achleitner
2012-07-28  6:54                 ` Jonathan Nieder
2012-06-29  7:58   ` [RFC 2/4 v2] Integrate remote-svn into svn-fe/Makefile Florian Achleitner
2012-06-29  7:59   ` [RFC 3/4 v2] Add svndump_init_fd to allow reading dumps from arbitrary FDs Florian Achleitner
2012-06-29  8:00   ` [RFC 4/4 v2] Add cat-blob report fifo from fast-import to remote-helper Florian Achleitner
2012-07-21 12:45     ` [RFC 4/4 v3] " Florian Achleitner
2012-07-21 14:48       ` Jonathan Nieder
2012-07-21 15:24         ` Florian Achleitner
2012-07-21 15:44           ` Jonathan Nieder
2012-07-22 21:03             ` Florian Achleitner
2012-07-22 21:24               ` Jonathan Nieder
2012-07-21 15:58           ` Jonathan Nieder

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).