git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH 6/6] Add SVN dump parser
  2010-06-04 13:26 [PATCH 0/6] Merge David's SVN exporter into git.git Ramkumar Ramachandra
@ 2010-06-04 13:26 ` Ramkumar Ramachandra
  0 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-04 13:26 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Junio C Hamano

svndump parses data that is in SVN dumpfile format produced by
`svnadmin dump` with the help of line_buffer, and uses repo_tree and
fast_export to emit a git fast-import stream.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/svndump.c |  294 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.h |    7 ++
 2 files changed, 301 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/svndump.c
 create mode 100644 vcs-svn/svndump.h

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
new file mode 100644
index 0000000..9ee1246
--- /dev/null
+++ b/vcs-svn/svndump.c
@@ -0,0 +1,294 @@
+/*
+ * Parse and rearrange a svnadmin dump.
+ * Create the dump with:
+ * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
+ */
+
+#include "cache.h"
+#include "git-compat-util.h"
+
+#include "repo_tree.h"
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+#define NODEACT_REPLACE 4
+#define NODEACT_DELETE 3
+#define NODEACT_ADD 2
+#define NODEACT_CHANGE 1
+#define NODEACT_UNKNOWN 0
+
+#define DUMP_CTX 0
+#define REV_CTX  1
+#define NODE_CTX 2
+
+#define LENGTH_UNKNOWN (~0)
+#define DATE_RFC2822_LEN 31
+
+/* Create memory pool for log messages */
+obj_pool_gen(log, char, 4096);
+
+static char* log_copy(uint32_t length, char *log)
+{
+	char *buffer;
+	log_free(log_pool.size);
+	buffer = log_pointer(log_alloc(length));
+	strncpy(buffer, log, length);
+	return buffer;
+}
+
+static struct {
+	uint32_t action, propLength, textLength, srcRev, srcMode, mark, type;
+	uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
+} node_ctx;
+
+static struct {
+	uint32_t revision, author;
+	unsigned long timestamp;
+	char *log;
+} rev_ctx;
+
+static struct {
+	uint32_t uuid, url;
+} dump_ctx;
+
+static struct {
+	uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
+		revision_number, node_path, node_kind, node_action,
+		node_copyfrom_path, node_copyfrom_rev, text_content_length,
+		prop_content_length, content_length;
+} keys;
+
+static void reset_node_ctx(char *fname)
+{
+	node_ctx.type = 0;
+	node_ctx.action = NODEACT_UNKNOWN;
+	node_ctx.propLength = LENGTH_UNKNOWN;
+	node_ctx.textLength = LENGTH_UNKNOWN;
+	node_ctx.src[0] = ~0;
+	node_ctx.srcRev = 0;
+	node_ctx.srcMode = 0;
+	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
+	node_ctx.mark = 0;
+}
+
+static void reset_rev_ctx(uint32_t revision)
+{
+	rev_ctx.revision = revision;
+	rev_ctx.timestamp = "";
+	rev_ctx.log = NULL;
+	rev_ctx.author = ~0;
+}
+
+static void reset_dump_ctx(uint32_t url)
+{
+	dump_ctx.url = url;
+	dump_ctx.uuid = ~0;
+}
+
+static void init_keys(void)
+{
+	keys.svn_log = pool_intern("svn:log");
+	keys.svn_author = pool_intern("svn:author");
+	keys.svn_date = pool_intern("svn:date");
+	keys.svn_executable = pool_intern("svn:executable");
+	keys.svn_special = pool_intern("svn:special");
+	keys.uuid = pool_intern("UUID");
+	keys.revision_number = pool_intern("Revision-number");
+	keys.node_path = pool_intern("Node-path");
+	keys.node_kind = pool_intern("Node-kind");
+	keys.node_action = pool_intern("Node-action");
+	keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
+	keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
+	keys.text_content_length = pool_intern("Text-content-length");
+	keys.prop_content_length = pool_intern("Prop-content-length");
+	keys.content_length = pool_intern("Content-length");
+}
+
+static void read_props(void)
+{
+	uint32_t len;
+	uint32_t key = ~0;
+	char buffer[27];
+	char *val = NULL;
+	char *t;
+	while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) {
+		if (!strncmp(t, "K ", 2)) {
+			len = atoi(&t[2]);
+			key = pool_intern(buffer_read_string(len));
+			buffer_read_line();
+		} else if (!strncmp(t, "V ", 2)) {
+			len = atoi(&t[2]);
+			val = buffer_read_string(len);
+			if (key == keys.svn_log) {
+				/* Value length excludes terminating nul. */
+				rev_ctx.log = log_copy(len + 1, val);
+			} else if (key == keys.svn_author) {
+				rev_ctx.author = pool_intern(val);
+			} else if (key == keys.svn_date) {
+				if (parse_date(val, buffer, sizeof(buffer)) > 0)
+					rev_ctx.timestamp = strtoul(buffer, NULL, 0);
+				else
+					fprintf(stderr, "Invalid timestamp: %s", val);
+			} else if (key == keys.svn_executable) {
+				node_ctx.type = REPO_MODE_EXE;
+			} else if (key == keys.svn_special) {
+				node_ctx.type = REPO_MODE_LNK;
+			}
+			key = ~0;
+			buffer_read_line();
+		}
+	}
+}
+
+static void handle_node(void)
+{
+	if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) {
+		read_props();
+	}
+
+	if (node_ctx.srcRev) {
+		node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+	}
+
+	if (node_ctx.textLength != LENGTH_UNKNOWN &&
+		node_ctx.type != REPO_MODE_DIR) {
+		node_ctx.mark = next_blob_mark();
+	}
+
+	if (node_ctx.action == NODEACT_DELETE) {
+		repo_delete(node_ctx.dst);
+	} else if (node_ctx.action == NODEACT_CHANGE ||
+			   node_ctx.action == NODEACT_REPLACE) {
+		if (node_ctx.action == NODEACT_REPLACE &&
+			node_ctx.type == REPO_MODE_DIR) {
+			repo_replace(node_ctx.dst, node_ctx.mark);
+		} else if (node_ctx.propLength != LENGTH_UNKNOWN ) {
+			repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		} else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+			node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+		}
+	} else if (node_ctx.action == NODEACT_ADD) {
+		if (node_ctx.srcRev &&
+			node_ctx.propLength == LENGTH_UNKNOWN &&
+			node_ctx.textLength != LENGTH_UNKNOWN) {
+			node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+		} else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) ||
+				   node_ctx.textLength != LENGTH_UNKNOWN){
+			repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		}
+	}
+
+	if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) {
+		node_ctx.type = node_ctx.srcMode;
+	}
+
+	if (node_ctx.mark) {
+		fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength);
+	} else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+		buffer_skip_bytes(node_ctx.textLength);
+	}
+}
+
+static void handle_revision(void)
+{
+	if (rev_ctx.revision)
+		repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
+		            dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
+}
+
+void svndump_read(uint32_t url)
+{
+	char *val;
+	char *t;
+	uint32_t active_ctx = DUMP_CTX;
+	uint32_t len;
+	uint32_t key;
+
+	reset_dump_ctx(url);
+	while ((t = buffer_read_line())) {
+		val = strstr(t, ": ");
+		if (!val) continue;
+		*val++ = '\0';
+		*val++ = '\0';
+		key = pool_intern(t);
+
+		if(key == keys.uuid) {
+			dump_ctx.uuid = pool_intern(val);
+		} else if (key == keys.revision_number) {
+			if (active_ctx == NODE_CTX) handle_node();
+			if (active_ctx != DUMP_CTX) handle_revision();
+			active_ctx = REV_CTX;
+			reset_rev_ctx(atoi(val));
+		} else if (key == keys.node_path) {
+			if (active_ctx == NODE_CTX)
+				handle_node();
+			active_ctx = NODE_CTX;
+			reset_node_ctx(val);
+		} else if (key == keys.node_kind) {
+			if (!strcmp(val, "dir")) {
+				node_ctx.type = REPO_MODE_DIR;
+			} else if (!strcmp(val, "file")) {
+				node_ctx.type = REPO_MODE_BLB;
+			} else {
+				fprintf(stderr, "Unknown node-kind: %s\n", val);
+			}
+		} else if (key == keys.node_action) {
+			if (!strcmp(val, "delete")) {
+				node_ctx.action = NODEACT_DELETE;
+			} else if (!strcmp(val, "add")) {
+				node_ctx.action = NODEACT_ADD;
+			} else if (!strcmp(val, "change")) {
+				node_ctx.action = NODEACT_CHANGE;
+			} else if (!strcmp(val, "replace")) {
+				node_ctx.action = NODEACT_REPLACE;
+			} else {
+				fprintf(stderr, "Unknown node-action: %s\n", val);
+				node_ctx.action = NODEACT_UNKNOWN;
+			}
+		} else if (key == keys.node_copyfrom_path) {
+			pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
+		} else if (key == keys.node_copyfrom_rev) {
+			node_ctx.srcRev = atoi(val);
+		} else if (key == keys.text_content_length) {
+			node_ctx.textLength = atoi(val);
+		} else if (key == keys.prop_content_length) {
+			node_ctx.propLength = atoi(val);
+		} else if (key == keys.content_length) {
+			len = atoi(val);
+			buffer_read_line();
+			if (active_ctx == REV_CTX) {
+				read_props();
+			} else if (active_ctx == NODE_CTX) {
+				handle_node();
+				active_ctx = REV_CTX;
+			} else {
+				fprintf(stderr, "Unexpected content length header: %d\n", len);
+				buffer_skip_bytes(len);
+			}
+		}
+	}
+	if (active_ctx == NODE_CTX) handle_node();
+	if (active_ctx != DUMP_CTX) handle_revision();
+}
+
+static void svndump_init(void)
+{
+	log_init();
+	repo_init();
+	reset_dump_ctx(~0);
+	reset_rev_ctx(0);
+	reset_node_ctx(NULL);
+	init_keys();
+}
+
+void svndump_reset(void)
+{
+	log_reset();
+	buffer_reset();
+	repo_reset();
+	reset_dump_ctx(~0);
+	reset_rev_ctx(0);
+	reset_node_ctx(NULL);
+}
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
new file mode 100644
index 0000000..e205f1f
--- /dev/null
+++ b/vcs-svn/svndump.h
@@ -0,0 +1,7 @@
+#ifndef SVNDUMP_H_
+#define SVNDUMP_H_
+
+void svndump_read(char *url);
+void svndump_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 6/6] Add SVN dump parser
  2010-06-04 13:41 [PATCH 0/6] Merge David's SVN exporter Ramkumar Ramachandra
@ 2010-06-04 13:41 ` Ramkumar Ramachandra
  0 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-04 13:41 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: David Barr <david.barr@cordelta.com>

svndump parses data that is in SVN dumpfile format produced by
`svnadmin dump` with the help of line_buffer, and uses repo_tree and
fast_export to emit a git fast-import stream.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/svndump.c |  294 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.h |    7 ++
 2 files changed, 301 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/svndump.c
 create mode 100644 vcs-svn/svndump.h

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
new file mode 100644
index 0000000..9ee1246
--- /dev/null
+++ b/vcs-svn/svndump.c
@@ -0,0 +1,294 @@
+/*
+ * Parse and rearrange a svnadmin dump.
+ * Create the dump with:
+ * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
+ */
+
+#include "cache.h"
+#include "git-compat-util.h"
+
+#include "repo_tree.h"
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+#define NODEACT_REPLACE 4
+#define NODEACT_DELETE 3
+#define NODEACT_ADD 2
+#define NODEACT_CHANGE 1
+#define NODEACT_UNKNOWN 0
+
+#define DUMP_CTX 0
+#define REV_CTX  1
+#define NODE_CTX 2
+
+#define LENGTH_UNKNOWN (~0)
+#define DATE_RFC2822_LEN 31
+
+/* Create memory pool for log messages */
+obj_pool_gen(log, char, 4096);
+
+static char* log_copy(uint32_t length, char *log)
+{
+	char *buffer;
+	log_free(log_pool.size);
+	buffer = log_pointer(log_alloc(length));
+	strncpy(buffer, log, length);
+	return buffer;
+}
+
+static struct {
+	uint32_t action, propLength, textLength, srcRev, srcMode, mark, type;
+	uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
+} node_ctx;
+
+static struct {
+	uint32_t revision, author;
+	unsigned long timestamp;
+	char *log;
+} rev_ctx;
+
+static struct {
+	uint32_t uuid, url;
+} dump_ctx;
+
+static struct {
+	uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
+		revision_number, node_path, node_kind, node_action,
+		node_copyfrom_path, node_copyfrom_rev, text_content_length,
+		prop_content_length, content_length;
+} keys;
+
+static void reset_node_ctx(char *fname)
+{
+	node_ctx.type = 0;
+	node_ctx.action = NODEACT_UNKNOWN;
+	node_ctx.propLength = LENGTH_UNKNOWN;
+	node_ctx.textLength = LENGTH_UNKNOWN;
+	node_ctx.src[0] = ~0;
+	node_ctx.srcRev = 0;
+	node_ctx.srcMode = 0;
+	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
+	node_ctx.mark = 0;
+}
+
+static void reset_rev_ctx(uint32_t revision)
+{
+	rev_ctx.revision = revision;
+	rev_ctx.timestamp = "";
+	rev_ctx.log = NULL;
+	rev_ctx.author = ~0;
+}
+
+static void reset_dump_ctx(uint32_t url)
+{
+	dump_ctx.url = url;
+	dump_ctx.uuid = ~0;
+}
+
+static void init_keys(void)
+{
+	keys.svn_log = pool_intern("svn:log");
+	keys.svn_author = pool_intern("svn:author");
+	keys.svn_date = pool_intern("svn:date");
+	keys.svn_executable = pool_intern("svn:executable");
+	keys.svn_special = pool_intern("svn:special");
+	keys.uuid = pool_intern("UUID");
+	keys.revision_number = pool_intern("Revision-number");
+	keys.node_path = pool_intern("Node-path");
+	keys.node_kind = pool_intern("Node-kind");
+	keys.node_action = pool_intern("Node-action");
+	keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
+	keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
+	keys.text_content_length = pool_intern("Text-content-length");
+	keys.prop_content_length = pool_intern("Prop-content-length");
+	keys.content_length = pool_intern("Content-length");
+}
+
+static void read_props(void)
+{
+	uint32_t len;
+	uint32_t key = ~0;
+	char buffer[27];
+	char *val = NULL;
+	char *t;
+	while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) {
+		if (!strncmp(t, "K ", 2)) {
+			len = atoi(&t[2]);
+			key = pool_intern(buffer_read_string(len));
+			buffer_read_line();
+		} else if (!strncmp(t, "V ", 2)) {
+			len = atoi(&t[2]);
+			val = buffer_read_string(len);
+			if (key == keys.svn_log) {
+				/* Value length excludes terminating nul. */
+				rev_ctx.log = log_copy(len + 1, val);
+			} else if (key == keys.svn_author) {
+				rev_ctx.author = pool_intern(val);
+			} else if (key == keys.svn_date) {
+				if (parse_date(val, buffer, sizeof(buffer)) > 0)
+					rev_ctx.timestamp = strtoul(buffer, NULL, 0);
+				else
+					fprintf(stderr, "Invalid timestamp: %s", val);
+			} else if (key == keys.svn_executable) {
+				node_ctx.type = REPO_MODE_EXE;
+			} else if (key == keys.svn_special) {
+				node_ctx.type = REPO_MODE_LNK;
+			}
+			key = ~0;
+			buffer_read_line();
+		}
+	}
+}
+
+static void handle_node(void)
+{
+	if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) {
+		read_props();
+	}
+
+	if (node_ctx.srcRev) {
+		node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+	}
+
+	if (node_ctx.textLength != LENGTH_UNKNOWN &&
+		node_ctx.type != REPO_MODE_DIR) {
+		node_ctx.mark = next_blob_mark();
+	}
+
+	if (node_ctx.action == NODEACT_DELETE) {
+		repo_delete(node_ctx.dst);
+	} else if (node_ctx.action == NODEACT_CHANGE ||
+			   node_ctx.action == NODEACT_REPLACE) {
+		if (node_ctx.action == NODEACT_REPLACE &&
+			node_ctx.type == REPO_MODE_DIR) {
+			repo_replace(node_ctx.dst, node_ctx.mark);
+		} else if (node_ctx.propLength != LENGTH_UNKNOWN ) {
+			repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		} else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+			node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+		}
+	} else if (node_ctx.action == NODEACT_ADD) {
+		if (node_ctx.srcRev &&
+			node_ctx.propLength == LENGTH_UNKNOWN &&
+			node_ctx.textLength != LENGTH_UNKNOWN) {
+			node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+		} else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) ||
+				   node_ctx.textLength != LENGTH_UNKNOWN){
+			repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		}
+	}
+
+	if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) {
+		node_ctx.type = node_ctx.srcMode;
+	}
+
+	if (node_ctx.mark) {
+		fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength);
+	} else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+		buffer_skip_bytes(node_ctx.textLength);
+	}
+}
+
+static void handle_revision(void)
+{
+	if (rev_ctx.revision)
+		repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
+		            dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
+}
+
+void svndump_read(uint32_t url)
+{
+	char *val;
+	char *t;
+	uint32_t active_ctx = DUMP_CTX;
+	uint32_t len;
+	uint32_t key;
+
+	reset_dump_ctx(url);
+	while ((t = buffer_read_line())) {
+		val = strstr(t, ": ");
+		if (!val) continue;
+		*val++ = '\0';
+		*val++ = '\0';
+		key = pool_intern(t);
+
+		if(key == keys.uuid) {
+			dump_ctx.uuid = pool_intern(val);
+		} else if (key == keys.revision_number) {
+			if (active_ctx == NODE_CTX) handle_node();
+			if (active_ctx != DUMP_CTX) handle_revision();
+			active_ctx = REV_CTX;
+			reset_rev_ctx(atoi(val));
+		} else if (key == keys.node_path) {
+			if (active_ctx == NODE_CTX)
+				handle_node();
+			active_ctx = NODE_CTX;
+			reset_node_ctx(val);
+		} else if (key == keys.node_kind) {
+			if (!strcmp(val, "dir")) {
+				node_ctx.type = REPO_MODE_DIR;
+			} else if (!strcmp(val, "file")) {
+				node_ctx.type = REPO_MODE_BLB;
+			} else {
+				fprintf(stderr, "Unknown node-kind: %s\n", val);
+			}
+		} else if (key == keys.node_action) {
+			if (!strcmp(val, "delete")) {
+				node_ctx.action = NODEACT_DELETE;
+			} else if (!strcmp(val, "add")) {
+				node_ctx.action = NODEACT_ADD;
+			} else if (!strcmp(val, "change")) {
+				node_ctx.action = NODEACT_CHANGE;
+			} else if (!strcmp(val, "replace")) {
+				node_ctx.action = NODEACT_REPLACE;
+			} else {
+				fprintf(stderr, "Unknown node-action: %s\n", val);
+				node_ctx.action = NODEACT_UNKNOWN;
+			}
+		} else if (key == keys.node_copyfrom_path) {
+			pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
+		} else if (key == keys.node_copyfrom_rev) {
+			node_ctx.srcRev = atoi(val);
+		} else if (key == keys.text_content_length) {
+			node_ctx.textLength = atoi(val);
+		} else if (key == keys.prop_content_length) {
+			node_ctx.propLength = atoi(val);
+		} else if (key == keys.content_length) {
+			len = atoi(val);
+			buffer_read_line();
+			if (active_ctx == REV_CTX) {
+				read_props();
+			} else if (active_ctx == NODE_CTX) {
+				handle_node();
+				active_ctx = REV_CTX;
+			} else {
+				fprintf(stderr, "Unexpected content length header: %d\n", len);
+				buffer_skip_bytes(len);
+			}
+		}
+	}
+	if (active_ctx == NODE_CTX) handle_node();
+	if (active_ctx != DUMP_CTX) handle_revision();
+}
+
+static void svndump_init(void)
+{
+	log_init();
+	repo_init();
+	reset_dump_ctx(~0);
+	reset_rev_ctx(0);
+	reset_node_ctx(NULL);
+	init_keys();
+}
+
+void svndump_reset(void)
+{
+	log_reset();
+	buffer_reset();
+	repo_reset();
+	reset_dump_ctx(~0);
+	reset_rev_ctx(0);
+	reset_node_ctx(NULL);
+}
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
new file mode 100644
index 0000000..e205f1f
--- /dev/null
+++ b/vcs-svn/svndump.h
@@ -0,0 +1,7 @@
+#ifndef SVNDUMP_H_
+#define SVNDUMP_H_
+
+void svndump_read(char *url);
+void svndump_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 0/6] Another attempt to get the SVN exporter merged
@ 2010-06-10 13:09 Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 1/6] Add memory pool library Ramkumar Ramachandra
                   ` (7 more replies)
  0 siblings, 8 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

Hi,

It certainly looks like I'll never give up trying to get this series
merged- this is my third attempt :) Much thanks to David for all the
awesome code, and to Jonathan for all the detailed timely
reviews. Hopefully, this time the series will have fewer mistakes and
will actually be merged. Sadly, I still haven't been able to get rid
of the compiler warnings about unused functions, and it looks like
this series won't graduate to `master` before that happens- I'd
appreciate pointers on how to do this.

Please feel free to nitpick every little detail, but please don't use
that as an excuse to hold up the series. I'd appreciate some acks from
everyone who has been involved with this to make it easier for Junio
to decide.

Major change since last time: Removed dependency on mmap for
portability reasons.

Thanks!

-- Ram

David Barr (5):
  Add memory pool library
  Add library for string-specific memory pool
  Add stream helper library
  Add infrastructure to write revisions in fast-export format
  Add SVN dump parser

Jason Evans (1):
  Add cpp macro implementation of treaps

 vcs-svn/fast_export.c |   74 +++++++++++
 vcs-svn/fast_export.h |   14 ++
 vcs-svn/line_buffer.c |  134 ++++++++++++++++++++
 vcs-svn/line_buffer.h |   14 ++
 vcs-svn/obj_pool.h    |   90 +++++++++++++
 vcs-svn/repo_tree.c   |  335 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/repo_tree.h   |   26 ++++
 vcs-svn/string_pool.c |  116 +++++++++++++++++
 vcs-svn/string_pool.h |   15 +++
 vcs-svn/svndump.c     |  298 +++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.h     |    7 +
 vcs-svn/trp.h         |  201 +++++++++++++++++++++++++++++
 vcs-svn/trp.txt       |   61 +++++++++
 13 files changed, 1385 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/fast_export.c
 create mode 100644 vcs-svn/fast_export.h
 create mode 100644 vcs-svn/line_buffer.c
 create mode 100644 vcs-svn/line_buffer.h
 create mode 100644 vcs-svn/obj_pool.h
 create mode 100644 vcs-svn/repo_tree.c
 create mode 100644 vcs-svn/repo_tree.h
 create mode 100644 vcs-svn/string_pool.c
 create mode 100644 vcs-svn/string_pool.h
 create mode 100644 vcs-svn/svndump.c
 create mode 100644 vcs-svn/svndump.h
 create mode 100644 vcs-svn/trp.h
 create mode 100644 vcs-svn/trp.txt

^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH 1/6] Add memory pool library
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
@ 2010-06-10 13:09 ` Ramkumar Ramachandra
  2010-06-12  6:42   ` Jonathan Nieder
  2010-06-10 13:09 ` [PATCH 2/6] Add cpp macro implementation of treaps Ramkumar Ramachandra
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: David Barr <david.barr@cordelta.com>

Add a memory pool library implemented using cpp macros. The library
provides macros that can be used to create a type-specific memory pool
API.

The memory pool library is distinguished from the existing specialized
allocators in alloc.c by using a contiguous block for all allocations.
This means that on one hand, long-lived pointers have to be written as
offsets, since the base address changes as the pool grows, but on the
other hand, the entire pool can be easily written to the file system.
This allows the memory pool to persist between runs of an application.

For svn-fe, such a facility is useful because each svn revision can
copy trees and files from any previous revision.  Therefore the
relevant information for all revisions has to persist somehow to
support incremental runs.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/obj_pool.h |   90 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 90 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/obj_pool.h

diff --git a/vcs-svn/obj_pool.h b/vcs-svn/obj_pool.h
new file mode 100644
index 0000000..29417b3
--- /dev/null
+++ b/vcs-svn/obj_pool.h
@@ -0,0 +1,90 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#ifndef OBJ_POOL_H_
+#define OBJ_POOL_H_
+
+#include "git-compat-util.h"
+
+/*
+ * The obj_pool_gen() macro generates a type-specific memory pool
+ * implementation.
+ *
+ * Arguments:
+ *
+ *   pre              : Prefix for generated functions (ex: string_).
+ *   obj_t            : Type for treap data structure (ex: char).
+ *   intial_capacity  : The initial size of the memory pool (ex: 4096).
+ *
+ */
+#define obj_pool_gen(pre, obj_t, initial_capacity) \
+static struct { \
+	uint32_t committed; \
+	uint32_t size; \
+	uint32_t capacity; \
+	obj_t *base; \
+	FILE *file; \
+} pre##_pool = { 0, 0, 0, NULL, NULL}; \
+static void pre##_init(void) \
+{ \
+	struct stat st; \
+	pre##_pool.file = fopen(#pre ".bin", "a+"); \
+	rewind(pre##_pool.file); \
+	fstat(fileno(pre##_pool.file), &st); \
+	pre##_pool.size = st.st_size / sizeof(obj_t); \
+	pre##_pool.committed = pre##_pool.size; \
+	pre##_pool.capacity = pre##_pool.size * 2; \
+	if (pre##_pool.capacity < initial_capacity) \
+		pre##_pool.capacity = initial_capacity; \
+	pre##_pool.base = malloc(pre##_pool.capacity * sizeof(obj_t)); \
+	fread(pre##_pool.base, sizeof(obj_t), pre##_pool.size, pre##_pool.file); \
+} \
+static uint32_t pre##_alloc(uint32_t count) \
+{ \
+	uint32_t offset; \
+	if (pre##_pool.size + count > pre##_pool.capacity) { \
+		while (pre##_pool.size + count > pre##_pool.capacity) \
+			if (pre##_pool.capacity) \
+				pre##_pool.capacity *= 2; \
+			else \
+				pre##_pool.capacity = initial_capacity; \
+		pre##_pool.base = realloc(pre##_pool.base, \
+					pre##_pool.capacity * sizeof(obj_t)); \
+	} \
+	offset = pre##_pool.size; \
+	pre##_pool.size += count; \
+	return offset; \
+} \
+static void pre##_free(uint32_t count) \
+{ \
+	pre##_pool.size -= count; \
+} \
+static uint32_t pre##_offset(obj_t *obj) \
+{ \
+	return obj == NULL ? ~0 : obj - pre##_pool.base; \
+} \
+static obj_t *pre##_pointer(uint32_t offset) \
+{ \
+	return offset >= pre##_pool.size ? NULL : &pre##_pool.base[offset]; \
+} \
+static void pre##_commit(void) \
+{ \
+	pre##_pool.committed += fwrite(pre##_pool.base + pre##_pool.committed, \
+		sizeof(obj_t), pre##_pool.size - pre##_pool.committed, \
+		pre##_pool.file); \
+} \
+static void pre##_reset(void) \
+{ \
+	if (pre##_pool.base) { \
+		free(pre##_pool.base); \
+		fclose(pre##_pool.file); \
+	} \
+	pre##_pool.base = NULL; \
+	pre##_pool.size = 0; \
+	pre##_pool.capacity = 0; \
+	pre##_pool.file = NULL; \
+}
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 2/6] Add cpp macro implementation of treaps
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 1/6] Add memory pool library Ramkumar Ramachandra
@ 2010-06-10 13:09 ` Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 3/6] Add library for string-specific memory pool Ramkumar Ramachandra
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: Jason Evans <jasone@canonware.com>

The implementation exposes an API to generate type-specific treap
implmentation and various functions to operate on it. It uses
obj_pool.h to store memory nodes in a treap.

Treaps provide a memory-efficient binary search tree structure.
Insertion/deletion/search are about as about as fast in the average
case as red-black trees and the chances of worst-case behavior are
vanishingly small, thanks to (pseudo-)randomness.  That is a small
price to pay, given that treaps are much simpler to implement.

[db: Altered to reference nodes by offset from a common base pointer]
[db: Bob Jenkins' hashing implementation dropped for Knuth's]
[db: Methods unnecessary for search and insert dropped]

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/trp.h   |  201 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/trp.txt |   61 +++++++++++++++++
 2 files changed, 262 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/trp.h
 create mode 100644 vcs-svn/trp.txt

diff --git a/vcs-svn/trp.h b/vcs-svn/trp.h
new file mode 100644
index 0000000..50fde39
--- /dev/null
+++ b/vcs-svn/trp.h
@@ -0,0 +1,201 @@
+/*
+ * cpp macro implementation of treaps.
+ *
+ * Usage:
+ *   #include <stdint.h>
+ *   #include <trp.h>
+ *   trp_gen(...)
+ *
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#ifndef TRP_H_
+#define TRP_H_
+
+/* Node structure. */
+struct trp_node {
+	uint32_t trpn_left;
+	uint32_t trpn_right;
+};
+
+/* Root structure. */
+struct trp_root {
+	uint32_t trp_root;
+};
+
+/* Pointer/Offset conversion */
+#define trpn_pointer(a_base, a_offset) (a_base##_pointer(a_offset))
+#define trpn_offset(a_base, a_pointer) (a_base##_offset(a_pointer))
+#define trpn_modify(a_base, a_offset) \
+	do { \
+		if ((a_offset) < a_base##_pool.committed) { \
+			uint32_t old_offset = (a_offset);\
+			(a_offset) = a_base##_alloc(1); \
+			*trpn_pointer(a_base, a_offset) = \
+				*trpn_pointer(a_base, old_offset); \
+		} \
+	} while (0);
+
+/* Left accessors. */
+#define trp_left_get(a_base, a_field, a_node) \
+	(trpn_pointer(a_base, a_node)->a_field.trpn_left)
+#define trp_left_set(a_base, a_field, a_node, a_left) \
+	do { trpn_modify(a_base, a_node); \
+	trp_left_get(a_base, a_field, a_node) = (a_left); } while(0)
+
+/* Right accessors. */
+#define trp_right_get(a_base, a_field, a_node) \
+	(trpn_pointer(a_base, a_node)->a_field.trpn_right)
+#define trp_right_set(a_base, a_field, a_node, a_right) \
+	do { trpn_modify(a_base, a_node); \
+	trp_right_get(a_base, a_field, a_node) = (a_right); } while(0)
+
+/* Priority accessors. */
+#define KNUTH_GOLDEN_RATIO_32BIT 2654435761u
+#define trp_prio_get(a_node) \
+	(KNUTH_GOLDEN_RATIO_32BIT*(a_node))
+
+/* Node initializer. */
+#define trp_node_new(a_base, a_field, a_node) \
+	trp_left_set(a_base, a_field, (a_node), ~0); \
+	trp_right_set(a_base, a_field, (a_node), ~0)
+
+/* Internal utility macros. */
+#define trpn_first(a_base, a_field, a_root, r_node) \
+	do { \
+		(r_node) = (a_root); \
+		if ((r_node) == ~0) \
+			return NULL; \
+		while (~trp_left_get(a_base, a_field, (r_node))) \
+			(r_node) = trp_left_get(a_base, a_field, (r_node)); \
+	} while (0)
+
+#define trpn_rotate_left(a_base, a_field, a_node, r_node) \
+	do { (r_node) = trp_right_get(a_base, a_field, (a_node)); \
+	trp_right_set(a_base, a_field, (a_node), \
+		trp_left_get(a_base, a_field, (r_node))); \
+	trp_left_set(a_base, a_field, (r_node), (a_node)); } while(0)
+
+#define trpn_rotate_right(a_base, a_field, a_node, r_node) \
+	do { (r_node) = trp_left_get(a_base, a_field, (a_node)); \
+	trp_left_set(a_base, a_field, (a_node), \
+		trp_right_get(a_base, a_field, (r_node))); \
+	trp_right_set(a_base, a_field, (r_node), (a_node)); } while(0)
+
+#define trp_gen(a_attr, a_pre, a_type, a_field, a_base, a_cmp) \
+a_attr a_type *a_pre##first(struct trp_root *treap) \
+{ \
+	uint32_t ret; \
+	trpn_first(a_base, a_field, treap->trp_root, ret); \
+	return trpn_pointer(a_base, ret); \
+} \
+a_attr a_type *a_pre##next(struct trp_root *treap, a_type *node) { \
+	uint32_t ret; \
+	uint32_t offset = trpn_offset(a_base, node); \
+	if (~trp_right_get(a_base, a_field, offset)) { \
+		trpn_first(a_base, a_field, \
+			trp_right_get(a_base, a_field, offset), ret); \
+	} else { \
+		uint32_t tnode = treap->trp_root; \
+		ret = ~0; \
+		while (1) { \
+			int cmp = (a_cmp)(trpn_pointer(a_base, offset), \
+				trpn_pointer(a_base, tnode)); \
+			if (cmp < 0) { \
+				ret = tnode; \
+				tnode = trp_left_get(a_base, a_field, tnode); \
+			} else if (cmp > 0) { \
+				tnode = trp_right_get(a_base, a_field, tnode); \
+			} else { \
+				break; \
+			} \
+		} \
+	} \
+	return trpn_pointer(a_base, ret); \
+} \
+a_attr a_type *a_pre##search(struct trp_root *treap, a_type *key) \
+{ \
+	int cmp; \
+	uint32_t ret = treap->trp_root; \
+	while (~ret && (cmp = (a_cmp)(key, trpn_pointer(a_base,ret)))) \
+		if (cmp < 0) \
+			ret = trp_left_get(a_base, a_field, ret); \
+		else \
+			ret = trp_right_get(a_base, a_field, ret); \
+	return trpn_pointer(a_base, ret); \
+} \
+a_attr uint32_t a_pre##insert_recurse(uint32_t cur_node, uint32_t ins_node) \
+{ \
+	if (cur_node == ~0) \
+		return (ins_node); \
+	else { \
+		uint32_t ret; \
+		int cmp = (a_cmp)(trpn_pointer(a_base, ins_node), \
+					trpn_pointer(a_base, cur_node)); \
+		if (cmp < 0) { \
+			uint32_t left = a_pre##insert_recurse( \
+				trp_left_get(a_base, a_field, cur_node), ins_node); \
+			trp_left_set(a_base, a_field, cur_node, left); \
+			if (trp_prio_get(left) < trp_prio_get(cur_node)) \
+				trpn_rotate_right(a_base, a_field, cur_node, ret); \
+			else \
+				ret = cur_node; \
+		} else { \
+			uint32_t right = a_pre##insert_recurse( \
+				trp_right_get(a_base, a_field, cur_node), ins_node); \
+			trp_right_set(a_base, a_field, cur_node, right); \
+			if (trp_prio_get(right) < trp_prio_get(cur_node)) \
+				trpn_rotate_left(a_base, a_field, cur_node, ret); \
+			else \
+				ret = cur_node; \
+		} \
+		return (ret); \
+	} \
+} \
+a_attr void a_pre##insert(struct trp_root *treap, a_type *node) \
+{ \
+	uint32_t offset = trpn_offset(a_base, node); \
+	trp_node_new(a_base, a_field, offset); \
+	treap->trp_root = a_pre##insert_recurse( treap->trp_root, offset); \
+} \
+a_attr uint32_t a_pre##remove_recurse(uint32_t cur_node, uint32_t rem_node) \
+{ \
+	int cmp = a_cmp(trpn_pointer(a_base, rem_node), \
+			trpn_pointer(a_base, cur_node)); \
+	if (cmp == 0) { \
+		uint32_t ret; \
+		uint32_t left = trp_left_get(a_base, a_field, cur_node); \
+		uint32_t right = trp_right_get(a_base, a_field, cur_node); \
+		if (left == ~0) { \
+			if (right == ~0) \
+				return (~0); \
+		} else if (right == ~0 || trp_prio_get(left) < trp_prio_get(right)) { \
+			trpn_rotate_right(a_base, a_field, cur_node, ret); \
+			right = a_pre##remove_recurse(cur_node, rem_node); \
+			trp_right_set(a_base, a_field, ret, right); \
+			return (ret); \
+		} \
+		trpn_rotate_left(a_base, a_field, cur_node, ret); \
+		left = a_pre##remove_recurse(cur_node, rem_node); \
+		trp_left_set(a_base, a_field, ret, left); \
+		return (ret); \
+	} else if (cmp < 0) { \
+		uint32_t left = a_pre##remove_recurse( \
+			trp_left_get(a_base, a_field, cur_node), rem_node); \
+		trp_left_set(a_base, a_field, cur_node, left); \
+		return (cur_node); \
+	} else { \
+		uint32_t right = a_pre##remove_recurse( \
+			trp_right_get(a_base, a_field, cur_node), rem_node); \
+		trp_right_set(a_base, a_field, cur_node, right); \
+		return (cur_node); \
+	} \
+} \
+a_attr void a_pre##remove(struct trp_root *treap, a_type *node) \
+{ \
+	treap->trp_root = a_pre##remove_recurse(treap->trp_root, \
+		trpn_offset(a_base, node)); \
+} \
+
+#endif
diff --git a/vcs-svn/trp.txt b/vcs-svn/trp.txt
new file mode 100644
index 0000000..b7e2d18
--- /dev/null
+++ b/vcs-svn/trp.txt
@@ -0,0 +1,61 @@
+TODO: Update this documentation to match the changes to trp.h
+
+The trp_gen() macro generates a type-specific treap implementation,
+based on the above cpp macros.
+
+Arguments:
+
+  a_attr     : Function attribute for generated functions (ex: static).
+  a_pre      : Prefix for generated functions (ex: treap_).
+  a_t_type   : Type for treap data structure (ex: treap_t).
+  a_type     : Type for treap node data structure (ex: treap_node_t).
+  a_field    : Name of treap node linkage (ex: treap_link).
+  a_base     : Expression for the base pointer from which nodes are offset.
+  a_cmp      : Node comparison function name, with the following prototype:
+                 int (a_cmp *)(a_type *a_node, a_type *a_other);
+                                       ^^^^^^
+                                    or a_key
+               Interpretation of comparision function return values:
+                 -1 : a_node <  a_other
+                  0 : a_node == a_other
+                  1 : a_node >  a_other
+               In all cases, the a_node or a_key macro argument is the first
+               argument to the comparison function, which makes it possible
+               to write comparison functions that treat the first argument
+               specially.
+
+Assuming the following setup:
+
+  typedef struct ex_node_s ex_node_t;
+  struct ex_node_s {
+      trp_node(ex_node_t) ex_link;
+  };
+  typedef trp(ex_node_t) ex_t;
+  static ex_node_t ex_base[MAX_NODES];
+  trp_gen(static, ex_, ex_t, ex_node_t, ex_link, ex_base, ex_cmp)
+
+The following API is generated:
+
+  static void
+  ex_new(ex_t *treap);
+      Description: Initialize a treap structure.
+      Args:
+        treap: Pointer to an uninitialized treap object.
+
+  static ex_node_t *
+  ex_psearch(ex_t *treap, ex_node_t *key);
+      Description: Search for node that matches key.  If no match is found,
+                   return what would be key's successor/predecessor, were
+                   key in treap.
+      Args:
+        treap: Pointer to a initialized treap object.
+        key  : Search key.
+      Ret: Node in treap that matches key, or if no match, hypothetical
+           node's successor/predecessor (NULL if no successor/predecessor).
+
+  static void
+  ex_insert(ex_t *treap, ex_node_t *node);
+      Description: Insert node into treap.
+      Args:
+        treap: Pointer to a initialized treap object.
+        node : Node to be inserted into treap.
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 3/6] Add library for string-specific memory pool
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 1/6] Add memory pool library Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 2/6] Add cpp macro implementation of treaps Ramkumar Ramachandra
@ 2010-06-10 13:09 ` Ramkumar Ramachandra
  2010-06-11 19:33   ` Junio C Hamano
  2010-06-10 13:09 ` [PATCH 4/6] Add stream helper library Ramkumar Ramachandra
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: David Barr <david.barr@cordelta.com>

This library uses the macros in the obj_pool.h and trp.h to create a
memory pool for strings and expose an API for handling them.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/string_pool.c |  116 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/string_pool.h |   15 ++++++
 2 files changed, 131 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/string_pool.c
 create mode 100644 vcs-svn/string_pool.h

diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
new file mode 100644
index 0000000..523a6cc
--- /dev/null
+++ b/vcs-svn/string_pool.c
@@ -0,0 +1,116 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+
+#include "trp.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+typedef struct node_s node_t;
+static struct trp_root tree = { ~0 };
+
+struct node_s {
+	uint32_t offset;
+	struct trp_node children;
+};
+
+/* Create two memory pools: one for node_t, and another for strings */
+obj_pool_gen(node, node_t, 4096);
+obj_pool_gen(string, char, 4096);
+
+static char *node_value(node_t *node)
+{
+	return node ? string_pointer(node->offset) : NULL;
+}
+
+static int node_cmp(node_t *a, node_t *b)
+{
+	return strcmp(node_value(a), node_value(b));
+}
+
+/* Build a Treap from the node_s structure (a trp_node w/ offset) */
+trp_gen(static, tree_, node_t, children, node, node_cmp);
+
+char *pool_fetch(uint32_t entry)
+{
+	return node_value(node_pointer(entry));
+}
+
+uint32_t pool_intern(char *key)
+{
+	/* Canonicalize key */
+	node_t *match = NULL;
+	uint32_t key_len;
+	if (key == NULL)
+		return ~0;
+	key_len = strlen(key) + 1;
+	node_t *node = node_pointer(node_alloc(1));
+	node->offset = string_alloc(key_len);
+	strcpy(node_value(node), key);
+	match = tree_search(&tree, node);
+	if (!match) {
+		tree_insert(&tree, node);
+	} else {
+		node_free(1);
+		string_free(key_len);
+		node = match;
+	}
+	return node_offset(node);
+}
+
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
+{
+	char *token = strtok_r(str, delim, saveptr);
+	return token ? pool_intern(token) : ~0;
+}
+
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
+{
+	uint32_t i;
+	for (i = 0; i < len && ~seq[i]; i++) {
+		fputs(pool_fetch(seq[i]), stream);
+		if (i < len - 1 && ~seq[i + 1])
+			fputc(delim, stream);
+	}
+}
+
+uint32_t pool_tok_seq(uint32_t max, uint32_t *seq, char *delim, char *str)
+{
+	char *context = NULL;
+	uint32_t length = 0, token = str ? pool_tok_r(str, delim, &context) : ~0;
+	while (length < max) {
+		seq[length++] = token;
+		if (token == ~0)
+			break;
+		token = pool_tok_r(NULL, delim, &context);
+	}
+	seq[length ? length - 1 : 0] = ~0;
+	return length;
+}
+
+void pool_init(void)
+{
+	uint32_t node;
+	uint32_t string = 0;
+	string_init();
+	while (string < string_pool.size) {
+		node = node_alloc(1);
+		node_pointer(node)->offset = string;
+		tree_insert(&tree, node_pointer(node));
+		string += strlen(string_pointer(string)) + 1;
+	}
+}
+
+void pool_commit(void)
+{
+	string_commit();
+}
+
+void pool_reset(void)
+{
+	node_reset();
+	string_reset();
+}
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
new file mode 100644
index 0000000..a69d165
--- /dev/null
+++ b/vcs-svn/string_pool.h
@@ -0,1 +1,15 @@
+#ifndef STRING_POOL_H_
+#define STRING_POOL_H_
+
+#include "git-compat-util.h"
+
+uint32_t pool_intern(char *key);
+char *pool_fetch(uint32_t entry);
+uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
+void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
+uint32_t pool_tok_seq(uint32_t max, uint32_t *seq, char *delim, char *str);
+void pool_init(void);
+void pool_commit(void);
+void pool_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 4/6] Add stream helper library
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
                   ` (2 preceding siblings ...)
  2010-06-10 13:09 ` [PATCH 3/6] Add library for string-specific memory pool Ramkumar Ramachandra
@ 2010-06-10 13:09 ` Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 5/6] Add infrastructure to write revisions in fast-export format Ramkumar Ramachandra
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: David Barr <david.barr@cordelta.com>

This library provides facilities to read streams into buffers. It
maintains a couple of static buffers and provides an API to use them.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/line_buffer.c |  134 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/line_buffer.h |   14 +++++
 2 files changed, 148 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/line_buffer.c
 create mode 100644 vcs-svn/line_buffer.h

diff --git a/vcs-svn/line_buffer.c b/vcs-svn/line_buffer.c
new file mode 100644
index 0000000..8ff1fb6
--- /dev/null
+++ b/vcs-svn/line_buffer.c
@@ -0,0 +1,134 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+
+#include "line_buffer.h"
+#include "obj_pool.h"
+
+#define LINE_BUFFER_LEN 10000
+#define COPY_BUFFER_LEN 4096
+
+/* Create memory pool for char sequence of known length */
+obj_pool_gen(blob, char, 4096);
+
+static char line_buffer[LINE_BUFFER_LEN];
+static char byte_buffer[COPY_BUFFER_LEN];
+static uint32_t line_buffer_len = 0;
+static uint32_t line_len = 0;
+static FILE *infile;
+
+int buffer_init(char *filename)
+{
+	infile = fopen(filename, "r");
+	if(!infile)
+		return 1;
+	return 0;
+}
+
+int buffer_deinit()
+{
+	fclose(infile);
+	return 0;
+}
+
+char *buffer_read_line(void)
+{
+	char *end;
+	uint32_t n_read;
+
+	if (line_len) {
+		memmove(line_buffer, &line_buffer[line_len],
+			line_buffer_len - line_len);
+		line_buffer_len -= line_len;
+		line_len = 0;
+	}
+
+	end = memchr(line_buffer, '\n', line_buffer_len);
+	while (line_buffer_len < LINE_BUFFER_LEN - 1 &&
+	       !feof(infile) && ferror(infile) && NULL == end) {
+		n_read = fread(&line_buffer[line_buffer_len], 1,
+			       LINE_BUFFER_LEN - 1 - line_buffer_len,
+			       infile);
+		end = memchr(&line_buffer[line_buffer_len], '\n', n_read);
+		line_buffer_len += n_read;
+	}
+
+	if (ferror(infile))
+		return NULL;
+
+	if (end != NULL) {
+		line_len = end - line_buffer;
+		line_buffer[line_len++] = '\0';
+	} else {
+		line_len = line_buffer_len;
+		line_buffer[line_buffer_len] = '\0';
+	}
+
+	if (line_len == 0)
+		return NULL;
+
+	return line_buffer;
+}
+
+char *buffer_read_string(uint32_t len)
+{
+	char *s;
+	blob_free(blob_pool.size);
+	s = blob_pointer(blob_alloc(len + 1));
+	uint32_t offset = 0;
+	if (line_buffer_len > line_len) {
+		offset = line_buffer_len - line_len;
+		if (offset > len)
+			offset = len;
+		memcpy(s, &line_buffer[line_len], offset);
+		line_len += offset;
+	}
+	if (offset < len)
+		offset += fread(&s[offset], 1, len - offset, infile);
+	s[offset] = '\0';
+	return s;
+}
+
+void buffer_copy_bytes(uint32_t len)
+{
+	uint32_t in;
+	if (line_buffer_len > line_len) {
+		in = line_buffer_len - line_len;
+		if (in > len)
+			in = len;
+		fwrite(&line_buffer[line_len], 1, in, stdout);
+		len -= in;
+		line_len += in;
+	}
+	while (len > 0 && !feof(infile)) {
+		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+		in = fread(byte_buffer, 1, in, infile);
+		len -= in;
+		fwrite(byte_buffer, 1, in, stdout);
+	}
+}
+
+void buffer_skip_bytes(uint32_t len)
+{
+	uint32_t in;
+	if (line_buffer_len > line_len) {
+		in = line_buffer_len - line_len;
+		if (in > len)
+			in = len;
+		line_len += in;
+		len -= in;
+	}
+	while (len > 0 && !feof(infile) && !ferror(infile)) {
+		in = len < COPY_BUFFER_LEN ? len : COPY_BUFFER_LEN;
+		in = fread(byte_buffer, 1, in, infile);
+		len -= in;
+	}
+}
+
+void buffer_reset(void)
+{
+	blob_reset();
+}
diff --git a/vcs-svn/line_buffer.h b/vcs-svn/line_buffer.h
new file mode 100644
index 0000000..a6c42d7
--- /dev/null
+++ b/vcs-svn/line_buffer.h
@@ -0,1 +1,14 @@
+#ifndef LINE_BUFFER_H_
+#define LINE_BUFFER_H_
+
+#include "git-compat-util.h"
+
+int buffer_init(char *filename);
+int buffer_deinit(void);
+char *buffer_read_line(void);
+char *buffer_read_string(uint32_t len);
+void buffer_copy_bytes(uint32_t len);
+void buffer_skip_bytes(uint32_t len);
+void buffer_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 5/6] Add infrastructure to write revisions in fast-export format
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
                   ` (3 preceding siblings ...)
  2010-06-10 13:09 ` [PATCH 4/6] Add stream helper library Ramkumar Ramachandra
@ 2010-06-10 13:09 ` Ramkumar Ramachandra
  2010-06-10 13:09 ` [PATCH 6/6] Add SVN dump parser Ramkumar Ramachandra
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: David Barr <david.barr@cordelta.com>

repo_tree maintains the exporter's state and provides a facility to
to call fast_export, which then writes objects to stdout suitable for
consumption by git-fast-import.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/fast_export.c |   74 +++++++++++
 vcs-svn/fast_export.h |   14 ++
 vcs-svn/repo_tree.c   |  335 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/repo_tree.h   |   26 ++++
 4 files changed, 449 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/fast_export.c
 create mode 100644 vcs-svn/fast_export.h
 create mode 100644 vcs-svn/repo_tree.c
 create mode 100644 vcs-svn/repo_tree.h

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
new file mode 100644
index 0000000..0fe6672
--- /dev/null
+++ b/vcs-svn/fast_export.c
@@ -0,0 +1,74 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "repo_tree.h"
+#include "string_pool.h"
+
+#define MAX_GITSVN_LINE_LEN 4096
+
+static uint32_t first_commit_done;
+
+void fast_export_delete(uint32_t depth, uint32_t *path)
+{
+	putchar('D');
+	putchar(' ');
+	pool_print_seq(depth, path, '/', stdout);
+	putchar('\n');
+}
+
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+						uint32_t mark)
+{
+	printf("M %06o :%d ", mode, mark);
+	pool_print_seq(depth, path, '/', stdout);
+	putchar('\n');
+}
+
+static char gitsvnline[MAX_GITSVN_LINE_LEN];
+void fast_export_commit(uint32_t revision, uint32_t author, char *log,
+			uint32_t uuid, uint32_t url,
+			unsigned long timestamp)
+{
+	if (!log)
+		log = "";
+	if (~uuid && ~url) {
+		snprintf(gitsvnline, MAX_GITSVN_LINE_LEN, "\n\ngit-svn-id: %s@%d %s\n",
+				 pool_fetch(url), revision, pool_fetch(uuid));
+	} else {
+		*gitsvnline = '\0';
+	}
+	printf("commit refs/heads/master\n");
+	printf("committer %s <%s@%s> %ld +0000\n",
+		   ~author ? pool_fetch(author) : "nobody",
+		   ~author ? pool_fetch(author) : "nobody",
+		   ~uuid ? pool_fetch(uuid) : "local", timestamp);
+	printf("data %zd\n%s%s\n",
+		   strlen(log) + strlen(gitsvnline), log, gitsvnline);
+	if (!first_commit_done) {
+		if (revision > 1)
+			printf("from refs/heads/master^0\n");
+		first_commit_done = 1;
+	}
+	repo_diff(revision - 1, revision);
+	fputc('\n', stdout);
+
+	printf("progress Imported commit %d.\n\n", revision);
+}
+
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len)
+{
+	if (mode == REPO_MODE_LNK) {
+		/* svn symlink blobs start with "link " */
+		buffer_skip_bytes(5);
+		len -= 5;
+	}
+	printf("blob\nmark :%d\ndata %d\n", mark, len);
+	buffer_copy_bytes(len);
+	fputc('\n', stdout);
+}
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
new file mode 100644
index 0000000..47e8f56
--- /dev/null
+++ b/vcs-svn/fast_export.h
@@ -0,0 +1,14 @@
+#ifndef FAST_EXPORT_H_
+#define FAST_EXPORT_H_
+
+#include <stdint.h>
+#include <time.h>
+
+void fast_export_delete(uint32_t depth, uint32_t *path);
+void fast_export_modify(uint32_t depth, uint32_t *path, uint32_t mode,
+			uint32_t mark);
+void fast_export_commit(uint32_t revision, uint32_t author, char *log,
+			uint32_t uuid, uint32_t url, unsigned long timestamp);
+void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len);
+
+#endif
diff --git a/vcs-svn/repo_tree.c b/vcs-svn/repo_tree.c
new file mode 100644
index 0000000..6565779
--- /dev/null
+++ b/vcs-svn/repo_tree.c
@@ -0,0 +1,335 @@
+/*
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "git-compat-util.h"
+
+#include "string_pool.h"
+#include "repo_tree.h"
+#include "obj_pool.h"
+#include "fast_export.h"
+
+#include "trp.h"
+
+struct repo_dirent {
+	uint32_t name_offset;
+	struct trp_node children;
+	uint32_t mode;
+	uint32_t content_offset;
+};
+
+struct repo_dir {
+	struct trp_root entries;
+};
+
+struct repo_commit {
+	uint32_t root_dir_offset;
+};
+
+/* Generate memory pools for commit, dir and dirent */
+obj_pool_gen(commit, struct repo_commit, 4096);
+obj_pool_gen(dir, struct repo_dir, 4096);
+obj_pool_gen(dirent, struct repo_dirent, 4096);
+
+static int repo_dirent_name_cmp(const void *a, const void *b);
+
+/* Build a Treap from the node_s structure (a trp_node w/ offset) */
+trp_gen(static, dirent_, struct repo_dirent, children, dirent, repo_dirent_name_cmp);
+
+static uint32_t active_commit;
+static uint32_t _mark;
+
+uint32_t next_blob_mark(void)
+{
+	return _mark++;
+}
+
+static struct repo_dir *repo_commit_root_dir(struct repo_commit *commit)
+{
+	return dir_pointer(commit->root_dir_offset);
+}
+
+static struct repo_dirent *repo_first_dirent(struct repo_dir *dir)
+{
+	return dirent_first(&dir->entries);
+}
+
+static int repo_dirent_name_cmp(const void *a, const void *b)
+{
+	const struct repo_dirent *dirent1 = a, *dirent2 = b;
+	uint32_t a_offset = dirent1->name_offset;
+	uint32_t b_offset = dirent2->name_offset;
+	return (a_offset > b_offset) - (a_offset < b_offset);
+}
+
+static int repo_dirent_is_dir(struct repo_dirent *dirent)
+{
+	return dirent != NULL && dirent->mode == REPO_MODE_DIR;
+}
+
+static struct repo_dir *repo_dir_from_dirent(struct repo_dirent *dirent)
+{
+	if (!repo_dirent_is_dir(dirent))
+		return NULL;
+	return dir_pointer(dirent->content_offset);
+}
+
+static struct repo_dir *repo_clone_dir(struct repo_dir *orig_dir)
+{
+	uint32_t orig_o, new_o;
+	orig_o = dir_offset(orig_dir);
+	if (orig_o >= dir_pool.committed)
+		return orig_dir;
+	new_o = dir_alloc(1);
+	orig_dir = dir_pointer(orig_o);
+	*dir_pointer(new_o) = *orig_dir;
+	return dir_pointer(new_o);
+}
+
+static struct repo_dirent *repo_read_dirent(uint32_t revision, uint32_t *path)
+{
+	uint32_t name = 0;
+	struct repo_dirent *key = dirent_pointer(dirent_alloc(1));
+	struct repo_dir *dir = NULL;
+	struct repo_dirent *dirent = NULL;
+	dir = repo_commit_root_dir(commit_pointer(revision));
+	while (~(name = *path++)) {
+		key->name_offset = name;
+		dirent = dirent_search(&dir->entries, key);
+		if (dirent == NULL || !repo_dirent_is_dir(dirent))
+			break;
+		dir = repo_dir_from_dirent(dirent);
+	}
+	dirent_free(1);
+	return dirent;
+}
+
+static void
+repo_write_dirent(uint32_t *path, uint32_t mode, uint32_t content_offset,
+		  uint32_t del)
+{
+	uint32_t name, revision, dir_o = ~0, parent_dir_o = ~0;
+	struct repo_dir *dir;
+	struct repo_dirent *key;
+	struct repo_dirent *dirent = NULL;
+	revision = active_commit;
+	dir = repo_commit_root_dir(commit_pointer(revision));
+	dir = repo_clone_dir(dir);
+	commit_pointer(revision)->root_dir_offset = dir_offset(dir);
+	while (~(name = *path++)) {
+		parent_dir_o = dir_offset(dir);
+
+		key = dirent_pointer(dirent_alloc(1));
+		key->name_offset = name;
+
+		dirent = dirent_search(&dir->entries, key);
+		if (dirent == NULL)
+			dirent = key;
+		else
+			dirent_free(1);
+
+		if (dirent == key) {
+			dirent->mode = REPO_MODE_DIR;
+			dirent->content_offset = 0;
+			dirent_insert(&dir->entries, dirent);
+		}
+
+
+		if (dirent_offset(dirent) < dirent_pool.committed) {
+			dir_o = repo_dirent_is_dir(dirent) ? dirent->content_offset : ~0;
+			dirent_remove(&dir->entries, dirent);
+			dirent = dirent_pointer(dirent_alloc(1));
+			dirent->name_offset = name;
+			dirent->mode = REPO_MODE_DIR;
+			dirent->content_offset = dir_o;
+			dirent_insert(&dir->entries, dirent);
+		}
+
+		dir = repo_dir_from_dirent(dirent);
+		dir = repo_clone_dir(dir);
+		dirent->content_offset = dir_offset(dir);
+	}
+	if (dirent == NULL)
+		return;
+	dirent->mode = mode;
+	dirent->content_offset = content_offset;
+	if (del && ~parent_dir_o)
+		dirent_remove(&dir_pointer(parent_dir_o)->entries, dirent);
+}
+
+uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst)
+{
+	uint32_t mode = 0, content_offset = 0;
+	struct repo_dirent *src_dirent;
+	src_dirent = repo_read_dirent(revision, src);
+	if (src_dirent != NULL) {
+		mode = src_dirent->mode;
+		content_offset = src_dirent->content_offset;
+		repo_write_dirent(dst, mode, content_offset, 0);
+	}
+	return mode;
+}
+
+void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark)
+{
+	repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+uint32_t repo_replace(uint32_t *path, uint32_t blob_mark)
+{
+	uint32_t mode = 0;
+	struct repo_dirent *src_dirent;
+	src_dirent = repo_read_dirent(active_commit, path);
+	if (src_dirent != NULL) {
+		mode = src_dirent->mode;
+		repo_write_dirent(path, mode, blob_mark, 0);
+	}
+	return mode;
+}
+
+void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark)
+{
+	struct repo_dirent *src_dirent;
+	src_dirent = repo_read_dirent(active_commit, path);
+	if (src_dirent != NULL && blob_mark == 0) {
+		blob_mark = src_dirent->content_offset;
+	}
+	repo_write_dirent(path, mode, blob_mark, 0);
+}
+
+void repo_delete(uint32_t *path)
+{
+	repo_write_dirent(path, 0, 0, 1);
+}
+
+static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir);
+
+static void repo_git_add(uint32_t depth, uint32_t *path, struct repo_dirent *dirent)
+{
+	if (repo_dirent_is_dir(dirent)) {
+		repo_git_add_r(depth, path, repo_dir_from_dirent(dirent));
+	} else {
+		fast_export_modify(depth, path, dirent->mode, dirent->content_offset);
+	}
+}
+
+static void repo_git_add_r(uint32_t depth, uint32_t *path, struct repo_dir *dir)
+{
+	struct repo_dirent *de = repo_first_dirent(dir);
+	while (de) {
+		path[depth] = de->name_offset;
+		repo_git_add(depth + 1, path, de);
+		de = dirent_next(&dir->entries, de);
+	}
+}
+
+static void repo_diff_r(uint32_t depth, uint32_t *path, struct repo_dir *dir1,
+			struct repo_dir *dir2)
+{
+	struct repo_dirent *de1, *de2;
+	de1 = repo_first_dirent(dir1);
+	de2 = repo_first_dirent(dir2);
+
+	while (de1 && de2) {
+		if (de1->name_offset < de2->name_offset) {
+			path[depth] = de1->name_offset;
+			fast_export_delete(depth + 1, path);
+			de1 = dirent_next(&dir1->entries, de1);
+			continue;
+		} else if (de1->name_offset > de2->name_offset) {
+			path[depth] = de2->name_offset;
+			repo_git_add(depth + 1, path, de2);
+			de2 = dirent_next(&dir2->entries, de2);
+			continue;
+		}
+		path[depth] = de1->name_offset;
+		if (de1->mode != de2->mode ||
+		    de1->content_offset != de2->content_offset) {
+			if (repo_dirent_is_dir(de1) && repo_dirent_is_dir(de2)) {
+				repo_diff_r(depth + 1, path,
+					    repo_dir_from_dirent(de1),
+					    repo_dir_from_dirent(de2));
+			} else {
+				if (repo_dirent_is_dir(de1) != repo_dirent_is_dir(de2)) {
+					fast_export_delete(depth + 1, path);
+				}
+				repo_git_add(depth + 1, path, de2);
+			}
+		}
+		de1 = dirent_next(&dir1->entries, de1);
+		de2 = dirent_next(&dir2->entries, de2);
+	}
+	while (de1) {
+		path[depth] = de1->name_offset;
+		fast_export_delete(depth + 1, path);
+		de1 = dirent_next(&dir1->entries, de1);
+	}
+	while (de2) {
+		path[depth] = de2->name_offset;
+		repo_git_add(depth + 1, path, de2);
+		de2 = dirent_next(&dir2->entries, de2);
+	}
+}
+
+static uint32_t path_stack[REPO_MAX_PATH_DEPTH];
+
+void repo_diff(uint32_t r1, uint32_t r2)
+{
+	repo_diff_r(0,
+		    path_stack,
+		    repo_commit_root_dir(commit_pointer(r1)),
+		    repo_commit_root_dir(commit_pointer(r2)));
+}
+
+void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
+		 uint32_t url, unsigned long timestamp)
+{
+	fast_export_commit(revision, author, log, uuid, url, timestamp);
+	pool_commit();
+	dirent_commit();
+	dir_commit();
+	commit_commit();
+	active_commit = commit_alloc(1);
+	commit_pointer(active_commit)->root_dir_offset =
+		commit_pointer(active_commit - 1)->root_dir_offset;
+}
+
+static void mark_init(void)
+{
+	uint32_t i;
+	_mark = 0;
+	for (i = 0; i < dirent_pool.size; i++)
+		if (!repo_dirent_is_dir(dirent_pointer(i)) &&
+		    dirent_pointer(i)->content_offset > _mark)
+			_mark = dirent_pointer(i)->content_offset;
+	_mark++;
+}
+
+void repo_init() {
+	pool_init();
+	commit_init();
+	dir_init();
+	dirent_init();
+	mark_init();
+	if (commit_pool.size == 0) {
+		/* Create empty tree for commit 0. */
+		commit_alloc(1);
+		commit_pointer(0)->root_dir_offset = dir_alloc(1);
+		dir_pointer(0)->entries.trp_root = ~0;
+		dir_commit();
+		commit_commit();
+	}
+	/* Preallocate next commit, ready for changes. */
+	active_commit = commit_alloc(1);
+	commit_pointer(active_commit)->root_dir_offset =
+		commit_pointer(active_commit - 1)->root_dir_offset;
+}
+
+void repo_reset(void)
+{
+	pool_reset();
+	commit_reset();
+	dir_reset();
+	dirent_reset();
+}
diff --git a/vcs-svn/repo_tree.h b/vcs-svn/repo_tree.h
new file mode 100644
index 0000000..5476175
--- /dev/null
+++ b/vcs-svn/repo_tree.h
@@ -0,0 +1,26 @@
+#ifndef REPO_TREE_H_
+#define REPO_TREE_H_
+
+#include "git-compat-util.h"
+
+#define REPO_MODE_DIR 0040000
+#define REPO_MODE_BLB 0100644
+#define REPO_MODE_EXE 0100755
+#define REPO_MODE_LNK 0120000
+
+#define REPO_MAX_PATH_LEN 4096
+#define REPO_MAX_PATH_DEPTH 1000
+
+uint32_t next_blob_mark(void);
+uint32_t repo_copy(uint32_t revision, uint32_t *src, uint32_t *dst);
+void repo_add(uint32_t *path, uint32_t mode, uint32_t blob_mark);
+uint32_t repo_replace(uint32_t *path, uint32_t blob_mark);
+void repo_modify(uint32_t *path, uint32_t mode, uint32_t blob_mark);
+void repo_delete(uint32_t *path);
+void repo_commit(uint32_t revision, uint32_t author, char *log, uint32_t uuid,
+		 uint32_t url, long unsigned timestamp);
+void repo_diff(uint32_t r1, uint32_t r2);
+void repo_init(void);
+void repo_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH 6/6] Add SVN dump parser
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
                   ` (4 preceding siblings ...)
  2010-06-10 13:09 ` [PATCH 5/6] Add infrastructure to write revisions in fast-export format Ramkumar Ramachandra
@ 2010-06-10 13:09 ` Ramkumar Ramachandra
  2010-06-10 15:24   ` Ramkumar Ramachandra
       [not found] ` <AANLkTin3iQK7YHGgjxlAjtchu3ZpntjQHK7LkfxxJj6q@mail.gmail.com>
  2010-06-12  6:26 ` Jonathan Nieder
  7 siblings, 1 reply; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:09 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

From: David Barr <david.barr@cordelta.com>

svndump parses data that is in SVN dumpfile format produced by
`svnadmin dump` with the help of line_buffer, and uses repo_tree and
fast_export to emit a git fast-import stream.

Signed-off-by: David Barr <david.barr@cordelta.com>
Signed-off-by: Ramkumar Ramachandra <artagnon@gmail.com>
---
 vcs-svn/svndump.c |  298 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/svndump.h |    7 ++
 2 files changed, 305 insertions(+), 0 deletions(-)
 create mode 100644 vcs-svn/svndump.c
 create mode 100644 vcs-svn/svndump.h

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
new file mode 100644
index 0000000..92c15d7
--- /dev/null
+++ b/vcs-svn/svndump.c
@@ -0,0 +1,298 @@
+/*
+ * Parse and rearrange a svnadmin dump.
+ * Create the dump with:
+ * svnadmin dump --incremental -r<startrev>:<endrev> <repository> >outfile
+ *
+ * Licensed under a two-clause BSD-style license.
+ * See LICENSE for details.
+ */
+
+#include "cache.h"
+#include "git-compat-util.h"
+
+#include "repo_tree.h"
+#include "fast_export.h"
+#include "line_buffer.h"
+#include "obj_pool.h"
+#include "string_pool.h"
+
+#define NODEACT_REPLACE 4
+#define NODEACT_DELETE 3
+#define NODEACT_ADD 2
+#define NODEACT_CHANGE 1
+#define NODEACT_UNKNOWN 0
+
+#define DUMP_CTX 0
+#define REV_CTX  1
+#define NODE_CTX 2
+
+#define LENGTH_UNKNOWN (~0)
+#define DATE_RFC2822_LEN 31
+
+/* Create memory pool for log messages */
+obj_pool_gen(log, char, 4096);
+
+static char* log_copy(uint32_t length, char *log)
+{
+	char *buffer;
+	log_free(log_pool.size);
+	buffer = log_pointer(log_alloc(length));
+	strncpy(buffer, log, length);
+	return buffer;
+}
+
+static struct {
+	uint32_t action, propLength, textLength, srcRev, srcMode, mark, type;
+	uint32_t src[REPO_MAX_PATH_DEPTH], dst[REPO_MAX_PATH_DEPTH];
+} node_ctx;
+
+static struct {
+	uint32_t revision, author;
+	unsigned long timestamp;
+	char *log;
+} rev_ctx;
+
+static struct {
+	uint32_t uuid, url;
+} dump_ctx;
+
+static struct {
+	uint32_t svn_log, svn_author, svn_date, svn_executable, svn_special, uuid,
+		revision_number, node_path, node_kind, node_action,
+		node_copyfrom_path, node_copyfrom_rev, text_content_length,
+		prop_content_length, content_length;
+} keys;
+
+static void reset_node_ctx(char *fname)
+{
+	node_ctx.type = 0;
+	node_ctx.action = NODEACT_UNKNOWN;
+	node_ctx.propLength = LENGTH_UNKNOWN;
+	node_ctx.textLength = LENGTH_UNKNOWN;
+	node_ctx.src[0] = ~0;
+	node_ctx.srcRev = 0;
+	node_ctx.srcMode = 0;
+	pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.dst, "/", fname);
+	node_ctx.mark = 0;
+}
+
+static void reset_rev_ctx(uint32_t revision)
+{
+	rev_ctx.revision = revision;
+	rev_ctx.timestamp = "";
+	rev_ctx.log = NULL;
+	rev_ctx.author = ~0;
+}
+
+static void reset_dump_ctx(uint32_t url)
+{
+	dump_ctx.url = url;
+	dump_ctx.uuid = ~0;
+}
+
+static void init_keys(void)
+{
+	keys.svn_log = pool_intern("svn:log");
+	keys.svn_author = pool_intern("svn:author");
+	keys.svn_date = pool_intern("svn:date");
+	keys.svn_executable = pool_intern("svn:executable");
+	keys.svn_special = pool_intern("svn:special");
+	keys.uuid = pool_intern("UUID");
+	keys.revision_number = pool_intern("Revision-number");
+	keys.node_path = pool_intern("Node-path");
+	keys.node_kind = pool_intern("Node-kind");
+	keys.node_action = pool_intern("Node-action");
+	keys.node_copyfrom_path = pool_intern("Node-copyfrom-path");
+	keys.node_copyfrom_rev = pool_intern("Node-copyfrom-rev");
+	keys.text_content_length = pool_intern("Text-content-length");
+	keys.prop_content_length = pool_intern("Prop-content-length");
+	keys.content_length = pool_intern("Content-length");
+}
+
+static void read_props(void)
+{
+	uint32_t len;
+	uint32_t key = ~0;
+	char buffer[27];
+	char *val = NULL;
+	char *t;
+	while ((t = buffer_read_line()) && strcmp(t, "PROPS-END")) {
+		if (!strncmp(t, "K ", 2)) {
+			len = atoi(&t[2]);
+			key = pool_intern(buffer_read_string(len));
+			buffer_read_line();
+		} else if (!strncmp(t, "V ", 2)) {
+			len = atoi(&t[2]);
+			val = buffer_read_string(len);
+			if (key == keys.svn_log) {
+				/* Value length excludes terminating nul. */
+				rev_ctx.log = log_copy(len + 1, val);
+			} else if (key == keys.svn_author) {
+				rev_ctx.author = pool_intern(val);
+			} else if (key == keys.svn_date) {
+				if (parse_date(val, buffer, sizeof(buffer)) > 0)
+					rev_ctx.timestamp = strtoul(buffer, NULL, 0);
+				else
+					fprintf(stderr, "Invalid timestamp: %s", val);
+			} else if (key == keys.svn_executable) {
+				node_ctx.type = REPO_MODE_EXE;
+			} else if (key == keys.svn_special) {
+				node_ctx.type = REPO_MODE_LNK;
+			}
+			key = ~0;
+			buffer_read_line();
+		}
+	}
+}
+
+static void handle_node(void)
+{
+	if (node_ctx.propLength != LENGTH_UNKNOWN && node_ctx.propLength) {
+		read_props();
+	}
+
+	if (node_ctx.srcRev) {
+		node_ctx.srcMode = repo_copy(node_ctx.srcRev, node_ctx.src, node_ctx.dst);
+	}
+
+	if (node_ctx.textLength != LENGTH_UNKNOWN &&
+		node_ctx.type != REPO_MODE_DIR) {
+		node_ctx.mark = next_blob_mark();
+	}
+
+	if (node_ctx.action == NODEACT_DELETE) {
+		repo_delete(node_ctx.dst);
+	} else if (node_ctx.action == NODEACT_CHANGE ||
+			   node_ctx.action == NODEACT_REPLACE) {
+		if (node_ctx.action == NODEACT_REPLACE &&
+			node_ctx.type == REPO_MODE_DIR) {
+			repo_replace(node_ctx.dst, node_ctx.mark);
+		} else if (node_ctx.propLength != LENGTH_UNKNOWN ) {
+			repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		} else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+			node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+		}
+	} else if (node_ctx.action == NODEACT_ADD) {
+		if (node_ctx.srcRev &&
+			node_ctx.propLength != LENGTH_UNKNOWN) {
+			repo_modify(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		} else if (node_ctx.srcRev &&
+			node_ctx.textLength != LENGTH_UNKNOWN) {
+			node_ctx.srcMode = repo_replace(node_ctx.dst, node_ctx.mark);
+		} else if ((node_ctx.type == REPO_MODE_DIR && !node_ctx.srcRev) ||
+				   node_ctx.textLength != LENGTH_UNKNOWN){
+			repo_add(node_ctx.dst, node_ctx.type, node_ctx.mark);
+		}
+	}
+
+	if (node_ctx.propLength == LENGTH_UNKNOWN && node_ctx.srcMode) {
+		node_ctx.type = node_ctx.srcMode;
+	}
+
+	if (node_ctx.mark) {
+		fast_export_blob(node_ctx.type, node_ctx.mark, node_ctx.textLength);
+	} else if (node_ctx.textLength != LENGTH_UNKNOWN) {
+		buffer_skip_bytes(node_ctx.textLength);
+	}
+}
+
+static void handle_revision(void)
+{
+	if (rev_ctx.revision)
+		repo_commit(rev_ctx.revision, rev_ctx.author, rev_ctx.log,
+			dump_ctx.uuid, dump_ctx.url, rev_ctx.timestamp);
+}
+
+void svndump_read(uint32_t url)
+{
+	char *val;
+	char *t;
+	uint32_t active_ctx = DUMP_CTX;
+	uint32_t len;
+	uint32_t key;
+
+	reset_dump_ctx(url);
+	while ((t = buffer_read_line())) {
+		val = strstr(t, ": ");
+		if (!val) continue;
+		*val++ = '\0';
+		*val++ = '\0';
+		key = pool_intern(t);
+
+		if(key == keys.uuid) {
+			dump_ctx.uuid = pool_intern(val);
+		} else if (key == keys.revision_number) {
+			if (active_ctx == NODE_CTX) handle_node();
+			if (active_ctx != DUMP_CTX) handle_revision();
+			active_ctx = REV_CTX;
+			reset_rev_ctx(atoi(val));
+		} else if (key == keys.node_path) {
+			if (active_ctx == NODE_CTX)
+				handle_node();
+			active_ctx = NODE_CTX;
+			reset_node_ctx(val);
+		} else if (key == keys.node_kind) {
+			if (!strcmp(val, "dir")) {
+				node_ctx.type = REPO_MODE_DIR;
+			} else if (!strcmp(val, "file")) {
+				node_ctx.type = REPO_MODE_BLB;
+			} else {
+				fprintf(stderr, "Unknown node-kind: %s\n", val);
+			}
+		} else if (key == keys.node_action) {
+			if (!strcmp(val, "delete")) {
+				node_ctx.action = NODEACT_DELETE;
+			} else if (!strcmp(val, "add")) {
+				node_ctx.action = NODEACT_ADD;
+			} else if (!strcmp(val, "change")) {
+				node_ctx.action = NODEACT_CHANGE;
+			} else if (!strcmp(val, "replace")) {
+				node_ctx.action = NODEACT_REPLACE;
+			} else {
+				fprintf(stderr, "Unknown node-action: %s\n", val);
+				node_ctx.action = NODEACT_UNKNOWN;
+			}
+		} else if (key == keys.node_copyfrom_path) {
+			pool_tok_seq(REPO_MAX_PATH_DEPTH, node_ctx.src, "/", val);
+		} else if (key == keys.node_copyfrom_rev) {
+			node_ctx.srcRev = atoi(val);
+		} else if (key == keys.text_content_length) {
+			node_ctx.textLength = atoi(val);
+		} else if (key == keys.prop_content_length) {
+			node_ctx.propLength = atoi(val);
+		} else if (key == keys.content_length) {
+			len = atoi(val);
+			buffer_read_line();
+			if (active_ctx == REV_CTX) {
+				read_props();
+			} else if (active_ctx == NODE_CTX) {
+				handle_node();
+				active_ctx = REV_CTX;
+			} else {
+				fprintf(stderr, "Unexpected content length header: %d\n", len);
+				buffer_skip_bytes(len);
+			}
+		}
+	}
+	if (active_ctx == NODE_CTX) handle_node();
+	if (active_ctx != DUMP_CTX) handle_revision();
+}
+
+static void svndump_init(void)
+{
+	repo_init();
+	reset_dump_ctx(~0);
+	reset_rev_ctx(0);
+	reset_node_ctx(NULL);
+	init_keys();
+}
+
+void svndump_reset(void)
+{
+	log_reset();
+	buffer_reset();
+	repo_reset();
+	reset_dump_ctx(~0);
+	reset_rev_ctx(0);
+	reset_node_ctx(NULL);
+}
diff --git a/vcs-svn/svndump.h b/vcs-svn/svndump.h
new file mode 100644
index 0000000..e205f1f
--- /dev/null
+++ b/vcs-svn/svndump.h
@@ -0,0 +1,7 @@
+#ifndef SVNDUMP_H_
+#define SVNDUMP_H_
+
+void svndump_read(char *url);
+void svndump_reset(void);
+
+#endif
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 0/6] Another attempt to get the SVN exporter merged
       [not found] ` <AANLkTin3iQK7YHGgjxlAjtchu3ZpntjQHK7LkfxxJj6q@mail.gmail.com>
@ 2010-06-10 13:22   ` Ramkumar Ramachandra
  0 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 13:22 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano,
	Ævar Arnfjörð Bjarmason

Hi Ævar,

Ævar Arnfjörð Bjarmason wrote:
> I'd like to test this out but funneling patches into git-am with GMail
> is a pain. Is it available in a public Git repository somewhere?

Thank you for bringing this up- I'm aware that others might be
interested as well. I've rolled out the patches from the 'git-merge'
branch, but you can test the independent project from the 'master'
branch which includes a Makefile and small manpage (courtesy Jonathan)
as well [1].

-- Ram

[1] http://github.com/artagnon/svn-dump-fast-export

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 6/6] Add SVN dump parser
  2010-06-10 13:09 ` [PATCH 6/6] Add SVN dump parser Ramkumar Ramachandra
@ 2010-06-10 15:24   ` Ramkumar Ramachandra
  0 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-10 15:24 UTC (permalink / raw)
  To: Git Mailing List
  Cc: David Michael Barr, Jonathan Nieder, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano,
	Ævar Arnfjörð Bjarmason

Hi,

> +void svndump_reset(void)
> +{
> +       log_reset();
> +       buffer_reset();
> +       repo_reset();
> +       reset_dump_ctx(~0);
> +       reset_rev_ctx(0);
> +       reset_node_ctx(NULL);
> +}

Thanks to Ævar for pointing this out- kindly read the patch with this
diff squashed in:

diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index 92c15d7..516a520 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -289,9 +289,6 @@ static void svndump_init(void)

 void svndump_reset(void)
 {
-	log_reset();
-	buffer_reset();
-	repo_reset();
 	reset_dump_ctx(~0);
 	reset_rev_ctx(0);
 	reset_node_ctx(NULL);

-- Ram

^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/6] Add library for string-specific memory pool
  2010-06-10 13:09 ` [PATCH 3/6] Add library for string-specific memory pool Ramkumar Ramachandra
@ 2010-06-11 19:33   ` Junio C Hamano
  2010-06-14  9:26     ` Ramkumar Ramachandra
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2010-06-11 19:33 UTC (permalink / raw)
  To: Ramkumar Ramachandra
  Cc: Git Mailing List, David Michael Barr, Jonathan Nieder,
	Sverre Rabbelier, Michael J Gruber

Ramkumar Ramachandra <artagnon@gmail.com> writes:

> diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
> new file mode 100644
> index 0000000..a69d165
> --- /dev/null
> +++ b/vcs-svn/string_pool.h
> @@ -0,1 +1,15 @@

How did you manage to get "-0,1" here?  This is supposed to be a new file.
PATCH 4/6 throws the same puzzlement at me.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 0/6] Another attempt to get the SVN exporter merged
  2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
                   ` (6 preceding siblings ...)
       [not found] ` <AANLkTin3iQK7YHGgjxlAjtchu3ZpntjQHK7LkfxxJj6q@mail.gmail.com>
@ 2010-06-12  6:26 ` Jonathan Nieder
  2010-06-14 14:41   ` Ramkumar Ramachandra
  7 siblings, 1 reply; 21+ messages in thread
From: Jonathan Nieder @ 2010-06-12  6:26 UTC (permalink / raw)
  To: Ramkumar Ramachandra
  Cc: Git Mailing List, David Michael Barr, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

Hi Ram,

Ramkumar Ramachandra wrote:

> It certainly looks like I'll never give up trying to get this series
> merged- this is my third attempt

First of all, thanks for your work on this and sorry to take so long
to respond[1].

> I still haven't been able to get rid
> of the compiler warnings about unused functions

Will reply to the relevant patch.  Since it’s not so bad to fix, I don’t
think that should hold up queueing the series in pu.

> it looks like
> this series won't graduate to `master` before that happens- I'd
> appreciate pointers on how to do this.

Instead, I suspect the main missing ingredient is a caller.  The
series builds up some library infrastructure with no new feature to
exercise it, which makes it a good time to get feedback but not a good
time to merge.

Of course, we could easily add a feature to exercise it: David’s
svn-fe tool is IMO quite useful on its own (regardless of how the code
will be used later by remote-svn).

I don’t think that is so important for the remote-svn project.  In
other words, I hope some other reviewers show up, but if that doesn’t
happen, I would suggest submitting again for inclusion once the
remote-svn command with import capability is functional.

I planned to write a series adding svn-fe to contrib/ and then the
computer died.  If someone else doesn’t do it first, hopefully I can
try again this weekend.

> Major change since last time: Removed dependency on mmap for
> portability reasons.

Sad but perhaps necessary.  I guess this also opens the possibility of
later making the save operation atomic with the fsync() + rename()
trick.

Regards,
Jonathan

[1] My excuse: laptop died.  My new setup is finally in place and
working well but this Monday I will be on vacation in a land of poor
connectivity --- agh.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] Add memory pool library
  2010-06-10 13:09 ` [PATCH 1/6] Add memory pool library Ramkumar Ramachandra
@ 2010-06-12  6:42   ` Jonathan Nieder
  2010-06-14 14:25     ` Ramkumar Ramachandra
  0 siblings, 1 reply; 21+ messages in thread
From: Jonathan Nieder @ 2010-06-12  6:42 UTC (permalink / raw)
  To: Ramkumar Ramachandra
  Cc: Git Mailing List, David Michael Barr, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

Ramkumar Ramachandra wrote:

> +#define obj_pool_gen(pre, obj_t, initial_capacity) \
> +static struct { \
> +	uint32_t committed; \
> +	uint32_t size; \
> +	uint32_t capacity; \
> +	obj_t *base; \
> +	FILE *file; \
> +} pre##_pool = { 0, 0, 0, NULL, NULL}; \
> +static void pre##_init(void) \
> +{ \
[...]

This defines a family of functions and not all pools use them all.
One workaround is to annotate them, like this:

 #ifdef __GNUC__
 #define MAYBE_UNUSED __attribute__((__unused__))
 #else
 #define MAYBE_UNUSED
 #endif

 #define obj_pool_gen(pre, obj_t, initial_capacity) \
 ... \
 static MAYBE_UNUSED void pre##_init(void) \
 { \
 ...

Could that work here?

The “unused” attribute was added in gcc 2.7.

Jonathan

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/6] Add library for string-specific memory pool
  2010-06-11 19:33   ` Junio C Hamano
@ 2010-06-14  9:26     ` Ramkumar Ramachandra
  2010-06-14 13:36       ` Junio C Hamano
  0 siblings, 1 reply; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-14  9:26 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git Mailing List, David Michael Barr, Jonathan Nieder,
	Sverre Rabbelier, Michael J Gruber

Hi Junio,

Junio C Hamano wrote:
>> diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
>> new file mode 100644
>> index 0000000..a69d165
>> --- /dev/null
>> +++ b/vcs-svn/string_pool.h
>> @@ -0,1 +1,15 @@
>
> How did you manage to get "-0,1" here?  This is supposed to be a new file.
> PATCH 4/6 throws the same puzzlement at me.

I hand-edited the patch in Emacs and expected that diff-mode would
take care of all this. Apparently, I was wrong- I will investigate
this problem further, because editing patches by hand and making some
last-minute corrections (just before send-email) is very useful.
Thanks for pointing this out!

And thanks for getting the series into `pu`! Now I can focus on
figuring out the SVN API.

-- Ram

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/6] Add library for string-specific memory pool
  2010-06-14  9:26     ` Ramkumar Ramachandra
@ 2010-06-14 13:36       ` Junio C Hamano
  2010-06-14 13:49         ` Ramkumar Ramachandra
  0 siblings, 1 reply; 21+ messages in thread
From: Junio C Hamano @ 2010-06-14 13:36 UTC (permalink / raw)
  To: Ramkumar Ramachandra
  Cc: Git Mailing List, David Michael Barr, Jonathan Nieder,
	Sverre Rabbelier, Michael J Gruber

Ramkumar Ramachandra <artagnon@gmail.com> writes:

> I hand-edited the patch in Emacs and expected that diff-mode would
> take care of all this. Apparently, I was wrong- I will investigate
> this problem further,...

The problem with Emacs diff mode I already know about.  It gets confused
by the "-- SP LF" line at the end of the format-patch output when it
recounts diff.

> And thanks for getting the series into `pu`! Now I can focus on
> figuring out the SVN API.

Don't thank me.  You did all the work with others on the list.

Note that there isn't that much difference between being in 'pu' and being
in the mailing list archive.  Depending on how further discussions go, the
series can be replaced with an improvement or even can be dropped as a
whole.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/6] Add library for string-specific memory pool
  2010-06-14 13:36       ` Junio C Hamano
@ 2010-06-14 13:49         ` Ramkumar Ramachandra
  2010-06-14 14:45           ` David Michael Barr
  0 siblings, 1 reply; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-14 13:49 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Git Mailing List, David Michael Barr, Jonathan Nieder,
	Sverre Rabbelier, Michael J Gruber

Hi Junio,

Junio C Hamano wrote:
> The problem with Emacs diff mode I already know about.  It gets confused
> by the "-- SP LF" line at the end of the format-patch output when it
> recounts diff.

I see. I'll probably prepare a patch then.

> Note that there isn't that much difference between being in 'pu' and being
> in the mailing list archive.  Depending on how further discussions go, the
> series can be replaced with an improvement or even can be dropped as a
> whole.

It's an indicator of progress, if not anything else. The project is
already pretty mature imho- after squashing in a few bugfixes, it
should be ready for `next`.

-- Ram

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] Add memory pool library
  2010-06-12  6:42   ` Jonathan Nieder
@ 2010-06-14 14:25     ` Ramkumar Ramachandra
  2010-06-14 14:44       ` Andreas Ericsson
  0 siblings, 1 reply; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-14 14:25 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Git Mailing List, David Michael Barr, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano

Hi Jonathan,

Jonathan Nieder wrote:
>  #ifdef __GNUC__
>  #define MAYBE_UNUSED __attribute__((__unused__))
>  #else
>  #define MAYBE_UNUSED
>  #endif

You'd suggested this earlier, but I was looking more for something
that we could use to mark some specific functions as unused instead of
marking everything as unused in the generation macro. Anyway, I have a
patch ready, and svn-fe compiles fine with -Wall -Werror.

> Could that work here?
>
> The “unused” attribute was added in gcc 2.7.

Junio? Is this a good solution, or should we think of something else?
What about other compilers?

-- Ram

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 0/6] Another attempt to get the SVN exporter merged
  2010-06-12  6:26 ` Jonathan Nieder
@ 2010-06-14 14:41   ` Ramkumar Ramachandra
  0 siblings, 0 replies; 21+ messages in thread
From: Ramkumar Ramachandra @ 2010-06-14 14:41 UTC (permalink / raw)
  To: Jonathan Nieder
  Cc: Git Mailing List, David Michael Barr, Sverre Rabbelier,
	Michael J Gruber, Junio C Hamano,
	Ævar Arnfjörð Bjarmason

Hi Jonathan,

Jonathan Nieder wrote:
> First of all, thanks for your work on this and sorry to take so long
> to respond[1].

Ah that's alright. I'm glad you're up and running again :)

> I planned to write a series adding svn-fe to contrib/ and then the
> computer died.  If someone else doesn’t do it first, hopefully I can
> try again this weekend.

Ævar has also been asking about this. I have a branch ready to roll
out from- I think I can do it today.

> Sad but perhaps necessary.  I guess this also opens the possibility of
> later making the save operation atomic with the fsync() + rename()
> trick.

Yeah, mmap looked a lot more elegant.

-- Ram

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 1/6] Add memory pool library
  2010-06-14 14:25     ` Ramkumar Ramachandra
@ 2010-06-14 14:44       ` Andreas Ericsson
  0 siblings, 0 replies; 21+ messages in thread
From: Andreas Ericsson @ 2010-06-14 14:44 UTC (permalink / raw)
  To: Ramkumar Ramachandra
  Cc: Jonathan Nieder, Git Mailing List, David Michael Barr,
	Sverre Rabbelier, Michael J Gruber, Junio C Hamano

On 06/14/2010 04:25 PM, Ramkumar Ramachandra wrote:
> Hi Jonathan,
> 
> Jonathan Nieder wrote:
>>   #ifdef __GNUC__
>>   #define MAYBE_UNUSED __attribute__((__unused__))
>>   #else
>>   #define MAYBE_UNUSED
>>   #endif
> 
> You'd suggested this earlier, but I was looking more for something
> that we could use to mark some specific functions as unused instead of
> marking everything as unused in the generation macro. Anyway, I have a
> patch ready, and svn-fe compiles fine with -Wall -Werror.
> 
>> Could that work here?
>>
>> The “unused” attribute was added in gcc 2.7.
> 
> Junio? Is this a good solution, or should we think of something else?
> What about other compilers?
> 

__attribute__((stuff)) is gcc-specific. It's usually macro'd away with
#ifndef __GNUC__
#define __attribute__(x)
#endif
which is why it requires double parantheses. gcc warns for but doesn't
fail on unknown attributes, so it should be safe to use this with
gcc older than 2.7 as well. Or you make it conditional to __GNUC__
being >= 2 and __GNUC_MINOR__ being >= 7.

-- 
Andreas Ericsson                   andreas.ericsson@op5.se
OP5 AB                             www.op5.se
Tel: +46 8-230225                  Fax: +46 8-230231

Considering the successes of the wars on alcohol, poverty, drugs and
terror, I think we should give some serious thought to declaring war
on peace.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH 3/6] Add library for string-specific memory pool
  2010-06-14 13:49         ` Ramkumar Ramachandra
@ 2010-06-14 14:45           ` David Michael Barr
  0 siblings, 0 replies; 21+ messages in thread
From: David Michael Barr @ 2010-06-14 14:45 UTC (permalink / raw)
  To: Ramkumar Ramachandra
  Cc: Junio C Hamano, Git Mailing List, Jonathan Nieder,
	Sverre Rabbelier, Michael J Gruber

Hi Ram,

>> Note that there isn't that much difference between being in 'pu' and being
>> in the mailing list archive.  Depending on how further discussions go, the
>> series can be replaced with an improvement or even can be dropped as a
>> whole.
> 
> It's an indicator of progress, if not anything else. The project is
> already pretty mature imho- after squashing in a few bugfixes, it
> should be ready for `next`.

I have a feeling that these patches will need a bit more love before they are
ready for 'next'. The persistence component is the least mature of the lot.
I'd really like some feedback on making the persistence robust and simple.
Now that persistence is append-only, the file based representation no longer
need be identical to the in-memory representation.

I've tried several times to simplify the buffer_read_line() method in line_buffer.h
Every time I've ended up with slightly different behaviour.
Someone well versed in I/O might be able to greatly simplify it.
It may well be reduced to a simple wrapper around strbuf methods.

I'm still toying in my head about how to simplify the data structure used to
represent the trees. Conceptually, it is a multiway tree with the constraint that
the labels of siblings share a common prefix at the parent. It is implemented as
a ternary tree with left and right links to siblings in the multiway tree and a
middle link to the 'root' child in the multiway tree, from which all children are
reachable via left/right links.
As the code stands, the middle link is indirected via a 'directory' node.
I'd like to remove this redundancy and make the design of the structure clearer.

There is scope for a massive rename of methods, arguments and variables so
that the code is easier to read.

--
David Barr.

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2010-06-14 14:46 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-06-10 13:09 [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
2010-06-10 13:09 ` [PATCH 1/6] Add memory pool library Ramkumar Ramachandra
2010-06-12  6:42   ` Jonathan Nieder
2010-06-14 14:25     ` Ramkumar Ramachandra
2010-06-14 14:44       ` Andreas Ericsson
2010-06-10 13:09 ` [PATCH 2/6] Add cpp macro implementation of treaps Ramkumar Ramachandra
2010-06-10 13:09 ` [PATCH 3/6] Add library for string-specific memory pool Ramkumar Ramachandra
2010-06-11 19:33   ` Junio C Hamano
2010-06-14  9:26     ` Ramkumar Ramachandra
2010-06-14 13:36       ` Junio C Hamano
2010-06-14 13:49         ` Ramkumar Ramachandra
2010-06-14 14:45           ` David Michael Barr
2010-06-10 13:09 ` [PATCH 4/6] Add stream helper library Ramkumar Ramachandra
2010-06-10 13:09 ` [PATCH 5/6] Add infrastructure to write revisions in fast-export format Ramkumar Ramachandra
2010-06-10 13:09 ` [PATCH 6/6] Add SVN dump parser Ramkumar Ramachandra
2010-06-10 15:24   ` Ramkumar Ramachandra
     [not found] ` <AANLkTin3iQK7YHGgjxlAjtchu3ZpntjQHK7LkfxxJj6q@mail.gmail.com>
2010-06-10 13:22   ` [PATCH 0/6] Another attempt to get the SVN exporter merged Ramkumar Ramachandra
2010-06-12  6:26 ` Jonathan Nieder
2010-06-14 14:41   ` Ramkumar Ramachandra
  -- strict thread matches above, loose matches on Subject: below --
2010-06-04 13:41 [PATCH 0/6] Merge David's SVN exporter Ramkumar Ramachandra
2010-06-04 13:41 ` [PATCH 6/6] Add SVN dump parser Ramkumar Ramachandra
2010-06-04 13:26 [PATCH 0/6] Merge David's SVN exporter into git.git Ramkumar Ramachandra
2010-06-04 13:26 ` [PATCH 6/6] Add SVN dump parser Ramkumar Ramachandra

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).