git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jonathan Nieder <jrnieder@gmail.com>
To: git@vger.kernel.org
Cc: David Barr <david.barr@cordelta.com>,
	Ramkumar Ramachandra <artagnon@gmail.com>,
	Sverre Rabbelier <srabbelier@gmail.com>,
	Sam Vilain <sam@vilain.net>, Stephen Bash <bash@genarts.com>,
	Tomas Carnecky <tom@dbservice.com>
Subject: [PATCH 06/10] vcs-svn: do not rely on marks for old blobs
Date: Fri, 10 Dec 2010 04:26:19 -0600	[thread overview]
Message-ID: <20101210102619.GF26331@burratino> (raw)
In-Reply-To: <20101210102007.GA26298@burratino>

Retrieve old blobs by name and revision number from fast-import.
One step closer to bounded memory usage in svn-fe.

Signed-off-by: Jonathan Nieder <jrnieder@gmail.com>
---
This patch is superfluous except as a demo: it shows how to parse
'ls' responses.

 vcs-svn/fast_export.c |   53 +++++++++++++++++++++++++++++++++++++++++++++++++
 vcs-svn/fast_export.h |    3 ++
 vcs-svn/string_pool.c |    2 +-
 vcs-svn/string_pool.h |    2 +-
 vcs-svn/svndump.c     |    6 +++++
 5 files changed, 64 insertions(+), 2 deletions(-)

diff --git a/vcs-svn/fast_export.c b/vcs-svn/fast_export.c
index 960b252..cca9810 100644
--- a/vcs-svn/fast_export.c
+++ b/vcs-svn/fast_export.c
@@ -88,6 +88,21 @@ static int ends_with(const char *s, size_t len, const char *suffix)
 	return !memcmp(s + len - suffixlen, suffix, suffixlen);
 }
 
+static int parse_ls_response_line(const char *line, struct strbuf *objnam)
+{
+	const char *end = line + strlen(line);
+	const char *name, *tab;
+
+	if (end - line < strlen("100644 blob "))
+		return error("ls response too short: %s", line);
+	name = line + strlen("100644 blob ");
+	tab = memchr(name, '\t', end - name);
+	if (!tab)
+		return error("ls response does not contain tab: %s", line);
+	strbuf_add(objnam, name, tab - name);
+	return 0;
+}
+
 static int parse_cat_response_line(const char *header, off_t *len)
 {
 	size_t headerlen = strlen(header);
@@ -129,6 +144,31 @@ static off_t cat_mark(uint32_t mark)
 	return length;
 }
 
+static off_t cat_from_rev(uint32_t rev, const uint32_t *path)
+{
+	const char *response;
+	off_t length = length;
+	struct strbuf blob_name = STRBUF_INIT;
+
+	/* ls :5 "path/to/old/file" */
+	printf("ls :%"PRIu32" \"", rev);
+	pool_print_seq(REPO_MAX_PATH_DEPTH, path, '/', stdout);
+	printf("\"\n");
+	fflush(stdout);
+
+	response = get_response_line();
+	if (parse_ls_response_line(response, &blob_name))
+		die("invalid ls response: %s", response);
+
+	printf("cat-blob %s\n", blob_name.buf);
+	fflush(stdout);
+	response = get_response_line();
+	if (parse_cat_response_line(response, &length))
+		die("invalid cat-blob response: %s", response);
+	strbuf_release(&blob_name);
+	return length;
+}
+
 static long delta_apply(uint32_t mark, off_t len, struct line_buffer *input,
 			off_t preimage_len, uint32_t old_mode)
 {
@@ -195,3 +235,16 @@ void fast_export_blob_delta(uint32_t mode, uint32_t mark,
 						old_mode);
 	record_postimage(mark, mode, postimage_len);
 }
+
+void fast_export_blob_delta_rev(uint32_t mode, uint32_t mark, uint32_t old_mode,
+				uint32_t old_rev, const uint32_t *old_path,
+				uint32_t len, struct line_buffer *input)
+{
+	long postimage_len;
+	if (len > maximum_signed_value_of_type(off_t))
+		die("enormous delta");
+	postimage_len = delta_apply(mark, (off_t) len, input,
+					cat_from_rev(old_rev, old_path),
+					old_mode);
+	record_postimage(mark, mode, postimage_len);
+}
diff --git a/vcs-svn/fast_export.h b/vcs-svn/fast_export.h
index 6f77c3b..487d3d4 100644
--- a/vcs-svn/fast_export.h
+++ b/vcs-svn/fast_export.h
@@ -13,5 +13,8 @@ void fast_export_blob(uint32_t mode, uint32_t mark, uint32_t len,
 void fast_export_blob_delta(uint32_t mode, uint32_t mark,
 			uint32_t old_mode, uint32_t old_mark,
 			uint32_t len, struct line_buffer *input);
+void fast_export_blob_delta_rev(uint32_t mode, uint32_t mark, uint32_t old_mode,
+			uint32_t old_rev, const uint32_t *old_path,
+			uint32_t len, struct line_buffer *input);
 
 #endif
diff --git a/vcs-svn/string_pool.c b/vcs-svn/string_pool.c
index f5b1da8..c08abac 100644
--- a/vcs-svn/string_pool.c
+++ b/vcs-svn/string_pool.c
@@ -65,7 +65,7 @@ uint32_t pool_tok_r(char *str, const char *delim, char **saveptr)
 	return token ? pool_intern(token) : ~0;
 }
 
-void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream)
+void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream)
 {
 	uint32_t i;
 	for (i = 0; i < len && ~seq[i]; i++) {
diff --git a/vcs-svn/string_pool.h b/vcs-svn/string_pool.h
index 222fb66..3720cf8 100644
--- a/vcs-svn/string_pool.h
+++ b/vcs-svn/string_pool.h
@@ -4,7 +4,7 @@
 uint32_t pool_intern(const char *key);
 const char *pool_fetch(uint32_t entry);
 uint32_t pool_tok_r(char *str, const char *delim, char **saveptr);
-void pool_print_seq(uint32_t len, uint32_t *seq, char delim, FILE *stream);
+void pool_print_seq(uint32_t len, const uint32_t *seq, char delim, FILE *stream);
 uint32_t pool_tok_seq(uint32_t sz, uint32_t *seq, const char *delim, char *str);
 void pool_reset(void);
 
diff --git a/vcs-svn/svndump.c b/vcs-svn/svndump.c
index c6d6337..da968fa 100644
--- a/vcs-svn/svndump.c
+++ b/vcs-svn/svndump.c
@@ -259,6 +259,12 @@ static void handle_node(void)
 		fast_export_blob(node_ctx.type, mark, node_ctx.textLength, &input);
 		return;
 	}
+	if (node_ctx.srcRev) {
+		fast_export_blob_delta_rev(node_ctx.type, mark, old_mode,
+					node_ctx.srcRev, node_ctx.src,
+					node_ctx.textLength, &input);
+		return;
+	}
 	fast_export_blob_delta(node_ctx.type, mark, old_mode, old_mark,
 				node_ctx.textLength, &input);
 }
-- 
1.7.2.4

  parent reply	other threads:[~2010-12-10 10:26 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-12-10 10:20 [RFC/PATCH 00/10] vcs-svn: prepare for (implement?) incremental import Jonathan Nieder
2010-12-10 10:21 ` [PATCH 01/10] vcs-svn: use higher mark numbers for blobs Jonathan Nieder
2010-12-10 10:22 ` [PATCH 02/10] vcs-svn: save marks for imported commits Jonathan Nieder
2011-03-06 11:15   ` Jonathan Nieder
2010-12-10 10:23 ` [PATCH 03/10] vcs-svn: introduce cat_mark function to retrieve a marked blob Jonathan Nieder
2010-12-10 10:23 ` [PATCH 04/10] vcs-svn: make apply_delta caller retrieve preimage Jonathan Nieder
2010-12-10 10:25 ` [PATCH 05/10] vcs-svn: split off function to export result from delta application Jonathan Nieder
2010-12-10 10:26 ` Jonathan Nieder [this message]
2010-12-10 10:27 ` [PATCH 07/10] vcs-svn: split off function to make 'ls' requests Jonathan Nieder
2010-12-10 10:28 ` [PATCH 08/10] vcs-svn: prepare to eliminate repo_tree structure Jonathan Nieder
2011-03-06 12:52   ` [PATCH v2] " Jonathan Nieder
2011-03-06 20:41     ` David Barr
2010-12-10 10:30 ` [PATCH 09/10] vcs-svn: simplifications for repo_modify_path et al Jonathan Nieder
2010-12-10 10:33 ` [PATCH 10/10] vcs-svn: eliminate repo_tree structure Jonathan Nieder
     [not found] ` <C59168D0-B409-4A83-B96C-8CCD42D0B62F@cordelta.com>
     [not found]   ` <20101211184654.GA17464@burratino>
2010-12-11 22:47     ` [RFC/PATCH] fast-import: treat filemodify with empty tree as delete Jonathan Nieder
2010-12-11 23:00     ` [PATCH db/vcs-svn-incremental] vcs-svn: avoid git-isms in fast-import stream Jonathan Nieder
2010-12-11 23:04 ` [PATCH 12/10] vcs-svn: quote paths correctly for ls command David Michael Barr
2010-12-11 23:11   ` [PATCH db/vcs-svn-incremental] vcs-svn: quote all paths passed to fast-import Jonathan Nieder
2010-12-12  9:32 ` [PATCH 13/10] vcs-svn: use mark from previous import for parent commit David Michael Barr
2010-12-12 17:06   ` Jonathan Nieder
2011-03-06 22:54 ` [PATCH v2 00/12] vcs-svn: incremental import Jonathan Nieder
2011-03-06 23:03   ` [PATCH 01/12] vcs-svn: use higher mark numbers for blobs Jonathan Nieder
2011-03-08 19:08     ` Junio C Hamano
2011-03-09  6:55       ` Jonathan Nieder
2011-03-06 23:04   ` [PATCH 02/12] vcs-svn: save marks for imported commits Jonathan Nieder
2011-03-06 23:07   ` [PATCH 03/12] vcs-svn: introduce repo_read_path to check the content at a path Jonathan Nieder
2011-03-06 23:08   ` [PATCH 04/12] vcs-svn: handle_node: use repo_read_path Jonathan Nieder
2011-03-06 23:09   ` [PATCH 05/12] vcs-svn: simplify repo_modify_path and repo_copy Jonathan Nieder
2011-03-06 23:09   ` [PATCH 06/12] vcs-svn: add a comment before each commit Jonathan Nieder
2011-03-06 23:10   ` [PATCH 07/12] vcs-svn: allow input errors to be detected promptly Jonathan Nieder
2011-03-06 23:11   ` [PATCH 08/12] vcs-svn: set up channel to read fast-import cat-blob response Jonathan Nieder
2011-03-06 23:12   ` [PATCH 09/12] vcs-svn: eliminate repo_tree structure Jonathan Nieder
2011-03-06 23:12   ` [PATCH 10/12] vcs-svn: quote paths correctly for ls command Jonathan Nieder
2011-03-06 23:13   ` [PATCH 11/12] vcs-svn: handle filenames with dq correctly Jonathan Nieder
2011-03-06 23:16   ` [PATCH 12/12] vcs-svn: use mark from previous import for parent commit Jonathan Nieder
2011-03-07 12:24   ` [PATCH v2 00/12] vcs-svn: incremental import Sverre Rabbelier
2011-03-07 21:23     ` Jonathan Nieder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20101210102619.GF26331@burratino \
    --to=jrnieder@gmail.com \
    --cc=artagnon@gmail.com \
    --cc=bash@genarts.com \
    --cc=david.barr@cordelta.com \
    --cc=git@vger.kernel.org \
    --cc=sam@vilain.net \
    --cc=srabbelier@gmail.com \
    --cc=tom@dbservice.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).