git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Elijah Newren <newren@gmail.com>
To: git@vger.kernel.org
Cc: larsxschneider@gmail.com, sandals@crustytoothpaste.net,
	peff@peff.net, me@ttaylorr.com, jrnieder@gmail.com,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH 09/10] fast-export: add a --show-original-ids option to show original names
Date: Sat, 10 Nov 2018 22:23:11 -0800
Message-ID: <20181111062312.16342-10-newren@gmail.com> (raw)
In-Reply-To: <20181111062312.16342-1-newren@gmail.com>

Knowing the original names (hashes) of commits, blobs, and tags can
sometimes enable post-filtering that would otherwise be difficult or
impossible.  In particular, the desire to rewrite commit messages which
refer to other prior commits (on top of whatever other filtering is
being done) is very difficult without knowing the original names of each
commit.

This commit teaches a new --show-original-ids option to fast-export
which will make it add a 'originally <hash>' line to blob, commits, and
tags.  It also teaches fast-import to parse (and ignore) such lines.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
 Documentation/git-fast-export.txt |  7 +++++++
 builtin/fast-export.c             | 20 +++++++++++++++-----
 fast-import.c                     | 17 +++++++++++++++++
 t/t9350-fast-export.sh            | 17 +++++++++++++++++
 4 files changed, 56 insertions(+), 5 deletions(-)

diff --git a/Documentation/git-fast-export.txt b/Documentation/git-fast-export.txt
index 2916096bdd..4e40f0b99a 100644
--- a/Documentation/git-fast-export.txt
+++ b/Documentation/git-fast-export.txt
@@ -121,6 +121,13 @@ marks the same across runs.
 	used by a repository which already contains the necessary
 	parent commits.
 
+--show-original-ids::
+	Add an extra directive to the output for commits and blobs,
+	`originally <SHA1SUM>`.  While such directives will likely be
+	ignored by importers such as git-fast-import, it may be useful
+	for intermediary filters (e.g. for rewriting commit messages
+	which refer to older commits, or for stripping blobs by id).
+
 --refspec::
 	Apply the specified refspec to each ref exported. Multiple of them can
 	be specified.
diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index ea9c5b1c00..cc01dcc90c 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -38,6 +38,7 @@ static int use_done_feature;
 static int no_data;
 static int full_tree;
 static int reference_excluded_commits;
+static int show_original_ids;
 static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
 static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
 static struct refspec refspecs = REFSPEC_INIT_FETCH;
@@ -271,7 +272,10 @@ static void export_blob(const struct object_id *oid)
 
 	mark_next_object(object);
 
-	printf("blob\nmark :%"PRIu32"\ndata %lu\n", last_idnum, size);
+	printf("blob\nmark :%"PRIu32"\n", last_idnum);
+	if (show_original_ids)
+		printf("originally %s\n", oid_to_hex(oid));
+	printf("data %lu\n", size);
 	if (size && fwrite(buf, size, 1, stdout) != 1)
 		die_errno("could not write blob '%s'", oid_to_hex(oid));
 	printf("\n");
@@ -628,8 +632,10 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 		reencoded = reencode_string(message, "UTF-8", encoding);
 	if (!commit->parents)
 		printf("reset %s\n", refname);
-	printf("commit %s\nmark :%"PRIu32"\n%.*s\n%.*s\ndata %u\n%s",
-	       refname, last_idnum,
+	printf("commit %s\nmark :%"PRIu32"\n", refname, last_idnum);
+	if (show_original_ids)
+		printf("originally %s\n", oid_to_hex(&commit->object.oid));
+	printf("%.*s\n%.*s\ndata %u\n%s",
 	       (int)(author_end - author), author,
 	       (int)(committer_end - committer), committer,
 	       (unsigned)(reencoded
@@ -807,8 +813,10 @@ static void handle_tag(const char *name, struct tag *tag)
 
 	if (starts_with(name, "refs/tags/"))
 		name += 10;
-	printf("tag %s\nfrom :%d\n%.*s%sdata %d\n%.*s\n",
-	       name, tagged_mark,
+	printf("tag %s\nfrom :%d\n", name, tagged_mark);
+	if (show_original_ids)
+		printf("originally %s\n", oid_to_hex(&tag->object.oid));
+	printf("%.*s%sdata %d\n%.*s\n",
 	       (int)(tagger_end - tagger), tagger,
 	       tagger == tagger_end ? "" : "\n",
 	       (int)message_size, (int)message_size, message ? message : "");
@@ -1089,6 +1097,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
 		OPT_BOOL(0, "anonymize", &anonymize, N_("anonymize output")),
 		OPT_BOOL(0, "reference-excluded-parents",
 			 &reference_excluded_commits, N_("Reference parents which are not in fast-export stream by sha1sum")),
+		OPT_BOOL(0, "show-original-ids", &show_original_ids,
+			    N_("Show original sha1sums of blobs/commits")),
 
 		OPT_END()
 	};
diff --git a/fast-import.c b/fast-import.c
index 95600c78e0..232b6a8b8d 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -14,11 +14,13 @@ Format of STDIN stream:
 
   new_blob ::= 'blob' lf
     mark?
+    originally?
     file_content;
   file_content ::= data;
 
   new_commit ::= 'commit' sp ref_str lf
     mark?
+    originally?
     ('author' (sp name)? sp '<' email '>' sp when lf)?
     'committer' (sp name)? sp '<' email '>' sp when lf
     commit_msg
@@ -49,6 +51,7 @@ Format of STDIN stream:
 
   new_tag ::= 'tag' sp tag_str lf
     'from' sp commit-ish lf
+    originally?
     ('tagger' (sp name)? sp '<' email '>' sp when lf)?
     tag_msg;
   tag_msg ::= data;
@@ -73,6 +76,8 @@ Format of STDIN stream:
   data ::= (delimited_data | exact_data)
     lf?;
 
+  originally ::= 'originally' sp not_lf+ lf
+
     # note: delim may be any string but must not contain lf.
     # data_line may contain any data but must not be exactly
     # delim.
@@ -1968,6 +1973,13 @@ static void parse_mark(void)
 		next_mark = 0;
 }
 
+static void parse_original_identifier(void)
+{
+	const char *v;
+	if (skip_prefix(command_buf.buf, "originally ", &v))
+		read_next_command();
+}
+
 static int parse_data(struct strbuf *sb, uintmax_t limit, uintmax_t *len_res)
 {
 	const char *data;
@@ -2110,6 +2122,7 @@ static void parse_new_blob(void)
 {
 	read_next_command();
 	parse_mark();
+	parse_original_identifier();
 	parse_and_store_blob(&last_blob, NULL, next_mark);
 }
 
@@ -2733,6 +2746,7 @@ static void parse_new_commit(const char *arg)
 
 	read_next_command();
 	parse_mark();
+	parse_original_identifier();
 	if (skip_prefix(command_buf.buf, "author ", &v)) {
 		author = parse_ident(v);
 		read_next_command();
@@ -2865,6 +2879,9 @@ static void parse_new_tag(const char *arg)
 		die("Invalid ref name or SHA1 expression: %s", from);
 	read_next_command();
 
+	/* originally ... */
+	parse_original_identifier();
+
 	/* tagger ... */
 	if (skip_prefix(command_buf.buf, "tagger ", &v)) {
 		tagger = parse_ident(v);
diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh
index c2f40d6a40..5ad6669910 100755
--- a/t/t9350-fast-export.sh
+++ b/t/t9350-fast-export.sh
@@ -77,6 +77,23 @@ test_expect_success 'fast-export --reference-excluded-parents master~2..master'
 		 test $MASTER = $(git rev-parse --verify refs/heads/rewrite))
 '
 
+test_expect_success 'fast-export --show-original-ids' '
+
+	git fast-export --show-original-ids master >output &&
+	grep ^originally output| sed -e s/^originally.// | sort >actual &&
+	git rev-list --objects master muss >objects-and-names &&
+	awk "{print \$1}" objects-and-names | sort >commits-trees-blobs &&
+	comm -23 actual commits-trees-blobs >unfound &&
+	test_must_be_empty unfound
+'
+
+test_expect_success 'fast-export --show-original-ids | git fast-import' '
+
+	git fast-export --show-original-ids master muss | git fast-import --quiet &&
+	test $MASTER = $(git rev-parse --verify refs/heads/master) &&
+	test $MUSS = $(git rev-parse --verify refs/tags/muss)
+'
+
 test_expect_success 'iso-8859-1' '
 
 	git config i18n.commitencoding ISO8859-1 &&
-- 
2.19.1.866.g82735bcbde


  parent reply index

Thread overview: 90+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-23 13:04 Import/Export as a fast way to purge files from Git? Lars Schneider
2018-09-23 14:55 ` Eric Sunshine
2018-09-23 15:58   ` Lars Schneider
2018-09-23 15:53 ` brian m. carlson
2018-09-23 17:04   ` Jeff King
2018-09-24 17:24 ` Elijah Newren
2018-10-31 19:15   ` Lars Schneider
2018-11-01  7:12     ` Elijah Newren
2018-11-11  6:23       ` [PATCH 00/10] fast export and import fixes and features Elijah Newren
2018-11-11  6:23         ` [PATCH 01/10] git-fast-import.txt: fix documentation for --quiet option Elijah Newren
2018-11-11  6:33           ` Jeff King
2018-11-11  6:23         ` [PATCH 02/10] git-fast-export.txt: clarify misleading documentation about rev-list args Elijah Newren
2018-11-11  6:36           ` Jeff King
2018-11-11  7:17             ` Elijah Newren
2018-11-13 23:25               ` Elijah Newren
2018-11-13 23:39                 ` Jonathan Nieder
2018-11-14  0:02                   ` Elijah Newren
2018-11-11  6:23         ` [PATCH 03/10] fast-export: use value from correct enum Elijah Newren
2018-11-11  6:36           ` Jeff King
2018-11-11 20:10             ` Ævar Arnfjörð Bjarmason
2018-11-12  9:12               ` Ævar Arnfjörð Bjarmason
2018-11-12 11:31               ` Jeff King
2018-11-11  6:23         ` [PATCH 04/10] fast-export: avoid dying when filtering by paths and old tags exist Elijah Newren
2018-11-11  6:44           ` Jeff King
2018-11-11  7:38             ` Elijah Newren
2018-11-12 12:32               ` Jeff King
2018-11-12 22:50             ` brian m. carlson
2018-11-13 14:38               ` Jeff King
2018-11-11  6:23         ` [PATCH 05/10] fast-export: move commit rewriting logic into a function for reuse Elijah Newren
2018-11-11  6:47           ` Jeff King
2018-11-11  6:23         ` [PATCH 06/10] fast-export: when using paths, avoid corrupt stream with non-existent mark Elijah Newren
2018-11-11  6:53           ` Jeff King
2018-11-11  8:01             ` Elijah Newren
2018-11-12 12:45               ` Jeff King
2018-11-12 15:36                 ` Elijah Newren
2018-11-11  6:23         ` [PATCH 07/10] fast-export: ensure we export requested refs Elijah Newren
2018-11-11  7:02           ` Jeff King
2018-11-11  8:20             ` Elijah Newren
2018-11-11  6:23         ` [PATCH 08/10] fast-export: add --reference-excluded-parents option Elijah Newren
2018-11-11  7:11           ` Jeff King
2018-11-11  6:23         ` Elijah Newren [this message]
2018-11-11  7:20           ` [PATCH 09/10] fast-export: add a --show-original-ids option to show original names Jeff King
2018-11-11  8:32             ` Elijah Newren
2018-11-12 12:53               ` Jeff King
2018-11-12 15:46                 ` Elijah Newren
2018-11-12 16:31                   ` Jeff King
2018-11-11  6:23         ` [PATCH 10/10] fast-export: add --always-show-modify-after-rename Elijah Newren
2018-11-11  7:23           ` Jeff King
2018-11-11  8:42             ` Elijah Newren
2018-11-12 12:58               ` Jeff King
2018-11-12 18:08                 ` Elijah Newren
2018-11-13 14:45                   ` Jeff King
2018-11-13 17:10                     ` Elijah Newren
2018-11-14  7:14                       ` Jeff King
2018-11-11  7:27         ` [PATCH 00/10] fast export and import fixes and features Jeff King
2018-11-11  8:44           ` Elijah Newren
2018-11-12 13:00             ` Jeff King
2018-11-14  0:25         ` [PATCH v2 00/11] " Elijah Newren
2018-11-14  0:25           ` [PATCH v2 01/11] git-fast-import.txt: fix documentation for --quiet option Elijah Newren
2018-11-14  0:25           ` [PATCH v2 02/11] git-fast-export.txt: clarify misleading documentation about rev-list args Elijah Newren
2018-11-14  0:25           ` [PATCH v2 03/11] fast-export: use value from correct enum Elijah Newren
2018-11-14  0:25           ` [PATCH v2 04/11] fast-export: avoid dying when filtering by paths and old tags exist Elijah Newren
2018-11-14 19:17             ` SZEDER Gábor
2018-11-14 23:13               ` Elijah Newren
2018-11-14  0:25           ` [PATCH v2 05/11] fast-export: move commit rewriting logic into a function for reuse Elijah Newren
2018-11-14  0:25           ` [PATCH v2 06/11] fast-export: when using paths, avoid corrupt stream with non-existent mark Elijah Newren
2018-11-14  0:25           ` [PATCH v2 07/11] fast-export: ensure we export requested refs Elijah Newren
2018-11-14  0:25           ` [PATCH v2 08/11] fast-export: add --reference-excluded-parents option Elijah Newren
2018-11-14 19:27             ` SZEDER Gábor
2018-11-14 23:16               ` Elijah Newren
2018-11-14  0:25           ` [PATCH v2 09/11] fast-import: remove unmaintained duplicate documentation Elijah Newren
2018-11-14  0:25           ` [PATCH v2 10/11] fast-export: add a --show-original-ids option to show original names Elijah Newren
2018-11-14  0:26           ` [PATCH v2 11/11] fast-export: add --always-show-modify-after-rename Elijah Newren
2018-11-14  7:25           ` [PATCH v2 00/11] fast export and import fixes and features Jeff King
2018-11-16  7:59           ` [PATCH v3 " Elijah Newren
2018-11-16  7:59             ` [PATCH v3 01/11] fast-export: convert sha1 to oid Elijah Newren
2018-11-16  7:59             ` [PATCH v3 02/11] git-fast-import.txt: fix documentation for --quiet option Elijah Newren
2018-11-16  7:59             ` [PATCH v3 03/11] git-fast-export.txt: clarify misleading documentation about rev-list args Elijah Newren
2018-11-16  7:59             ` [PATCH v3 04/11] fast-export: use value from correct enum Elijah Newren
2018-11-16  7:59             ` [PATCH v3 05/11] fast-export: avoid dying when filtering by paths and old tags exist Elijah Newren
2018-11-16  7:59             ` [PATCH v3 06/11] fast-export: move commit rewriting logic into a function for reuse Elijah Newren
2018-11-16  7:59             ` [PATCH v3 07/11] fast-export: when using paths, avoid corrupt stream with non-existent mark Elijah Newren
2018-11-16  7:59             ` [PATCH v3 08/11] fast-export: ensure we export requested refs Elijah Newren
2018-11-16  7:59             ` [PATCH v3 09/11] fast-export: add --reference-excluded-parents option Elijah Newren
2018-11-16  7:59             ` [PATCH v3 10/11] fast-import: remove unmaintained duplicate documentation Elijah Newren
2018-11-16  7:59             ` [PATCH v3 11/11] fast-export: add a --show-original-ids option to show original names Elijah Newren
2018-11-16 12:29               ` SZEDER Gábor
2018-11-16  8:50             ` [PATCH v3 00/11] fast export and import fixes and features Jeff King
2018-11-12  9:17       ` Import/Export as a fast way to purge files from Git? Ævar Arnfjörð Bjarmason
2018-11-12 15:34         ` Elijah Newren

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181111062312.16342-10-newren@gmail.com \
    --to=newren@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    --cc=larsxschneider@gmail.com \
    --cc=me@ttaylorr.com \
    --cc=peff@peff.net \
    --cc=sandals@crustytoothpaste.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox