git@vger.kernel.org mailing list mirror (one of many)
 help / Atom feed
From: Elijah Newren <newren@gmail.com>
To: git@vger.kernel.org
Cc: larsxschneider@gmail.com, sandals@crustytoothpaste.net,
	peff@peff.net, me@ttaylorr.com, jrnieder@gmail.com,
	Elijah Newren <newren@gmail.com>
Subject: [PATCH 07/10] fast-export: ensure we export requested refs
Date: Sat, 10 Nov 2018 22:23:09 -0800
Message-ID: <20181111062312.16342-8-newren@gmail.com> (raw)
In-Reply-To: <20181111062312.16342-1-newren@gmail.com>

If file paths are specified to fast-export and a ref points to a commit
that does not touch any of the relevant paths, then that ref would
sometimes fail to be exported.  (This depends on whether any ancestors
of the commit which do touch the relevant paths would be exported with
that same ref name or a different ref name.)  To avoid this problem,
put *all* specified refs into extra_refs to start, and then as we export
each commit, remove the refname used in the 'commit $REFNAME' directive
from extra_refs.  Then, in handle_tags_and_duplicates() we know which
refs actually do need a manual reset directive in order to be included.

This means that we do need some special handling for excluded refs; e.g.
if someone runs
   git fast-export ^master master
then they've asked for master to be exported, but they have also asked
for the commit which master points to and all of its history to be
excluded.  That logically means ref deletion.  Previously, such refs
were just silently omitted from being exported despite having been
explicitly requested for export.

Signed-off-by: Elijah Newren <newren@gmail.com>
---
NOTE: I was hoping the strmap API proposal would materialize, but I either
missed it or it hasn't shown up.  The usage of string_list in this patch
would be better replaced by what Peff suggested.

 builtin/fast-export.c  | 48 +++++++++++++++++++++++++++++++-----------
 t/t9350-fast-export.sh | 16 +++++++++++---
 2 files changed, 49 insertions(+), 15 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index 5648a8ce9c..0d0bbd9445 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -38,6 +38,7 @@ static int use_done_feature;
 static int no_data;
 static int full_tree;
 static struct string_list extra_refs = STRING_LIST_INIT_NODUP;
+static struct string_list tag_refs = STRING_LIST_INIT_NODUP;
 static struct refspec refspecs = REFSPEC_INIT_FETCH;
 static int anonymize;
 static struct revision_sources revision_sources;
@@ -611,6 +612,7 @@ static void handle_commit(struct commit *commit, struct rev_info *rev,
 			export_blob(&diff_queued_diff.queue[i]->two->oid);
 
 	refname = *revision_sources_at(&revision_sources, commit);
+	string_list_remove(&extra_refs, refname, 0);
 	if (anonymize) {
 		refname = anonymize_refname(refname);
 		anonymize_ident_line(&committer, &committer_end);
@@ -814,7 +816,7 @@ static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
 		/* handle nested tags */
 		while (tag && tag->object.type == OBJ_TAG) {
 			parse_object(the_repository, &tag->object.oid);
-			string_list_append(&extra_refs, full_name)->util = tag;
+			string_list_append(&tag_refs, full_name)->util = tag;
 			tag = (struct tag *)tag->tagged;
 		}
 		if (!tag)
@@ -873,25 +875,30 @@ static void get_tags_and_duplicates(struct rev_cmdline_info *info)
 		}
 
 		/*
-		 * This ref will not be updated through a commit, lets make
-		 * sure it gets properly updated eventually.
+		 * Make sure this ref gets properly updated eventually, whether
+		 * through a commit or manually at the end.
 		 */
-		if (*revision_sources_at(&revision_sources, commit) ||
-		    commit->object.flags & SHOWN)
+		if (e->item->type != OBJ_TAG)
 			string_list_append(&extra_refs, full_name)->util = commit;
+
 		if (!*revision_sources_at(&revision_sources, commit))
 			*revision_sources_at(&revision_sources, commit) = full_name;
 	}
+
+	string_list_sort(&extra_refs);
+	string_list_remove_duplicates(&extra_refs, 0);
 }
 
-static void handle_tags_and_duplicates(void)
+static void handle_tags_and_duplicates(struct string_list *extras)
 {
 	struct commit *commit;
 	int i;
 
-	for (i = extra_refs.nr - 1; i >= 0; i--) {
-		const char *name = extra_refs.items[i].string;
-		struct object *object = extra_refs.items[i].util;
+	for (i = extras->nr - 1; i >= 0; i--) {
+		const char *name = extras->items[i].string;
+		struct object *object = extras->items[i].util;
+		int mark;
+
 		switch (object->type) {
 		case OBJ_TAG:
 			handle_tag(name, (struct tag *)object);
@@ -912,8 +919,24 @@ static void handle_tags_and_duplicates(void)
 				       name, sha1_to_hex(null_sha1));
 				continue;
 			}
-			printf("reset %s\nfrom :%d\n\n", name,
-			       get_object_mark(&commit->object));
+
+			mark = get_object_mark(&commit->object);
+			if (!mark) {
+				/*
+				 * Getting here means we have a commit which
+				 * was excluded by a negative refspec (e.g.
+				 * fast-export ^master master).  If the user
+				 * wants the branch exported but every commit
+				 * in its history to be deleted, that sounds
+				 * like a ref deletion to me.
+				 */
+				printf("reset %s\nfrom %s\n\n",
+				       name, sha1_to_hex(null_sha1));
+				continue;
+			}
+
+			printf("reset %s\nfrom :%d\n\n", name, mark
+			       );
 			show_progress();
 			break;
 		}
@@ -1101,7 +1124,8 @@ int cmd_fast_export(int argc, const char **argv, const char *prefix)
 		}
 	}
 
-	handle_tags_and_duplicates();
+	handle_tags_and_duplicates(&extra_refs);
+	handle_tags_and_duplicates(&tag_refs);
 	handle_deletes();
 
 	if (export_filename && lastimportid != last_idnum)
diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh
index dbb560c110..a0c93f2212 100755
--- a/t/t9350-fast-export.sh
+++ b/t/t9350-fast-export.sh
@@ -552,10 +552,20 @@ test_expect_success 'use refspec' '
 	test_cmp expected actual
 '
 
-test_expect_success 'delete refspec' '
+test_expect_success 'delete ref because entire history excluded' '
 	git branch to-delete &&
-	git fast-export --refspec :refs/heads/to-delete to-delete ^to-delete > actual &&
-	cat > expected <<-EOF &&
+	git fast-export to-delete ^to-delete >actual &&
+	cat >expected <<-EOF &&
+	reset refs/heads/to-delete
+	from 0000000000000000000000000000000000000000
+
+	EOF
+	test_cmp expected actual
+'
+
+test_expect_success 'delete refspec' '
+	git fast-export --refspec :refs/heads/to-delete >actual &&
+	cat >expected <<-EOF &&
 	reset refs/heads/to-delete
 	from 0000000000000000000000000000000000000000
 
-- 
2.19.1.866.g82735bcbde


  parent reply index

Thread overview: 76+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-09-23 13:04 Import/Export as a fast way to purge files from Git? Lars Schneider
2018-09-23 14:55 ` Eric Sunshine
2018-09-23 15:58   ` Lars Schneider
2018-09-23 15:53 ` brian m. carlson
2018-09-23 17:04   ` Jeff King
2018-09-24 17:24 ` Elijah Newren
2018-10-31 19:15   ` Lars Schneider
2018-11-01  7:12     ` Elijah Newren
2018-11-11  6:23       ` [PATCH 00/10] fast export and import fixes and features Elijah Newren
2018-11-11  6:23         ` [PATCH 01/10] git-fast-import.txt: fix documentation for --quiet option Elijah Newren
2018-11-11  6:33           ` Jeff King
2018-11-11  6:23         ` [PATCH 02/10] git-fast-export.txt: clarify misleading documentation about rev-list args Elijah Newren
2018-11-11  6:36           ` Jeff King
2018-11-11  7:17             ` Elijah Newren
2018-11-13 23:25               ` Elijah Newren
2018-11-13 23:39                 ` Jonathan Nieder
2018-11-14  0:02                   ` Elijah Newren
2018-11-11  6:23         ` [PATCH 03/10] fast-export: use value from correct enum Elijah Newren
2018-11-11  6:36           ` Jeff King
2018-11-11 20:10             ` Ævar Arnfjörð Bjarmason
2018-11-12  9:12               ` Ævar Arnfjörð Bjarmason
2018-11-12 11:31               ` Jeff King
2018-11-11  6:23         ` [PATCH 04/10] fast-export: avoid dying when filtering by paths and old tags exist Elijah Newren
2018-11-11  6:44           ` Jeff King
2018-11-11  7:38             ` Elijah Newren
2018-11-12 12:32               ` Jeff King
2018-11-12 22:50             ` brian m. carlson
2018-11-13 14:38               ` Jeff King
2018-11-11  6:23         ` [PATCH 05/10] fast-export: move commit rewriting logic into a function for reuse Elijah Newren
2018-11-11  6:47           ` Jeff King
2018-11-11  6:23         ` [PATCH 06/10] fast-export: when using paths, avoid corrupt stream with non-existent mark Elijah Newren
2018-11-11  6:53           ` Jeff King
2018-11-11  8:01             ` Elijah Newren
2018-11-12 12:45               ` Jeff King
2018-11-12 15:36                 ` Elijah Newren
2018-11-11  6:23         ` Elijah Newren [this message]
2018-11-11  7:02           ` [PATCH 07/10] fast-export: ensure we export requested refs Jeff King
2018-11-11  8:20             ` Elijah Newren
2018-11-11  6:23         ` [PATCH 08/10] fast-export: add --reference-excluded-parents option Elijah Newren
2018-11-11  7:11           ` Jeff King
2018-11-11  6:23         ` [PATCH 09/10] fast-export: add a --show-original-ids option to show original names Elijah Newren
2018-11-11  7:20           ` Jeff King
2018-11-11  8:32             ` Elijah Newren
2018-11-12 12:53               ` Jeff King
2018-11-12 15:46                 ` Elijah Newren
2018-11-12 16:31                   ` Jeff King
2018-11-11  6:23         ` [PATCH 10/10] fast-export: add --always-show-modify-after-rename Elijah Newren
2018-11-11  7:23           ` Jeff King
2018-11-11  8:42             ` Elijah Newren
2018-11-12 12:58               ` Jeff King
2018-11-12 18:08                 ` Elijah Newren
2018-11-13 14:45                   ` Jeff King
2018-11-13 17:10                     ` Elijah Newren
2018-11-14  7:14                       ` Jeff King
2018-11-11  7:27         ` [PATCH 00/10] fast export and import fixes and features Jeff King
2018-11-11  8:44           ` Elijah Newren
2018-11-12 13:00             ` Jeff King
2018-11-14  0:25         ` [PATCH v2 00/11] " Elijah Newren
2018-11-14  0:25           ` [PATCH v2 01/11] git-fast-import.txt: fix documentation for --quiet option Elijah Newren
2018-11-14  0:25           ` [PATCH v2 02/11] git-fast-export.txt: clarify misleading documentation about rev-list args Elijah Newren
2018-11-14  0:25           ` [PATCH v2 03/11] fast-export: use value from correct enum Elijah Newren
2018-11-14  0:25           ` [PATCH v2 04/11] fast-export: avoid dying when filtering by paths and old tags exist Elijah Newren
2018-11-14 19:17             ` SZEDER Gábor
2018-11-14 23:13               ` Elijah Newren
2018-11-14  0:25           ` [PATCH v2 05/11] fast-export: move commit rewriting logic into a function for reuse Elijah Newren
2018-11-14  0:25           ` [PATCH v2 06/11] fast-export: when using paths, avoid corrupt stream with non-existent mark Elijah Newren
2018-11-14  0:25           ` [PATCH v2 07/11] fast-export: ensure we export requested refs Elijah Newren
2018-11-14  0:25           ` [PATCH v2 08/11] fast-export: add --reference-excluded-parents option Elijah Newren
2018-11-14 19:27             ` SZEDER Gábor
2018-11-14 23:16               ` Elijah Newren
2018-11-14  0:25           ` [PATCH v2 09/11] fast-import: remove unmaintained duplicate documentation Elijah Newren
2018-11-14  0:25           ` [PATCH v2 10/11] fast-export: add a --show-original-ids option to show original names Elijah Newren
2018-11-14  0:26           ` [PATCH v2 11/11] fast-export: add --always-show-modify-after-rename Elijah Newren
2018-11-14  7:25           ` [PATCH v2 00/11] fast export and import fixes and features Jeff King
2018-11-12  9:17       ` Import/Export as a fast way to purge files from Git? Ævar Arnfjörð Bjarmason
2018-11-12 15:34         ` Elijah Newren

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181111062312.16342-8-newren@gmail.com \
    --to=newren@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=jrnieder@gmail.com \
    --cc=larsxschneider@gmail.com \
    --cc=me@ttaylorr.com \
    --cc=peff@peff.net \
    --cc=sandals@crustytoothpaste.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

git@vger.kernel.org mailing list mirror (one of many)

Archives are clonable:
	git clone --mirror https://public-inbox.org/git
	git clone --mirror http://ou63pmih66umazou.onion/git
	git clone --mirror http://czquwvybam4bgbro.onion/git
	git clone --mirror http://hjrcffqmbrq6wope.onion/git

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.version-control.git
	nntp://ou63pmih66umazou.onion/inbox.comp.version-control.git
	nntp://czquwvybam4bgbro.onion/inbox.comp.version-control.git
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.version-control.git
	nntp://news.gmane.org/gmane.comp.version-control.git

 note: .onion URLs require Tor: https://www.torproject.org/
       or Tor2web: https://www.tor2web.org/

AGPL code for this site: git clone https://public-inbox.org/ public-inbox