git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "René Scharfe" <l.s.r@web.de>
To: git@vger.kernel.org
Cc: "Cristian Le" <cristian.le@mpsd.mpg.de>,
	"Matthias Görgens" <matthias.goergens@gmail.com>,
	"Junio C Hamano" <gitster@pobox.com>
Subject: [PATCH v2] archive: improve support for running in subdirectory
Date: Fri, 24 Mar 2023 23:27:11 +0100	[thread overview]
Message-ID: <7c33b01b-7b2a-25fa-9a66-1e65cd12bc84@web.de> (raw)
In-Reply-To: <e923e844-6891-76dc-e7db-4771b2d91792@web.de>

When git archive is started in a subdirectory, it archives its
corresponding tree and its child objects, only.  That is intended.  It
does that by effectively cd'ing into that tree and setting "prefix" to
the empty string.

This has unfortunate consequences, though: Attributes are anchored at
the root of the repository and git archive still applies them to
subtrees, causing mismatches.  And when checking pathspecs it cannot
tell the difference between one that doesn't match anthing or one that
matches some actual blob outside of the subdirectory, leading to a
confusing error message.

Fix that by keeping the "prefix" value and passing it to pathspec and
attribute functions, and shortening it using relative_path() for paths
written to the archive and (if --verbose is given) to stdout.

Still reject attempts to archive files outside the current directory,
but print a more specific error in that case.  Recognizing it requires a
full traversal of the subtree for each pathspec, however.  Allowing them
would be easier, but archive entry paths starting with "../" can be
problematic to extract -- e.g. bsdtar skips them by default.

Reported-by: Cristian Le <cristian.le@mpsd.mpg.de>
Reported-by: Matthias Görgens <matthias.goergens@gmail.com>
Signed-off-by: René Scharfe <l.s.r@web.de>
---
Interdiff:
  diff --git a/archive.c b/archive.c
  index c8d66169d1..a61615d180 100644
  --- a/archive.c
  +++ b/archive.c
  @@ -169,6 +169,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
   	}

   	if (args->prefix) {
  +		static struct strbuf new_path = STRBUF_INIT;
   		static struct strbuf buf = STRBUF_INIT;
   		const char *rel;

  @@ -183,8 +184,11 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
   		if (!strcmp(rel, "./") || starts_with(rel, "../"))
   			return S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0;

  -		strbuf_setlen(&path, args->baselen);
  -		strbuf_addstr(&path, rel);
  +		/* rel can refer to path, so don't edit it in place */
  +		strbuf_reset(&new_path);
  +		strbuf_add(&new_path, args->base, args->baselen);
  +		strbuf_addstr(&new_path, rel);
  +		strbuf_swap(&path, &new_path);
   	}

   	if (args->verbose)

 archive.c               | 75 +++++++++++++++++++++++++++++------------
 t/t5000-tar-tree.sh     | 13 +++++++
 t/t5001-archive-attr.sh | 16 +++++++++
 3 files changed, 83 insertions(+), 21 deletions(-)

diff --git a/archive.c b/archive.c
index 1c2ca78e52..a61615d180 100644
--- a/archive.c
+++ b/archive.c
@@ -168,6 +168,29 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 		args->convert = check_attr_export_subst(check);
 	}

+	if (args->prefix) {
+		static struct strbuf new_path = STRBUF_INIT;
+		static struct strbuf buf = STRBUF_INIT;
+		const char *rel;
+
+		rel = relative_path(path_without_prefix, args->prefix, &buf);
+
+		/*
+		 * We don't add an entry for the current working
+		 * directory when we are at the root; skip it also when
+		 * we're in a subdirectory or submodule.  Skip entries
+		 * higher up as well.
+		 */
+		if (!strcmp(rel, "./") || starts_with(rel, "../"))
+			return S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0;
+
+		/* rel can refer to path, so don't edit it in place */
+		strbuf_reset(&new_path);
+		strbuf_add(&new_path, args->base, args->baselen);
+		strbuf_addstr(&new_path, rel);
+		strbuf_swap(&path, &new_path);
+	}
+
 	if (args->verbose)
 		fprintf(stderr, "%.*s\n", (int)path.len, path.buf);

@@ -403,6 +426,27 @@ static int reject_entry(const struct object_id *oid UNUSED,
 	return ret;
 }

+static int reject_outside(const struct object_id *oid UNUSED,
+			  struct strbuf *base, const char *filename,
+			  unsigned mode, void *context)
+{
+	struct archiver_args *args = context;
+	struct strbuf buf = STRBUF_INIT;
+	struct strbuf path = STRBUF_INIT;
+	int ret = 0;
+
+	if (S_ISDIR(mode))
+		return READ_TREE_RECURSIVE;
+
+	strbuf_addbuf(&path, base);
+	strbuf_addstr(&path, filename);
+	if (starts_with(relative_path(path.buf, args->prefix, &buf), "../"))
+		ret = -1;
+	strbuf_release(&buf);
+	strbuf_release(&path);
+	return ret;
+}
+
 static int path_exists(struct archiver_args *args, const char *path)
 {
 	const char *paths[] = { path, NULL };
@@ -410,8 +454,13 @@ static int path_exists(struct archiver_args *args, const char *path)
 	int ret;

 	ctx.args = args;
-	parse_pathspec(&ctx.pathspec, 0, 0, "", paths);
+	parse_pathspec(&ctx.pathspec, 0, PATHSPEC_PREFER_CWD,
+		       args->prefix, paths);
 	ctx.pathspec.recursive = 1;
+	if (args->prefix && read_tree(args->repo, args->tree, &ctx.pathspec,
+				      reject_outside, args))
+		die(_("pathspec '%s' matches files outside the "
+		      "current directory"), path);
 	ret = read_tree(args->repo, args->tree,
 			&ctx.pathspec,
 			reject_entry, &ctx);
@@ -427,9 +476,8 @@ static void parse_pathspec_arg(const char **pathspec,
 	 * Also if pathspec patterns are dependent, we're in big
 	 * trouble as we test each one separately
 	 */
-	parse_pathspec(&ar_args->pathspec, 0,
-		       PATHSPEC_PREFER_FULL,
-		       "", pathspec);
+	parse_pathspec(&ar_args->pathspec, 0, PATHSPEC_PREFER_CWD,
+		       ar_args->prefix, pathspec);
 	ar_args->pathspec.recursive = 1;
 	if (pathspec) {
 		while (*pathspec) {
@@ -441,8 +489,7 @@ static void parse_pathspec_arg(const char **pathspec,
 }

 static void parse_treeish_arg(const char **argv,
-		struct archiver_args *ar_args, const char *prefix,
-		int remote)
+			      struct archiver_args *ar_args, int remote)
 {
 	const char *name = argv[0];
 	const struct object_id *commit_oid;
@@ -481,20 +528,6 @@ static void parse_treeish_arg(const char **argv,
 	if (!tree)
 		die(_("not a tree object: %s"), oid_to_hex(&oid));

-	if (prefix) {
-		struct object_id tree_oid;
-		unsigned short mode;
-		int err;
-
-		err = get_tree_entry(ar_args->repo,
-				     &tree->object.oid,
-				     prefix, &tree_oid,
-				     &mode);
-		if (err || !S_ISDIR(mode))
-			die(_("current working directory is untracked"));
-
-		tree = parse_tree_indirect(&tree_oid);
-	}
 	ar_args->refname = ref;
 	ar_args->tree = tree;
 	ar_args->commit_oid = commit_oid;
@@ -712,7 +745,7 @@ int write_archive(int argc, const char **argv, const char *prefix,
 		setup_git_directory();
 	}

-	parse_treeish_arg(argv, &args, prefix, remote);
+	parse_treeish_arg(argv, &args, remote);
 	parse_pathspec_arg(argv + 1, &args);

 	rc = ar->write_archive(ar, &args);
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 918a2fc7c6..a60ae6145e 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -433,6 +433,19 @@ test_expect_success 'catch non-matching pathspec' '
 	test_must_fail git archive -v HEAD -- "*.abc" >/dev/null
 '

+test_expect_success 'reject paths outside the current directory' '
+	test_must_fail git -C a/bin archive HEAD .. >/dev/null 2>err &&
+	grep "outside the current directory" err
+'
+
+test_expect_success 'allow pathspecs that resolve to the current directory' '
+	git -C a/bin archive -v HEAD ../bin >/dev/null 2>actual &&
+	cat >expect <<-\EOF &&
+	sh
+	EOF
+	test_cmp expect actual
+'
+
 # Pull the size and date of each entry in a tarfile using the system tar.
 #
 # We'll pull out only the year from the date; that avoids any question of
diff --git a/t/t5001-archive-attr.sh b/t/t5001-archive-attr.sh
index 04d300eeda..0ff47a239d 100755
--- a/t/t5001-archive-attr.sh
+++ b/t/t5001-archive-attr.sh
@@ -33,6 +33,13 @@ test_expect_success 'setup' '
 	echo ignored-by-tree.d export-ignore >>.gitattributes &&
 	git add ignored-by-tree ignored-by-tree.d .gitattributes &&

+	mkdir subdir &&
+	>subdir/included &&
+	>subdir/ignored-by-subtree &&
+	>subdir/ignored-by-tree &&
+	echo ignored-by-subtree export-ignore >subdir/.gitattributes &&
+	git add subdir &&
+
 	echo ignored by worktree >ignored-by-worktree &&
 	echo ignored-by-worktree export-ignore >.gitattributes &&
 	git add ignored-by-worktree &&
@@ -93,6 +100,15 @@ test_expect_exists	archive-pathspec-wildcard/ignored-by-worktree
 test_expect_missing	archive-pathspec-wildcard/excluded-by-pathspec.d
 test_expect_missing	archive-pathspec-wildcard/excluded-by-pathspec.d/file

+test_expect_success 'git -C subdir archive' '
+	git -C subdir archive HEAD >archive-subdir.tar &&
+	extract_tar_to_dir archive-subdir
+'
+
+test_expect_exists	archive-subdir/included
+test_expect_missing	archive-subdir/ignored-by-subtree
+test_expect_missing	archive-subdir/ignored-by-tree
+
 test_expect_success 'git archive with worktree attributes' '
 	git archive --worktree-attributes HEAD >worktree.tar &&
 	(mkdir worktree && cd worktree && "$TAR" xf -) <worktree.tar
--
2.40.0


  parent reply	other threads:[~2023-03-24 22:27 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-03 10:25 Bug in git archive + .gitattributes + relative path Cristian Le
2023-03-03 15:19 ` René Scharfe
2023-03-03 15:38   ` Cristian Le
2023-03-04 13:58     ` René Scharfe
2023-03-04 15:11       ` Cristian Le
2023-03-05  9:32         ` René Scharfe
2023-03-06 16:56       ` Junio C Hamano
2023-03-06 17:51         ` René Scharfe
2023-03-06 17:27       ` Junio C Hamano
2023-03-06 18:28         ` René Scharfe
2023-03-06 18:59           ` Junio C Hamano
2023-03-06 21:32             ` René Scharfe
2023-03-06 22:34               ` Junio C Hamano
2023-03-11 20:47                 ` René Scharfe
2023-03-12 21:25                   ` Junio C Hamano
2023-03-18 21:30                     ` René Scharfe
2023-03-20 16:16                       ` Junio C Hamano
2023-03-20 20:02                       ` [PATCH] archive: improve support for running in a subdirectory René Scharfe
2023-03-21 22:59                         ` Junio C Hamano
2023-03-24 22:26                           ` René Scharfe
2023-03-24 22:27                         ` René Scharfe [this message]
2023-03-27 16:09                           ` [PATCH v2] archive: improve support for running in subdirectory Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7c33b01b-7b2a-25fa-9a66-1e65cd12bc84@web.de \
    --to=l.s.r@web.de \
    --cc=cristian.le@mpsd.mpg.de \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=matthias.goergens@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).