git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] archive: add --recurse-submodules to git-archive command
@ 2022-10-12 17:52 Heather Lapointe via GitGitGadget
  2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
  0 siblings, 1 reply; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-12 17:52 UTC (permalink / raw)
  To: git; +Cc: Heather Lapointe, Alphadelta14

From: Alphadelta14 <alpha@alphaservcomputing.solutions>

This makes it possible to include submodule contents in an archive command.

This required updating the general read_tree callbacks to support sub-repos
by not using the_repository global references where possible.

archive: update streaming to use target repo
archive: add test cases for git archive --recurse-submodules

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
    archive: Add --recurse-submodules to git-archive command
    
    This makes it possible to include submodule contents in an archive
    command.
    
    The inspiration for this change comes from this GitHub thread,
    https://github.com/dear-github/dear-github/issues/214, with at least 160
    👍🏻 reactions at the time of writing. (I stumbled upon it because I
    wanted it as well.)
    
    I figured the underlying implementation wouldn't be too difficult with
    most of the plumbing already in place, so I decided to add the relevant
    logic to the client git-archive command.
    
    One of the trickier parts of this implementation involved teaching
    read_tree about submodules. Some of the troublesome areas were still
    using the global the_repository reference to look up commit, tree, or
    oid information. I ended up deciding that read_tree_fn_t would probably
    be best off having a concrete repo reference, since it allows changing
    the context to a subrepo where needed (even though some of the usages
    did not need it specifically).
    
    I am open to feedback since this is all quite new to me :)
    
    TODO:
    
     * working implementation
     * valgrind
     * add regression tests
     * update documentation with new flag
     * submit to mailing list

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1359%2FAlphadelta14%2Farchive-recurse-submodules-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1359/Alphadelta14/archive-recurse-submodules-v1
Pull-Request: https://github.com/git/git/pull/1359

 archive-tar.c                 | 14 +++--
 archive-zip.c                 | 14 ++---
 archive.c                     | 99 ++++++++++++++++++++++++-----------
 archive.h                     |  8 +--
 builtin/checkout.c            |  2 +-
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            | 10 ++--
 builtin/ls-tree.c             | 16 +++---
 list-objects.c                |  2 +-
 merge-recursive.c             |  2 +-
 revision.c                    |  4 +-
 sparse-index.c                |  2 +-
 t/t5005-archive-submodules.sh | 84 +++++++++++++++++++++++++++++
 tree.c                        | 64 ++++++++++++++--------
 tree.h                        | 11 ++--
 wt-status.c                   |  2 +-
 16 files changed, 246 insertions(+), 90 deletions(-)
 create mode 100755 t/t5005-archive-submodules.sh

diff --git a/archive-tar.c b/archive-tar.c
index 3e4822b6840..331fc10bca9 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -18,6 +18,7 @@ static unsigned long offset;
 static int tar_umask = 002;
 
 static int write_tar_filter_archive(const struct archiver *ar,
+				    struct repository *repo,
 				    struct archiver_args *args);
 
 /*
@@ -246,7 +247,8 @@ static void write_extended_header(struct archiver_args *args,
 	write_blocked(buffer, size);
 }
 
-static int write_tar_entry(struct archiver_args *args,
+static int write_tar_entry(struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -316,7 +318,7 @@ static int write_tar_entry(struct archiver_args *args,
 		if (buffer)
 			write_blocked(buffer, size);
 		else
-			err = stream_blocked(args->repo, oid);
+			err = stream_blocked(repo, oid);
 	}
 	return err;
 }
@@ -422,12 +424,13 @@ static int git_tar_config(const char *var, const char *value, void *cb)
 }
 
 static int write_tar_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err = 0;
 
 	write_global_extended_header(args);
-	err = write_archive_entries(args, write_tar_entry);
+	err = write_archive_entries(repo, args, write_tar_entry);
 	if (!err)
 		write_trailer();
 	return err;
@@ -462,6 +465,7 @@ static void tgz_write_block(const void *data)
 static const char internal_gzip_command[] = "git archive gzip";
 
 static int write_tar_filter_archive(const struct archiver *ar,
+				    struct repository *repo,
 				    struct archiver_args *args)
 {
 #if ZLIB_VERNUM >= 0x1221
@@ -484,7 +488,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		gzstream.next_out = outbuf;
 		gzstream.avail_out = sizeof(outbuf);
 
-		r = write_tar_archive(ar, args);
+		r = write_tar_archive(ar, repo, args);
 
 		tgz_deflate(Z_FINISH);
 		git_deflate_end(&gzstream);
@@ -506,7 +510,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		die_errno(_("unable to redirect descriptor"));
 	close(filter.in);
 
-	r = write_tar_archive(ar, args);
+	r = write_tar_archive(ar, repo, args);
 
 	close(1);
 	if (finish_command(&filter) != 0)
diff --git a/archive-zip.c b/archive-zip.c
index 0456f1ebf15..6b27d004b5d 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -283,7 +283,8 @@ static int entry_is_binary(struct index_state *istate, const char *path,
 
 #define STREAM_BUFFER_SIZE (1024 * 16)
 
-static int write_zip_entry(struct archiver_args *args,
+static int write_zip_entry(struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -340,7 +341,7 @@ static int write_zip_entry(struct archiver_args *args,
 
 		if (!buffer) {
 			enum object_type type;
-			stream = open_istream(args->repo, oid, &type, &size,
+			stream = open_istream(repo, oid, &type, &size,
 					      NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
@@ -349,7 +350,7 @@ static int write_zip_entry(struct archiver_args *args,
 			out = NULL;
 		} else {
 			crc = crc32(crc, buffer, size);
-			is_binary = entry_is_binary(args->repo->index,
+			is_binary = entry_is_binary(repo->index,
 						    path_without_prefix,
 						    buffer, size);
 			out = buffer;
@@ -426,7 +427,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
@@ -459,7 +460,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 
@@ -619,6 +620,7 @@ static int archive_zip_config(const char *var, const char *value,
 }
 
 static int write_zip_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err;
@@ -629,7 +631,7 @@ static int write_zip_archive(const struct archiver *ar UNUSED,
 
 	strbuf_init(&zip_dir, 0);
 
-	err = write_archive_entries(args, write_zip_entry);
+	err = write_archive_entries(repo, args, write_zip_entry);
 	if (!err)
 		write_zip_trailer(args->commit_oid);
 
diff --git a/archive.c b/archive.c
index 61a79e4a227..2ee7205679d 100644
--- a/archive.c
+++ b/archive.c
@@ -10,6 +10,7 @@
 #include "unpack-trees.h"
 #include "dir.h"
 #include "quote.h"
+#include "submodule.h"
 
 static char const * const archive_usage[] = {
 	N_("git archive [<options>] <tree-ish> [<path>...]"),
@@ -69,6 +70,7 @@ static void format_subst(const struct commit *commit,
 }
 
 static void *object_file_to_archive(const struct archiver_args *args,
+				    struct repository *repo,
 				    const char *path,
 				    const struct object_id *oid,
 				    unsigned int mode,
@@ -84,13 +86,13 @@ static void *object_file_to_archive(const struct archiver_args *args,
 			       (args->tree ? &args->tree->object.oid : NULL), oid);
 
 	path += args->baselen;
-	buffer = read_object_file(oid, type, sizep);
+	buffer = repo_read_object_file(repo, oid, type, sizep);
 	if (buffer && S_ISREG(mode)) {
 		struct strbuf buf = STRBUF_INIT;
 		size_t size = 0;
 
 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
-		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
+		convert_to_working_tree(repo->index, path, buf.buf, buf.len, &buf, &meta);
 		if (commit)
 			format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
 		buffer = strbuf_detach(&buf, &size);
@@ -134,7 +136,7 @@ static int check_attr_export_subst(const struct attr_check *check)
 	return check && ATTR_TRUE(check->items[1].value);
 }
 
-static int write_archive_entry(const struct object_id *oid, const char *base,
+static int write_archive_entry(struct repository *repo, const struct object_id *oid, const char *base,
 		int baselen, const char *filename, unsigned mode,
 		void *context)
 {
@@ -160,7 +162,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	if (!S_ISDIR(mode)) {
 		const struct attr_check *check;
-		check = get_archive_attrs(args->repo->index, path_without_prefix);
+		check = get_archive_attrs(repo->index, path_without_prefix);
 		if (check_attr_export_ignore(check))
 			return 0;
 		args->convert = check_attr_export_subst(check);
@@ -169,10 +171,10 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
+		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
-		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
+		return READ_TREE_RECURSIVE;
 	}
 
 	if (args->verbose)
@@ -180,14 +182,19 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	/* Stream it? */
 	if (S_ISREG(mode) && !args->convert &&
-	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
-	    size > big_file_threshold)
-		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
+	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
+	    size > big_file_threshold) {
+			err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
+			if (err) {
+				die("Failed to write file %.*s", (int)path.len, path.buf);
+			}
+			return err;
+		}
 
-	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+	buffer = object_file_to_archive(args, repo, path.buf, oid, mode, &type, &size);
 	if (!buffer)
 		return error(_("cannot read '%s'"), oid_to_hex(oid));
-	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
+	err = write_entry(repo, args, oid, path.buf, path.len, mode, buffer, size);
 	free(buffer);
 	return err;
 }
@@ -207,7 +214,25 @@ static void queue_directory(const struct object_id *oid,
 	oidcpy(&d->oid, oid);
 }
 
-static int write_directory(struct archiver_context *c)
+static void queue_submodule(struct repository *superproject,
+		const struct object_id *oid,
+		struct strbuf *base, const char *filename,
+		unsigned mode, struct archiver_context *c)
+{
+	struct repository subrepo;
+
+	/* Queue a gitlink entry, but only if its submodule repo can be opened. */
+	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
+		return;
+	/* An unreadable submodule index is fatal. */
+	if (repo_read_index(&subrepo) < 0)
+		die("index file corrupt");
+	/* Queued exactly like a directory; subrepo is not needed afterwards. */
+	queue_directory(oid, base, filename, mode, c);
+	repo_clear(&subrepo);
+}
+
+static int write_directory(struct repository *repo, struct archiver_context *c)
 {
 	struct directory *d = c->bottom;
 	int ret;
@@ -217,15 +242,18 @@ static int write_directory(struct archiver_context *c)
 	c->bottom = d->up;
 	d->path[d->len - 1] = '\0'; /* no trailing slash */
-	ret =
-		write_directory(c) ||
-		write_archive_entry(&d->oid, d->path, d->baselen,
-				    d->path + d->baselen, d->mode,
-				    c) != READ_TREE_RECURSIVE;
+	/* Emit queued parents first, and only then this directory. */
+	ret = write_directory(repo, c);
+	/* Not joined with "||": that would squash return codes to 0 or 1. */
+	if (!ret)
+		ret = write_archive_entry(repo, &d->oid, d->path, d->baselen,
+					  d->path + d->baselen, d->mode, c);
 	free(d);
-	return ret ? -1 : 0;
+	/* A written directory entry reports READ_TREE_RECURSIVE: success. */
+	return ret == READ_TREE_RECURSIVE ? 0 : ret;
 }
 
-static int queue_or_write_archive_entry(const struct object_id *oid,
+static int queue_or_write_archive_entry(
+		struct repository *repo, const struct object_id *oid,
 		struct strbuf *base, const char *filename,
 		unsigned mode, void *context)
 {
@@ -246,18 +274,23 @@ static int queue_or_write_archive_entry(const struct object_id *oid,
 		/* Borrow base, but restore its original value when done. */
 		strbuf_addstr(base, filename);
 		strbuf_addch(base, '/');
-		check = get_archive_attrs(c->args->repo->index, base->buf);
+		check = get_archive_attrs(repo->index, base->buf);
 		strbuf_setlen(base, baselen);
 
 		if (check_attr_export_ignore(check))
 			return 0;
 		queue_directory(oid, base, filename, mode, c);
 		return READ_TREE_RECURSIVE;
+	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
+		if (is_submodule_active(repo, filename)) {
+			queue_submodule(repo, oid, base, filename, mode, c);
+			return READ_TREE_RECURSIVE;
+		}
 	}
 
-	if (write_directory(c))
+	if (write_directory(repo, c))
 		return -1;
-	return write_archive_entry(oid, base->buf, base->len, filename, mode,
+	return write_archive_entry(repo, oid, base->buf, base->len, filename, mode,
 				   context);
 }
 
@@ -267,7 +300,8 @@ struct extra_file_info {
 	void *content;
 };
 
-int write_archive_entries(struct archiver_args *args,
+int write_archive_entries(struct repository *repo,
+		struct archiver_args *args,
 		write_archive_entry_fn_t write_entry)
 {
 	struct archiver_context context;
@@ -288,7 +322,7 @@ int write_archive_entries(struct archiver_args *args,
 			len--;
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)len, args->base);
-		err = write_entry(args, &args->tree->object.oid, args->base,
+		err = write_entry(repo, args, &args->tree->object.oid, args->base,
 				  len, 040777, NULL, 0);
 		if (err)
 			return err;
@@ -305,8 +339,8 @@ int write_archive_entries(struct archiver_args *args,
 		memset(&opts, 0, sizeof(opts));
 		opts.index_only = 1;
 		opts.head_idx = -1;
-		opts.src_index = args->repo->index;
-		opts.dst_index = args->repo->index;
+		opts.src_index = repo->index;
+		opts.dst_index = repo->index;
 		opts.fn = oneway_merge;
 		init_tree_desc(&t, args->tree->buffer, args->tree->size);
 		if (unpack_trees(1, &t, &opts))
@@ -314,7 +348,7 @@ int write_archive_entries(struct archiver_args *args,
 		git_attr_set_direction(GIT_ATTR_INDEX);
 	}
 
-	err = read_tree(args->repo, args->tree,
+	err = read_tree(repo, args->tree,
 			&args->pathspec,
 			queue_or_write_archive_entry,
 			&context);
@@ -343,12 +377,12 @@ int write_archive_entries(struct archiver_args *args,
 			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
 				err = error_errno(_("cannot read '%s'"), path);
 			else
-				err = write_entry(args, &fake_oid, path_in_archive.buf,
+				err = write_entry(repo, args, &fake_oid, path_in_archive.buf,
 						  path_in_archive.len,
 						  canon_mode(info->stat.st_mode),
 						  content.buf, content.len);
 		} else {
-			err = write_entry(args, &fake_oid,
+			err = write_entry(repo, args, &fake_oid,
 					  path, strlen(path),
 					  canon_mode(info->stat.st_mode),
 					  info->content, info->stat.st_size);
@@ -382,7 +416,7 @@ struct path_exists_context {
 	struct archiver_args *args;
 };
 
-static int reject_entry(const struct object_id *oid UNUSED,
+static int reject_entry(struct repository *repo, const struct object_id *oid UNUSED,
 			struct strbuf *base,
 			const char *filename, unsigned mode,
 			void *context)
@@ -394,7 +428,7 @@ static int reject_entry(const struct object_id *oid UNUSED,
 		struct strbuf sb = STRBUF_INIT;
 		strbuf_addbuf(&sb, base);
 		strbuf_addstr(&sb, filename);
-		if (!match_pathspec(ctx->args->repo->index,
+		if (!match_pathspec(repo->index,
 				    &ctx->pathspec,
 				    sb.buf, sb.len, 0, NULL, 1))
 			ret = READ_TREE_RECURSIVE;
@@ -431,6 +465,7 @@ static void parse_pathspec_arg(const char **pathspec,
 		       PATHSPEC_PREFER_FULL,
 		       "", pathspec);
 	ar_args->pathspec.recursive = 1;
+	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
 	if (pathspec) {
 		while (*pathspec) {
 			if (**pathspec && !path_exists(ar_args, *pathspec))
@@ -592,6 +627,7 @@ static int parse_archive_args(int argc, const char **argv,
 	int verbose = 0;
 	int i;
 	int list = 0;
+	int recurse_submodules = 0;
 	int worktree_attributes = 0;
 	struct option opts[] = {
 		OPT_GROUP(""),
@@ -606,6 +642,8 @@ static int parse_archive_args(int argc, const char **argv,
 		  add_file_cb, (intptr_t)&base },
 		OPT_STRING('o', "output", &output, N_("file"),
 			N_("write the archive to this file")),
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("include submodules in archive")),
 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
 			N_("read .gitattributes in working directory")),
 		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
@@ -670,6 +708,7 @@ static int parse_archive_args(int argc, const char **argv,
 	args->base = base;
 	args->baselen = strlen(base);
 	args->worktree_attributes = worktree_attributes;
+	args->recurse_submodules = recurse_submodules;
 
 	return argc;
 }
@@ -708,7 +747,7 @@ int write_archive(int argc, const char **argv, const char *prefix,
 	parse_treeish_arg(argv, &args, prefix, remote);
 	parse_pathspec_arg(argv + 1, &args);
 
-	rc = ar->write_archive(ar, &args);
+	rc = ar->write_archive(ar, repo, &args);
 
 	string_list_clear_func(&args.extra_files, extra_file_info_clear);
 	free(args.refname);
diff --git a/archive.h b/archive.h
index 08bed3ed3af..91b318f9ad8 100644
--- a/archive.h
+++ b/archive.h
@@ -19,6 +19,7 @@ struct archiver_args {
 	timestamp_t time;
 	struct pathspec pathspec;
 	unsigned int verbose : 1;
+	unsigned int recurse_submodules : 1;
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
 	int compression_level;
@@ -41,7 +42,7 @@ const char *archive_format_from_filename(const char *filename);
 #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
 struct archiver {
 	const char *name;
-	int (*write_archive)(const struct archiver *, struct archiver_args *);
+	int (*write_archive)(const struct archiver *, struct repository *repo, struct archiver_args *);
 	unsigned flags;
 	char *filter_command;
 };
@@ -51,12 +52,13 @@ void init_tar_archiver(void);
 void init_zip_archiver(void);
 void init_archivers(void);
 
-typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
+typedef int (*write_archive_entry_fn_t)(struct repository *repo,
+					struct archiver_args *args,
 					const struct object_id *oid,
 					const char *path, size_t pathlen,
 					unsigned int mode,
 					void *buffer, unsigned long size);
 
-int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
+int write_archive_entries(struct repository *repo, struct archiver_args *args, write_archive_entry_fn_t write_entry);
 
 #endif	/* ARCHIVE_H */
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 2a132392fbe..1238774b245 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -124,7 +124,7 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
 
 }
 
-static int update_some(const struct object_id *oid, struct strbuf *base,
+static int update_some(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 		       const char *pathname, unsigned mode, void *context UNUSED)
 {
 	int len;
diff --git a/builtin/log.c b/builtin/log.c
index ee19dc5d450..da73bbef836 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -698,7 +698,7 @@ static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 	return 0;
 }
 
-static int show_tree_object(const struct object_id *oid UNUSED,
+static int show_tree_object(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
 			    struct strbuf *base UNUSED,
 			    const char *pathname, unsigned mode,
 			    void *context)
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 4cf8a236483..a08768dbd2a 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -509,7 +509,7 @@ static int get_common_prefix_len(const char *common_prefix)
 	return common_prefix_len;
 }
 
-static int read_one_entry_opt(struct index_state *istate,
+static int read_one_entry_opt(struct repository *repo UNUSED, struct index_state *istate,
 			      const struct object_id *oid,
 			      struct strbuf *base,
 			      const char *pathname,
@@ -533,12 +533,12 @@ static int read_one_entry_opt(struct index_state *istate,
 	return add_index_entry(istate, ce, opt);
 }
 
-static int read_one_entry(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context)
 {
 	struct index_state *istate = context;
-	return read_one_entry_opt(istate, oid, base, pathname,
+	return read_one_entry_opt(repo, istate, oid, base, pathname,
 				  mode,
 				  ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
 }
@@ -547,12 +547,12 @@ static int read_one_entry(const struct object_id *oid, struct strbuf *base,
  * This is used when the caller knows there is no existing entries at
  * the stage that will conflict with the entry being added.
  */
-static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry_quick(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 				const char *pathname, unsigned mode,
 				void *context)
 {
 	struct index_state *istate = context;
-	return read_one_entry_opt(istate, oid, base, pathname,
+	return read_one_entry_opt(repo, istate, oid, base, pathname,
 				  mode, ADD_CACHE_JUST_APPEND);
 }
 
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index c3ea09281af..c8d6ff95ac0 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -141,7 +141,7 @@ static int show_recursive(const char *base, size_t baselen, const char *pathname
 	return 0;
 }
 
-static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
+static int show_tree_fmt(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			 const char *pathname, unsigned mode, void *context UNUSED)
 {
 	size_t baselen;
@@ -211,7 +211,7 @@ static void show_tree_common_default_long(struct strbuf *base,
 	strbuf_setlen(base, baselen);
 }
 
-static int show_tree_default(const struct object_id *oid, struct strbuf *base,
+static int show_tree_default(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			     const char *pathname, unsigned mode,
 			     void *context UNUSED)
 {
@@ -229,7 +229,7 @@ static int show_tree_default(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_long(const struct object_id *oid, struct strbuf *base,
+static int show_tree_long(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context UNUSED)
 {
@@ -244,7 +244,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 
 	if (data.type == OBJ_BLOB) {
 		unsigned long size;
-		if (oid_object_info(the_repository, data.oid, &size) == OBJ_BAD)
+		if (oid_object_info(repo, data.oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
 			xsnprintf(size_text, sizeof(size_text),
@@ -254,12 +254,12 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 	}
 
 	printf("%06o %s %s %7s\t", data.mode, type_name(data.type),
-	       find_unique_abbrev(data.oid, abbrev), size_text);
+	       repo_find_unique_abbrev(repo, data.oid, abbrev), size_text);
 	show_tree_common_default_long(base, pathname, data.base->len);
 	return recurse;
 }
 
-static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
+static int show_tree_name_only(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			       const char *pathname, unsigned mode,
 			       void *context UNUSED)
 {
@@ -280,7 +280,7 @@ static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_object(const struct object_id *oid, struct strbuf *base,
+static int show_tree_object(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			    const char *pathname, unsigned mode,
 			    void *context UNUSED)
 {
@@ -292,7 +292,7 @@ static int show_tree_object(const struct object_id *oid, struct strbuf *base,
 	if (early >= 0)
 		return early;
 
-	printf("%s%c", find_unique_abbrev(oid, abbrev), line_termination);
+	printf("%s%c", repo_find_unique_abbrev(repo, oid, abbrev), line_termination);
 	return recurse;
 }
 
diff --git a/list-objects.c b/list-objects.c
index 250d9de41cb..4f463ae32ad 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -185,7 +185,7 @@ static void process_tree(struct traversal_context *ctx,
 	    !revs->include_check_obj(&tree->object, revs->include_check_data))
 		return;
 
-	failed_parse = parse_tree_gently(tree, 1);
+	failed_parse = parse_tree_gently(revs->repo, tree, 1);
 	if (failed_parse) {
 		if (revs->ignore_missing_links)
 			return;
diff --git a/merge-recursive.c b/merge-recursive.c
index 4ddd3adea00..119e86090f9 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -456,7 +456,7 @@ static void unpack_trees_finish(struct merge_options *opt)
 	clear_unpack_trees_porcelain(&opt->priv->unpack_opts);
 }
 
-static int save_files_dirs(const struct object_id *oid UNUSED,
+static int save_files_dirs(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
 			   struct strbuf *base, const char *path,
 			   unsigned int mode, void *context)
 {
diff --git a/revision.c b/revision.c
index 36e31942cee..39e023a19ea 100644
--- a/revision.c
+++ b/revision.c
@@ -74,7 +74,7 @@ static void mark_tree_contents_uninteresting(struct repository *r,
 	struct tree_desc desc;
 	struct name_entry entry;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
@@ -181,7 +181,7 @@ static void add_children_by_path(struct repository *r,
 	if (!tree)
 		return;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
diff --git a/sparse-index.c b/sparse-index.c
index e4a54ce1943..05ca8aec1fb 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -232,7 +232,7 @@ static void set_index_entry(struct index_state *istate, int nr, struct cache_ent
 	add_name_hash(istate, ce);
 }
 
-static int add_path_to_index(const struct object_id *oid,
+static int add_path_to_index(struct repository *repo UNUSED, const struct object_id *oid,
 			     struct strbuf *base, const char *path,
 			     unsigned int mode, void *context)
 {
diff --git a/t/t5005-archive-submodules.sh b/t/t5005-archive-submodules.sh
new file mode 100755
index 00000000000..49d5ff74ad5
--- /dev/null
+++ b/t/t5005-archive-submodules.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+test_description='git archive --recurse-submodules test'
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-submodule-update.sh
+
+test_expect_success 'setup' '
+	create_lib_submodule_repo &&
+	git -C submodule_update_repo checkout valid_sub1 &&
+	git -C submodule_update_repo submodule update
+'
+
+check_tar() {
+	tarfile=$1.tar
+	listfile=$1.lst
+	dir=$1
+	dir_with_prefix=$dir/$2
+
+	test_expect_success ' extract tar archive' '
+		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
+	'
+}
+
+check_added() {
+	dir=$1
+	path_in_fs=$2
+	path_in_archive=$3
+
+	test_expect_success " validate extra file $path_in_archive" '
+		test -f $dir/$path_in_archive &&
+		diff -r $path_in_fs $dir/$path_in_archive
+	'
+}
+
+check_not_added() {
+	dir=$1
+	path_in_archive=$2
+
+	test_expect_success " validate unpresent file $path_in_archive" '
+		! test -f $dir/$path_in_archive &&
+		! test -d $dir/$path_in_archive
+	'
+}
+
+test_expect_success 'archive without recurse, non-init' '
+	reset_work_tree_to valid_sub1 &&
+	git -C submodule_update archive HEAD >b.tar
+'
+
+check_tar b
+check_added b submodule_update/file1 file1
+check_not_added b sub1/file1
+
+test_expect_success 'archive with recurse, non-init' '
+	reset_work_tree_to valid_sub1 &&
+	test_must_fail git -C submodule_update archive --recurse-submodules HEAD >b2-err.tar
+'
+
+test_expect_success 'archive with recurse, init' '
+	reset_work_tree_to valid_sub1 &&
+	git -C submodule_update submodule update --init &&
+	git -C submodule_update ls-files --recurse-submodules &&
+	git -C submodule_update ls-tree HEAD &&
+	git -C submodule_update archive --recurse-submodules HEAD >b2.tar
+'
+
+check_tar b2
+check_added b2 submodule_update/sub1/file1 sub1/file1
+
+test_expect_success 'archive with recurse with big files' '
+	reset_work_tree_to valid_sub1 &&
+	test_config core.bigfilethreshold 1 &&
+	git -C submodule_update submodule update --init &&
+	git -C submodule_update ls-files --recurse-submodules &&
+	git -C submodule_update ls-tree HEAD &&
+	git -C submodule_update archive --recurse-submodules HEAD >b3.tar
+'
+
+check_tar b3
+check_added b3 submodule_update/sub1/file1 sub1/file1
+
+
+test_done
diff --git a/tree.c b/tree.c
index 410e3b477e5..c5b5a0ac08f 100644
--- a/tree.c
+++ b/tree.c
@@ -8,6 +8,7 @@
 #include "alloc.h"
 #include "tree-walk.h"
 #include "repository.h"
+#include "pathspec.h"
 
 const char *tree_type = "tree";
 
@@ -22,8 +23,8 @@ int read_tree_at(struct repository *r,
 	int len, oldlen = base->len;
 	enum interesting retval = entry_not_interesting;
 
-	if (parse_tree(tree))
-		return -1;
+	if (repo_parse_tree(r, tree))
+		die("Failed to parse tree");
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
 
@@ -37,7 +38,7 @@ int read_tree_at(struct repository *r,
 				continue;
 		}
 
-		switch (fn(&entry.oid, base,
+		switch (fn(r, &entry.oid, base,
 			   entry.path, entry.mode, context)) {
 		case 0:
 			continue;
@@ -47,36 +48,57 @@ int read_tree_at(struct repository *r,
 			return -1;
 		}
 
-		if (S_ISDIR(entry.mode))
+		if (S_ISDIR(entry.mode)) {
 			oidcpy(&oid, &entry.oid);
-		else if (S_ISGITLINK(entry.mode)) {
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(r, lookup_tree(r, &oid),
+						base, pathspec,
+						fn, context);
+			strbuf_setlen(base, oldlen);
+			if (retval)
+				return -1;
+		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
 			struct commit *commit;
+			struct repository subrepo;
+			struct repository* subrepo_p = &subrepo;
+			struct tree* submodule_tree;
 
-			commit = lookup_commit(r, &entry.oid);
+			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
+				die("couldn't init submodule %s%s", base->buf, entry.path);
+
+			if (repo_read_index(subrepo_p) < 0)
+				die("index file corrupt");
+
+			commit = lookup_commit(subrepo_p, &entry.oid);
 			if (!commit)
 				die("Commit %s in submodule path %s%s not found",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			if (parse_commit(commit))
+			if (repo_parse_commit(subrepo_p, commit))
 				die("Invalid commit %s in submodule path %s%s",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			oidcpy(&oid, get_commit_tree_oid(commit));
+			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
+			if (!submodule_tree) /* oidcpy() must never be handed NULL */
+				die("Commit %s in submodule path %s%s has no tree",
+				    oid_to_hex(&entry.oid), base->buf, entry.path);
+			oidcpy(&oid, &submodule_tree->object.oid);
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
+						base, pathspec, fn, context);
+			/* Restore base first so the message names the path only once. */
+			strbuf_setlen(base, oldlen);
+			if (retval)
+				die("failed to read tree for %s%s", base->buf, entry.path);
+			repo_clear(subrepo_p);
 		}
-		else
-			continue;
 
-		len = tree_entry_len(&entry);
-		strbuf_add(base, entry.path, len);
-		strbuf_addch(base, '/');
-		retval = read_tree_at(r, lookup_tree(r, &oid),
-				      base, pathspec,
-				      fn, context);
-		strbuf_setlen(base, oldlen);
-		if (retval)
-			return -1;
 	}
 	return 0;
 }
@@ -121,7 +143,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 	return 0;
 }
 
-int parse_tree_gently(struct tree *item, int quiet_on_missing)
+int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
 {
 	 enum object_type type;
 	 void *buffer;
@@ -129,7 +151,7 @@ int parse_tree_gently(struct tree *item, int quiet_on_missing)
 
 	if (item->object.parsed)
 		return 0;
-	buffer = read_object_file(&item->object.oid, &type, &size);
+	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
 	if (!buffer)
 		return quiet_on_missing ? -1 :
 			error("Could not read %s",
diff --git a/tree.h b/tree.h
index 6efff003e21..3dc8f151760 100644
--- a/tree.h
+++ b/tree.h
@@ -18,11 +18,14 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
 
 int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
 
-int parse_tree_gently(struct tree *tree, int quiet_on_missing);
-static inline int parse_tree(struct tree *tree)
+int parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
+static inline int repo_parse_tree(struct repository *r, struct tree *tree)
 {
-	return parse_tree_gently(tree, 0);
+	return parse_tree_gently(r, tree, 0);
 }
+#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
+#define parse_tree(tree) repo_parse_tree(the_repository, tree)
+#endif
 void free_tree_buffer(struct tree *tree);
 
 /* Parses and returns the tree in the given ent, chasing tags and commits. */
@@ -31,7 +34,7 @@ struct tree *parse_tree_indirect(const struct object_id *oid);
 int cmp_cache_name_compare(const void *a_, const void *b_);
 
 #define READ_TREE_RECURSIVE 1
-typedef int (*read_tree_fn_t)(const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
+typedef int (*read_tree_fn_t)(struct repository *, const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
 
 int read_tree_at(struct repository *r,
 		 struct tree *tree, struct strbuf *base,
diff --git a/wt-status.c b/wt-status.c
index 5813174896c..fefe4c55e1b 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -665,7 +665,7 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	release_revisions(&rev);
 }
 
-static int add_file_to_list(const struct object_id *oid,
+static int add_file_to_list(struct repository *repo UNUSED, const struct object_id *oid,
 			    struct strbuf *base, const char *path,
 			    unsigned int mode, void *context)
 {

base-commit: e85701b4af5b7c2a9f3a1b07858703318dce365d
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2022-10-28 18:19 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-12 17:52 [PATCH] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
2022-10-13 17:53     ` René Scharfe
2022-10-13 21:37       ` Heather Lapointe
2022-10-13 11:36   ` [PATCH v2 2/2] archive: fix a case of submodule in submodule traversal Alphadelta14 via GitGitGadget
2022-10-13 17:53   ` [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command René Scharfe
2022-10-13 21:23     ` Heather Lapointe
2022-10-14  9:47       ` René Scharfe
2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
2022-10-17 13:26       ` Junio C Hamano
2022-10-26 22:33       ` Glen Choo
2022-10-27 18:09       ` Jonathan Tan
2022-10-27 18:50         ` Junio C Hamano
2022-10-17  2:23     ` [PATCH v3 2/9] tree: update cases to use repo_ tree methods Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
2022-10-17 13:34       ` Phillip Wood
2022-10-17 13:36       ` Junio C Hamano
2022-10-27 18:28       ` Jonathan Tan
2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
2022-10-17 13:48       ` Phillip Wood
2022-10-17 13:56       ` Junio C Hamano
2022-10-26 22:48       ` Glen Choo
2022-10-27 18:43       ` Jonathan Tan
2022-10-17  2:23     ` [PATCH v3 5/9] tree: add repository parameter to read_tree_fn_t Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 6/9] archive: pass repo objects to write_archive handlers Heather Lapointe via GitGitGadget
2022-10-17 13:50       ` Phillip Wood
2022-10-17  2:23     ` [PATCH v3 7/9] archive: remove global repository from archive_args Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
2022-10-26 23:34       ` Glen Choo
2022-10-27  7:09         ` René Scharfe
2022-10-27 17:29           ` Glen Choo
2022-10-27 17:30           ` Glen Choo
2022-10-27 17:33           ` Glen Choo
2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
2022-10-27 18:54       ` Jonathan Tan
2022-10-27 23:30         ` Glen Choo
2022-10-28  0:17       ` Ævar Arnfjörð Bjarmason
2022-10-17 13:57     ` [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command Phillip Wood
2022-10-18 18:34     ` Junio C Hamano
2022-10-18 18:48       ` Heather Lapointe
2022-10-19 16:16         ` Junio C Hamano
2022-10-19 20:44           ` Junio C Hamano
2022-10-20  1:21             ` Junio C Hamano
2022-10-21  1:43               ` Junio C Hamano
2022-10-26 22:14     ` Glen Choo
2022-10-28 18:18       ` Heather Lapointe

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as for URLs of the read-only IMAP folder(s) and NNTP newsgroup(s).