git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] archive: add --recurse-submodules to git-archive command
@ 2022-10-12 17:52 Heather Lapointe via GitGitGadget
  2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
  0 siblings, 1 reply; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-12 17:52 UTC (permalink / raw)
  To: git; +Cc: Heather Lapointe, Alphadelta14

From: Alphadelta14 <alpha@alphaservcomputing.solutions>

This makes it possible to include submodule contents in the archive
produced by git-archive.

This required updating the general read_tree callbacks to support sub-repos
by passing an explicit repository instead of relying on the global
the_repository where possible.

archive: update streaming to use target repo
archive: add test cases for git archive --recurse-submodules

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
    archive: Add --recurse-submodules to git-archive command
    
    This makes it possible to include submodule contents in an archive
    command.
    
    The inspiration for this change comes from this GitHub thread,
    https://github.com/dear-github/dear-github/issues/214, which had at
    least 160 👍🏻 reactions at the time of writing. (I stumbled upon it
    because I wanted this feature as well.)
    
    I figured the underlying implementation wouldn't be too difficult with
    most of the plumbing already in place, so I decided to add the relevant
    logic to the client git-archive command.
    
    One of the trickier parts of this implementation involved teaching
    read_tree about submodules. Some of the troublesome areas were still
    using the global the_repository to look up commit, tree, or oid
    information. I ended up deciding that read_tree_fn_t would be best
    off taking an explicit repository argument, since that allows
    switching the context to a subrepo where needed (even though some of
    the callbacks do not use it).
    
    I am open to feedback since this is all quite new to me :)
    
    TODO:
    
     * working implementation
     * valgrind
     * add regression tests
     * update documentation with new flag
     * submit to mailing list

Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1359%2FAlphadelta14%2Farchive-recurse-submodules-v1
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1359/Alphadelta14/archive-recurse-submodules-v1
Pull-Request: https://github.com/git/git/pull/1359

 archive-tar.c                 | 14 +++--
 archive-zip.c                 | 14 ++---
 archive.c                     | 99 ++++++++++++++++++++++++-----------
 archive.h                     |  8 +--
 builtin/checkout.c            |  2 +-
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            | 10 ++--
 builtin/ls-tree.c             | 16 +++---
 list-objects.c                |  2 +-
 merge-recursive.c             |  2 +-
 revision.c                    |  4 +-
 sparse-index.c                |  2 +-
 t/t5005-archive-submodules.sh | 84 +++++++++++++++++++++++++++++
 tree.c                        | 64 ++++++++++++++--------
 tree.h                        | 11 ++--
 wt-status.c                   |  2 +-
 16 files changed, 246 insertions(+), 90 deletions(-)
 create mode 100755 t/t5005-archive-submodules.sh

diff --git a/archive-tar.c b/archive-tar.c
index 3e4822b6840..331fc10bca9 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -18,6 +18,7 @@ static unsigned long offset;
 static int tar_umask = 002;
 
 static int write_tar_filter_archive(const struct archiver *ar,
+				    struct repository *repo,
 				    struct archiver_args *args);
 
 /*
@@ -246,7 +247,8 @@ static void write_extended_header(struct archiver_args *args,
 	write_blocked(buffer, size);
 }
 
-static int write_tar_entry(struct archiver_args *args,
+static int write_tar_entry(struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -316,7 +318,7 @@ static int write_tar_entry(struct archiver_args *args,
 		if (buffer)
 			write_blocked(buffer, size);
 		else
-			err = stream_blocked(args->repo, oid);
+			err = stream_blocked(repo, oid);
 	}
 	return err;
 }
@@ -422,12 +424,13 @@ static int git_tar_config(const char *var, const char *value, void *cb)
 }
 
 static int write_tar_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err = 0;
 
 	write_global_extended_header(args);
-	err = write_archive_entries(args, write_tar_entry);
+	err = write_archive_entries(repo, args, write_tar_entry);
 	if (!err)
 		write_trailer();
 	return err;
@@ -462,6 +465,7 @@ static void tgz_write_block(const void *data)
 static const char internal_gzip_command[] = "git archive gzip";
 
 static int write_tar_filter_archive(const struct archiver *ar,
+				    struct repository *repo,
 				    struct archiver_args *args)
 {
 #if ZLIB_VERNUM >= 0x1221
@@ -484,7 +488,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		gzstream.next_out = outbuf;
 		gzstream.avail_out = sizeof(outbuf);
 
-		r = write_tar_archive(ar, args);
+		r = write_tar_archive(ar, repo, args);
 
 		tgz_deflate(Z_FINISH);
 		git_deflate_end(&gzstream);
@@ -506,7 +510,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		die_errno(_("unable to redirect descriptor"));
 	close(filter.in);
 
-	r = write_tar_archive(ar, args);
+	r = write_tar_archive(ar, repo, args);
 
 	close(1);
 	if (finish_command(&filter) != 0)
diff --git a/archive-zip.c b/archive-zip.c
index 0456f1ebf15..6b27d004b5d 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -283,7 +283,8 @@ static int entry_is_binary(struct index_state *istate, const char *path,
 
 #define STREAM_BUFFER_SIZE (1024 * 16)
 
-static int write_zip_entry(struct archiver_args *args,
+static int write_zip_entry(struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -340,7 +341,7 @@ static int write_zip_entry(struct archiver_args *args,
 
 		if (!buffer) {
 			enum object_type type;
-			stream = open_istream(args->repo, oid, &type, &size,
+			stream = open_istream(repo, oid, &type, &size,
 					      NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
@@ -349,7 +350,7 @@ static int write_zip_entry(struct archiver_args *args,
 			out = NULL;
 		} else {
 			crc = crc32(crc, buffer, size);
-			is_binary = entry_is_binary(args->repo->index,
+			is_binary = entry_is_binary(repo->index,
 						    path_without_prefix,
 						    buffer, size);
 			out = buffer;
@@ -426,7 +427,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
@@ -459,7 +460,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 
@@ -619,6 +620,7 @@ static int archive_zip_config(const char *var, const char *value,
 }
 
 static int write_zip_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err;
@@ -629,7 +631,7 @@ static int write_zip_archive(const struct archiver *ar UNUSED,
 
 	strbuf_init(&zip_dir, 0);
 
-	err = write_archive_entries(args, write_zip_entry);
+	err = write_archive_entries(repo, args, write_zip_entry);
 	if (!err)
 		write_zip_trailer(args->commit_oid);
 
diff --git a/archive.c b/archive.c
index 61a79e4a227..2ee7205679d 100644
--- a/archive.c
+++ b/archive.c
@@ -10,6 +10,7 @@
 #include "unpack-trees.h"
 #include "dir.h"
 #include "quote.h"
+#include "submodule.h"
 
 static char const * const archive_usage[] = {
 	N_("git archive [<options>] <tree-ish> [<path>...]"),
@@ -69,6 +70,7 @@ static void format_subst(const struct commit *commit,
 }
 
 static void *object_file_to_archive(const struct archiver_args *args,
+				    struct repository *repo,
 				    const char *path,
 				    const struct object_id *oid,
 				    unsigned int mode,
@@ -84,13 +86,13 @@ static void *object_file_to_archive(const struct archiver_args *args,
 			       (args->tree ? &args->tree->object.oid : NULL), oid);
 
 	path += args->baselen;
-	buffer = read_object_file(oid, type, sizep);
+	buffer = repo_read_object_file(repo, oid, type, sizep);
 	if (buffer && S_ISREG(mode)) {
 		struct strbuf buf = STRBUF_INIT;
 		size_t size = 0;
 
 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
-		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
+		convert_to_working_tree(repo->index, path, buf.buf, buf.len, &buf, &meta);
 		if (commit)
 			format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
 		buffer = strbuf_detach(&buf, &size);
@@ -134,7 +136,7 @@ static int check_attr_export_subst(const struct attr_check *check)
 	return check && ATTR_TRUE(check->items[1].value);
 }
 
-static int write_archive_entry(const struct object_id *oid, const char *base,
+static int write_archive_entry(struct repository *repo, const struct object_id *oid, const char *base,
 		int baselen, const char *filename, unsigned mode,
 		void *context)
 {
@@ -160,7 +162,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	if (!S_ISDIR(mode)) {
 		const struct attr_check *check;
-		check = get_archive_attrs(args->repo->index, path_without_prefix);
+		check = get_archive_attrs(repo->index, path_without_prefix);
 		if (check_attr_export_ignore(check))
 			return 0;
 		args->convert = check_attr_export_subst(check);
@@ -169,10 +171,10 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
+		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
-		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
+		return READ_TREE_RECURSIVE;
 	}
 
 	if (args->verbose)
@@ -180,14 +182,19 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	/* Stream it? */
 	if (S_ISREG(mode) && !args->convert &&
-	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
-	    size > big_file_threshold)
-		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
+	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
+	    size > big_file_threshold) {
+			err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
+			if (err) {
+				die("Failed to write file %.*s", (int)path.len, path.buf);
+			}
+			return err;
+		}
 
-	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+	buffer = object_file_to_archive(args, repo, path.buf, oid, mode, &type, &size);
 	if (!buffer)
 		return error(_("cannot read '%s'"), oid_to_hex(oid));
-	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
+	err = write_entry(repo, args, oid, path.buf, path.len, mode, buffer, size);
 	free(buffer);
 	return err;
 }
@@ -207,7 +214,25 @@ static void queue_directory(const struct object_id *oid,
 	oidcpy(&d->oid, oid);
 }
 
-static int write_directory(struct archiver_context *c)
+static void queue_submodule(struct repository *superproject,
+		const struct object_id *oid,
+		struct strbuf *base, const char *filename,
+		unsigned mode, struct archiver_context *c)
+{
+	struct repository subrepo;
+
+	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
+		return;
+
+	if (repo_read_index(&subrepo) < 0)
+		die("index file corrupt");
+
+    queue_directory(oid, base, filename, mode, c);
+
+	repo_clear(&subrepo);
+}
+
+static int write_directory(struct repository *repo, struct archiver_context *c)
 {
 	struct directory *d = c->bottom;
 	int ret;
@@ -217,15 +242,18 @@ static int write_directory(struct archiver_context *c)
 	c->bottom = d->up;
 	d->path[d->len - 1] = '\0'; /* no trailing slash */
 	ret =
-		write_directory(c) ||
-		write_archive_entry(&d->oid, d->path, d->baselen,
+		write_directory(repo, c) ||
+		write_archive_entry(repo, &d->oid, d->path, d->baselen,
 				    d->path + d->baselen, d->mode,
-				    c) != READ_TREE_RECURSIVE;
+				    c);
 	free(d);
-	return ret ? -1 : 0;
+	if (ret == READ_TREE_RECURSIVE)
+		return 0;
+	return ret;
 }
 
-static int queue_or_write_archive_entry(const struct object_id *oid,
+static int queue_or_write_archive_entry(
+		struct repository *repo, const struct object_id *oid,
 		struct strbuf *base, const char *filename,
 		unsigned mode, void *context)
 {
@@ -246,18 +274,23 @@ static int queue_or_write_archive_entry(const struct object_id *oid,
 		/* Borrow base, but restore its original value when done. */
 		strbuf_addstr(base, filename);
 		strbuf_addch(base, '/');
-		check = get_archive_attrs(c->args->repo->index, base->buf);
+		check = get_archive_attrs(repo->index, base->buf);
 		strbuf_setlen(base, baselen);
 
 		if (check_attr_export_ignore(check))
 			return 0;
 		queue_directory(oid, base, filename, mode, c);
 		return READ_TREE_RECURSIVE;
+	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
+		if (is_submodule_active(repo, filename)) {
+			queue_submodule(repo, oid, base, filename, mode, c);
+			return READ_TREE_RECURSIVE;
+		}
 	}
 
-	if (write_directory(c))
+	if (write_directory(repo, c))
 		return -1;
-	return write_archive_entry(oid, base->buf, base->len, filename, mode,
+	return write_archive_entry(repo, oid, base->buf, base->len, filename, mode,
 				   context);
 }
 
@@ -267,7 +300,8 @@ struct extra_file_info {
 	void *content;
 };
 
-int write_archive_entries(struct archiver_args *args,
+int write_archive_entries(struct repository *repo,
+		struct archiver_args *args,
 		write_archive_entry_fn_t write_entry)
 {
 	struct archiver_context context;
@@ -288,7 +322,7 @@ int write_archive_entries(struct archiver_args *args,
 			len--;
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)len, args->base);
-		err = write_entry(args, &args->tree->object.oid, args->base,
+		err = write_entry(repo, args, &args->tree->object.oid, args->base,
 				  len, 040777, NULL, 0);
 		if (err)
 			return err;
@@ -305,8 +339,8 @@ int write_archive_entries(struct archiver_args *args,
 		memset(&opts, 0, sizeof(opts));
 		opts.index_only = 1;
 		opts.head_idx = -1;
-		opts.src_index = args->repo->index;
-		opts.dst_index = args->repo->index;
+		opts.src_index = repo->index;
+		opts.dst_index = repo->index;
 		opts.fn = oneway_merge;
 		init_tree_desc(&t, args->tree->buffer, args->tree->size);
 		if (unpack_trees(1, &t, &opts))
@@ -314,7 +348,7 @@ int write_archive_entries(struct archiver_args *args,
 		git_attr_set_direction(GIT_ATTR_INDEX);
 	}
 
-	err = read_tree(args->repo, args->tree,
+	err = read_tree(repo, args->tree,
 			&args->pathspec,
 			queue_or_write_archive_entry,
 			&context);
@@ -343,12 +377,12 @@ int write_archive_entries(struct archiver_args *args,
 			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
 				err = error_errno(_("cannot read '%s'"), path);
 			else
-				err = write_entry(args, &fake_oid, path_in_archive.buf,
+				err = write_entry(repo, args, &fake_oid, path_in_archive.buf,
 						  path_in_archive.len,
 						  canon_mode(info->stat.st_mode),
 						  content.buf, content.len);
 		} else {
-			err = write_entry(args, &fake_oid,
+			err = write_entry(repo, args, &fake_oid,
 					  path, strlen(path),
 					  canon_mode(info->stat.st_mode),
 					  info->content, info->stat.st_size);
@@ -382,7 +416,7 @@ struct path_exists_context {
 	struct archiver_args *args;
 };
 
-static int reject_entry(const struct object_id *oid UNUSED,
+static int reject_entry(struct repository *repo, const struct object_id *oid UNUSED,
 			struct strbuf *base,
 			const char *filename, unsigned mode,
 			void *context)
@@ -394,7 +428,7 @@ static int reject_entry(const struct object_id *oid UNUSED,
 		struct strbuf sb = STRBUF_INIT;
 		strbuf_addbuf(&sb, base);
 		strbuf_addstr(&sb, filename);
-		if (!match_pathspec(ctx->args->repo->index,
+		if (!match_pathspec(repo->index,
 				    &ctx->pathspec,
 				    sb.buf, sb.len, 0, NULL, 1))
 			ret = READ_TREE_RECURSIVE;
@@ -431,6 +465,7 @@ static void parse_pathspec_arg(const char **pathspec,
 		       PATHSPEC_PREFER_FULL,
 		       "", pathspec);
 	ar_args->pathspec.recursive = 1;
+	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
 	if (pathspec) {
 		while (*pathspec) {
 			if (**pathspec && !path_exists(ar_args, *pathspec))
@@ -592,6 +627,7 @@ static int parse_archive_args(int argc, const char **argv,
 	int verbose = 0;
 	int i;
 	int list = 0;
+	int recurse_submodules = 0;
 	int worktree_attributes = 0;
 	struct option opts[] = {
 		OPT_GROUP(""),
@@ -606,6 +642,8 @@ static int parse_archive_args(int argc, const char **argv,
 		  add_file_cb, (intptr_t)&base },
 		OPT_STRING('o', "output", &output, N_("file"),
 			N_("write the archive to this file")),
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("include submodules in archive")),
 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
 			N_("read .gitattributes in working directory")),
 		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
@@ -670,6 +708,7 @@ static int parse_archive_args(int argc, const char **argv,
 	args->base = base;
 	args->baselen = strlen(base);
 	args->worktree_attributes = worktree_attributes;
+	args->recurse_submodules = recurse_submodules;
 
 	return argc;
 }
@@ -708,7 +747,7 @@ int write_archive(int argc, const char **argv, const char *prefix,
 	parse_treeish_arg(argv, &args, prefix, remote);
 	parse_pathspec_arg(argv + 1, &args);
 
-	rc = ar->write_archive(ar, &args);
+	rc = ar->write_archive(ar, repo, &args);
 
 	string_list_clear_func(&args.extra_files, extra_file_info_clear);
 	free(args.refname);
diff --git a/archive.h b/archive.h
index 08bed3ed3af..91b318f9ad8 100644
--- a/archive.h
+++ b/archive.h
@@ -19,6 +19,7 @@ struct archiver_args {
 	timestamp_t time;
 	struct pathspec pathspec;
 	unsigned int verbose : 1;
+	unsigned int recurse_submodules : 1;
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
 	int compression_level;
@@ -41,7 +42,7 @@ const char *archive_format_from_filename(const char *filename);
 #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
 struct archiver {
 	const char *name;
-	int (*write_archive)(const struct archiver *, struct archiver_args *);
+	int (*write_archive)(const struct archiver *, struct repository *repo, struct archiver_args *);
 	unsigned flags;
 	char *filter_command;
 };
@@ -51,12 +52,13 @@ void init_tar_archiver(void);
 void init_zip_archiver(void);
 void init_archivers(void);
 
-typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
+typedef int (*write_archive_entry_fn_t)(struct repository *repo,
+					struct archiver_args *args,
 					const struct object_id *oid,
 					const char *path, size_t pathlen,
 					unsigned int mode,
 					void *buffer, unsigned long size);
 
-int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
+int write_archive_entries(struct repository *repo, struct archiver_args *args, write_archive_entry_fn_t write_entry);
 
 #endif	/* ARCHIVE_H */
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 2a132392fbe..1238774b245 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -124,7 +124,7 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
 
 }
 
-static int update_some(const struct object_id *oid, struct strbuf *base,
+static int update_some(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 		       const char *pathname, unsigned mode, void *context UNUSED)
 {
 	int len;
diff --git a/builtin/log.c b/builtin/log.c
index ee19dc5d450..da73bbef836 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -698,7 +698,7 @@ static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 	return 0;
 }
 
-static int show_tree_object(const struct object_id *oid UNUSED,
+static int show_tree_object(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
 			    struct strbuf *base UNUSED,
 			    const char *pathname, unsigned mode,
 			    void *context)
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 4cf8a236483..a08768dbd2a 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -509,7 +509,7 @@ static int get_common_prefix_len(const char *common_prefix)
 	return common_prefix_len;
 }
 
-static int read_one_entry_opt(struct index_state *istate,
+static int read_one_entry_opt(struct repository *repo UNUSED, struct index_state *istate,
 			      const struct object_id *oid,
 			      struct strbuf *base,
 			      const char *pathname,
@@ -533,12 +533,12 @@ static int read_one_entry_opt(struct index_state *istate,
 	return add_index_entry(istate, ce, opt);
 }
 
-static int read_one_entry(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context)
 {
 	struct index_state *istate = context;
-	return read_one_entry_opt(istate, oid, base, pathname,
+	return read_one_entry_opt(repo, istate, oid, base, pathname,
 				  mode,
 				  ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
 }
@@ -547,12 +547,12 @@ static int read_one_entry(const struct object_id *oid, struct strbuf *base,
  * This is used when the caller knows there is no existing entries at
  * the stage that will conflict with the entry being added.
  */
-static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry_quick(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 				const char *pathname, unsigned mode,
 				void *context)
 {
 	struct index_state *istate = context;
-	return read_one_entry_opt(istate, oid, base, pathname,
+	return read_one_entry_opt(repo, istate, oid, base, pathname,
 				  mode, ADD_CACHE_JUST_APPEND);
 }
 
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index c3ea09281af..c8d6ff95ac0 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -141,7 +141,7 @@ static int show_recursive(const char *base, size_t baselen, const char *pathname
 	return 0;
 }
 
-static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
+static int show_tree_fmt(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			 const char *pathname, unsigned mode, void *context UNUSED)
 {
 	size_t baselen;
@@ -211,7 +211,7 @@ static void show_tree_common_default_long(struct strbuf *base,
 	strbuf_setlen(base, baselen);
 }
 
-static int show_tree_default(const struct object_id *oid, struct strbuf *base,
+static int show_tree_default(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			     const char *pathname, unsigned mode,
 			     void *context UNUSED)
 {
@@ -229,7 +229,7 @@ static int show_tree_default(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_long(const struct object_id *oid, struct strbuf *base,
+static int show_tree_long(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context UNUSED)
 {
@@ -244,7 +244,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 
 	if (data.type == OBJ_BLOB) {
 		unsigned long size;
-		if (oid_object_info(the_repository, data.oid, &size) == OBJ_BAD)
+		if (oid_object_info(repo, data.oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
 			xsnprintf(size_text, sizeof(size_text),
@@ -254,12 +254,12 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 	}
 
 	printf("%06o %s %s %7s\t", data.mode, type_name(data.type),
-	       find_unique_abbrev(data.oid, abbrev), size_text);
+	       repo_find_unique_abbrev(repo, data.oid, abbrev), size_text);
 	show_tree_common_default_long(base, pathname, data.base->len);
 	return recurse;
 }
 
-static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
+static int show_tree_name_only(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			       const char *pathname, unsigned mode,
 			       void *context UNUSED)
 {
@@ -280,7 +280,7 @@ static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_object(const struct object_id *oid, struct strbuf *base,
+static int show_tree_object(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			    const char *pathname, unsigned mode,
 			    void *context UNUSED)
 {
@@ -292,7 +292,7 @@ static int show_tree_object(const struct object_id *oid, struct strbuf *base,
 	if (early >= 0)
 		return early;
 
-	printf("%s%c", find_unique_abbrev(oid, abbrev), line_termination);
+	printf("%s%c", repo_find_unique_abbrev(repo, oid, abbrev), line_termination);
 	return recurse;
 }
 
diff --git a/list-objects.c b/list-objects.c
index 250d9de41cb..4f463ae32ad 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -185,7 +185,7 @@ static void process_tree(struct traversal_context *ctx,
 	    !revs->include_check_obj(&tree->object, revs->include_check_data))
 		return;
 
-	failed_parse = parse_tree_gently(tree, 1);
+	failed_parse = parse_tree_gently(revs->repo, tree, 1);
 	if (failed_parse) {
 		if (revs->ignore_missing_links)
 			return;
diff --git a/merge-recursive.c b/merge-recursive.c
index 4ddd3adea00..119e86090f9 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -456,7 +456,7 @@ static void unpack_trees_finish(struct merge_options *opt)
 	clear_unpack_trees_porcelain(&opt->priv->unpack_opts);
 }
 
-static int save_files_dirs(const struct object_id *oid UNUSED,
+static int save_files_dirs(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
 			   struct strbuf *base, const char *path,
 			   unsigned int mode, void *context)
 {
diff --git a/revision.c b/revision.c
index 36e31942cee..39e023a19ea 100644
--- a/revision.c
+++ b/revision.c
@@ -74,7 +74,7 @@ static void mark_tree_contents_uninteresting(struct repository *r,
 	struct tree_desc desc;
 	struct name_entry entry;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
@@ -181,7 +181,7 @@ static void add_children_by_path(struct repository *r,
 	if (!tree)
 		return;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
diff --git a/sparse-index.c b/sparse-index.c
index e4a54ce1943..05ca8aec1fb 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -232,7 +232,7 @@ static void set_index_entry(struct index_state *istate, int nr, struct cache_ent
 	add_name_hash(istate, ce);
 }
 
-static int add_path_to_index(const struct object_id *oid,
+static int add_path_to_index(struct repository *repo UNUSED, const struct object_id *oid,
 			     struct strbuf *base, const char *path,
 			     unsigned int mode, void *context)
 {
diff --git a/t/t5005-archive-submodules.sh b/t/t5005-archive-submodules.sh
new file mode 100755
index 00000000000..49d5ff74ad5
--- /dev/null
+++ b/t/t5005-archive-submodules.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+test_description='git archive --recurse-submodules test'
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-submodule-update.sh
+
+test_expect_success 'setup' '
+	create_lib_submodule_repo &&
+	git -C submodule_update_repo checkout valid_sub1 &&
+	git -C submodule_update_repo submodule update
+'
+
+check_tar() {
+	tarfile=$1.tar
+	listfile=$1.lst
+	dir=$1
+	dir_with_prefix=$dir/$2
+
+	test_expect_success ' extract tar archive' '
+		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
+	'
+}
+
+check_added() {
+	dir=$1
+	path_in_fs=$2
+	path_in_archive=$3
+
+	test_expect_success " validate extra file $path_in_archive" '
+		test -f $dir/$path_in_archive &&
+		diff -r $path_in_fs $dir/$path_in_archive
+	'
+}
+
+check_not_added() {
+	dir=$1
+	path_in_archive=$2
+
+	test_expect_success " validate unpresent file $path_in_archive" '
+		! test -f $dir/$path_in_archive &&
+		! test -d $dir/$path_in_archive
+	'
+}
+
+test_expect_success 'archive without recurse, non-init' '
+	reset_work_tree_to valid_sub1 &&
+	git -C submodule_update archive HEAD >b.tar
+'
+
+check_tar b
+check_added b submodule_update/file1 file1
+check_not_added b sub1/file1
+
+test_expect_success 'archive with recurse, non-init' '
+	reset_work_tree_to valid_sub1 &&
+	! git -C submodule_update archive --recurse-submodules HEAD >b2-err.tar
+'
+
+test_expect_success 'archive with recurse, init' '
+	reset_work_tree_to valid_sub1 &&
+	git -C submodule_update submodule update --init &&
+	git -C submodule_update ls-files --recurse-submodules &&
+	git -C submodule_update ls-tree HEAD &&
+	git -C submodule_update archive --recurse-submodules HEAD >b2.tar
+'
+
+check_tar b2
+check_added b2 submodule_update/sub1/file1 sub1/file1
+
+test_expect_success 'archive with recurse with big files' '
+	reset_work_tree_to valid_sub1 &&
+	test_config core.bigfilethreshold 1 &&
+	git -C submodule_update submodule update --init &&
+	git -C submodule_update ls-files --recurse-submodules &&
+	git -C submodule_update ls-tree HEAD &&
+	git -C submodule_update archive --recurse-submodules HEAD >b3.tar
+'
+
+check_tar b3
+check_added b3 submodule_update/sub1/file1 sub1/file1
+
+
+test_done
diff --git a/tree.c b/tree.c
index 410e3b477e5..c5b5a0ac08f 100644
--- a/tree.c
+++ b/tree.c
@@ -8,6 +8,7 @@
 #include "alloc.h"
 #include "tree-walk.h"
 #include "repository.h"
+#include "pathspec.h"
 
 const char *tree_type = "tree";
 
@@ -22,8 +23,8 @@ int read_tree_at(struct repository *r,
 	int len, oldlen = base->len;
 	enum interesting retval = entry_not_interesting;
 
-	if (parse_tree(tree))
-		return -1;
+	if (repo_parse_tree(r, tree))
+		die("Failed to parse tree");
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
 
@@ -37,7 +38,7 @@ int read_tree_at(struct repository *r,
 				continue;
 		}
 
-		switch (fn(&entry.oid, base,
+		switch (fn(r, &entry.oid, base,
 			   entry.path, entry.mode, context)) {
 		case 0:
 			continue;
@@ -47,36 +48,57 @@ int read_tree_at(struct repository *r,
 			return -1;
 		}
 
-		if (S_ISDIR(entry.mode))
+		if (S_ISDIR(entry.mode)) {
 			oidcpy(&oid, &entry.oid);
-		else if (S_ISGITLINK(entry.mode)) {
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(r, lookup_tree(r, &oid),
+						base, pathspec,
+						fn, context);
+			strbuf_setlen(base, oldlen);
+			if (retval)
+				return -1;
+		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
 			struct commit *commit;
+			struct repository subrepo;
+			struct repository* subrepo_p = &subrepo;
+			struct tree* submodule_tree;
 
-			commit = lookup_commit(r, &entry.oid);
+			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
+				die("couldn't init submodule %s%s", base->buf, entry.path);
+
+			if (repo_read_index(subrepo_p) < 0)
+				die("index file corrupt");
+
+			commit = lookup_commit(subrepo_p, &entry.oid);
 			if (!commit)
 				die("Commit %s in submodule path %s%s not found",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			if (parse_commit(commit))
+			if (repo_parse_commit(subrepo_p, commit))
 				die("Invalid commit %s in submodule path %s%s",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			oidcpy(&oid, get_commit_tree_oid(commit));
+			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
+			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
+
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
+						base, pathspec,
+						fn, context);
+			if (retval) {
+			    die("failed to read tree for %s%s", base->buf, entry.path);
+			    return -1;
+			}
+			strbuf_setlen(base, oldlen);
+			repo_clear(subrepo_p);
 		}
-		else
-			continue;
 
-		len = tree_entry_len(&entry);
-		strbuf_add(base, entry.path, len);
-		strbuf_addch(base, '/');
-		retval = read_tree_at(r, lookup_tree(r, &oid),
-				      base, pathspec,
-				      fn, context);
-		strbuf_setlen(base, oldlen);
-		if (retval)
-			return -1;
 	}
 	return 0;
 }
@@ -121,7 +143,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 	return 0;
 }
 
-int parse_tree_gently(struct tree *item, int quiet_on_missing)
+int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
 {
 	 enum object_type type;
 	 void *buffer;
@@ -129,7 +151,7 @@ int parse_tree_gently(struct tree *item, int quiet_on_missing)
 
 	if (item->object.parsed)
 		return 0;
-	buffer = read_object_file(&item->object.oid, &type, &size);
+	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
 	if (!buffer)
 		return quiet_on_missing ? -1 :
 			error("Could not read %s",
diff --git a/tree.h b/tree.h
index 6efff003e21..3dc8f151760 100644
--- a/tree.h
+++ b/tree.h
@@ -18,11 +18,14 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
 
 int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
 
-int parse_tree_gently(struct tree *tree, int quiet_on_missing);
-static inline int parse_tree(struct tree *tree)
+int parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
+static inline int repo_parse_tree(struct repository *r, struct tree *tree)
 {
-	return parse_tree_gently(tree, 0);
+	return parse_tree_gently(r, tree, 0);
 }
+#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
+#define parse_tree(tree) repo_parse_tree(the_repository, tree)
+#endif
 void free_tree_buffer(struct tree *tree);
 
 /* Parses and returns the tree in the given ent, chasing tags and commits. */
@@ -31,7 +34,7 @@ struct tree *parse_tree_indirect(const struct object_id *oid);
 int cmp_cache_name_compare(const void *a_, const void *b_);
 
 #define READ_TREE_RECURSIVE 1
-typedef int (*read_tree_fn_t)(const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
+typedef int (*read_tree_fn_t)(struct repository *, const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
 
 int read_tree_at(struct repository *r,
 		 struct tree *tree, struct strbuf *base,
diff --git a/wt-status.c b/wt-status.c
index 5813174896c..fefe4c55e1b 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -665,7 +665,7 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	release_revisions(&rev);
 }
 
-static int add_file_to_list(const struct object_id *oid,
+static int add_file_to_list(struct repository *repo UNUSED, const struct object_id *oid,
 			    struct strbuf *base, const char *path,
 			    unsigned int mode, void *context)
 {

base-commit: e85701b4af5b7c2a9f3a1b07858703318dce365d
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command
  2022-10-12 17:52 [PATCH] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
@ 2022-10-13 11:35 ` Heather Lapointe via GitGitGadget
  2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
                     ` (3 more replies)
  0 siblings, 4 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-13 11:35 UTC (permalink / raw)
  To: git; +Cc: Heather Lapointe

This makes it possible to include submodule contents in an archive command.

The inspiration for this change comes from this GitHub thread,
https://github.com/dear-github/dear-github/issues/214, which had at least
160 👍🏻 reactions at the time of writing. (I stumbled upon it because I
wanted this feature as well.)

I figured the underlying implementation wouldn't be too difficult with most
of the plumbing already in place, so I decided to add the relevant logic to
the client git-archive command.

One of the trickier parts of this implementation involved teaching read_tree
about submodules. Some of the troublesome areas were still using the global
the_repository reference to look up commit, tree, or oid information. I
ended up deciding that read_tree_fn_t would probably be best off taking a
concrete repo reference, since that allows changing the context to a subrepo
where needed (even though some of the callbacks do not need it).

I am open to feedback since this is all quite new to me :)

TODO:

 * working implementation
 * valgrind
 * add regression tests
 * update documentation with new flag
 * submit to mailing list

Alphadelta14 (2):
  archive: add --recurse-submodules to git-archive command
  archive: fix a case of submodule in submodule traversal

 archive-tar.c                 | 14 +++--
 archive-zip.c                 | 14 ++---
 archive.c                     | 99 ++++++++++++++++++++++++-----------
 archive.h                     |  8 +--
 builtin/checkout.c            |  2 +-
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            | 10 ++--
 builtin/ls-tree.c             | 16 +++---
 list-objects.c                |  2 +-
 merge-recursive.c             |  2 +-
 revision.c                    |  4 +-
 sparse-index.c                |  2 +-
 t/t5005-archive-submodules.sh | 84 +++++++++++++++++++++++++++++
 tree.c                        | 93 ++++++++++++++++++++++----------
 tree.h                        | 11 ++--
 wt-status.c                   |  2 +-
 16 files changed, 269 insertions(+), 96 deletions(-)
 create mode 100755 t/t5005-archive-submodules.sh


base-commit: e85701b4af5b7c2a9f3a1b07858703318dce365d
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1359%2FAlphadelta14%2Farchive-recurse-submodules-v2
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1359/Alphadelta14/archive-recurse-submodules-v2
Pull-Request: https://github.com/git/git/pull/1359

Range-diff vs v1:

 1:  41664a59029 = 1:  41664a59029 archive: add --recurse-submodules to git-archive command
 -:  ----------- > 2:  68f7830c6d9 archive: fix a case of submodule in submodule traversal

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v2 1/2] archive: add --recurse-submodules to git-archive command
  2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
@ 2022-10-13 11:35   ` Alphadelta14 via GitGitGadget
  2022-10-13 17:53     ` René Scharfe
  2022-10-13 11:36   ` [PATCH v2 2/2] archive: fix a case of submodule in submodule traversal Alphadelta14 via GitGitGadget
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 48+ messages in thread
From: Alphadelta14 via GitGitGadget @ 2022-10-13 11:35 UTC (permalink / raw)
  To: git; +Cc: Heather Lapointe, Alphadelta14

From: Alphadelta14 <alpha@alphaservcomputing.solutions>

This makes it possible to include submodule contents in an archive command.

This required updating the general read_tree callbacks to support sub-repos
by not using the_repository global references where possible.

archive: update streaming to use target repo
archive: add test cases for git archive --recurse-submodules

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 archive-tar.c                 | 14 +++--
 archive-zip.c                 | 14 ++---
 archive.c                     | 99 ++++++++++++++++++++++++-----------
 archive.h                     |  8 +--
 builtin/checkout.c            |  2 +-
 builtin/log.c                 |  2 +-
 builtin/ls-files.c            | 10 ++--
 builtin/ls-tree.c             | 16 +++---
 list-objects.c                |  2 +-
 merge-recursive.c             |  2 +-
 revision.c                    |  4 +-
 sparse-index.c                |  2 +-
 t/t5005-archive-submodules.sh | 84 +++++++++++++++++++++++++++++
 tree.c                        | 64 ++++++++++++++--------
 tree.h                        | 11 ++--
 wt-status.c                   |  2 +-
 16 files changed, 246 insertions(+), 90 deletions(-)
 create mode 100755 t/t5005-archive-submodules.sh

diff --git a/archive-tar.c b/archive-tar.c
index 3e4822b6840..331fc10bca9 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -18,6 +18,7 @@ static unsigned long offset;
 static int tar_umask = 002;
 
 static int write_tar_filter_archive(const struct archiver *ar,
+				    struct repository *repo,
 				    struct archiver_args *args);
 
 /*
@@ -246,7 +247,8 @@ static void write_extended_header(struct archiver_args *args,
 	write_blocked(buffer, size);
 }
 
-static int write_tar_entry(struct archiver_args *args,
+static int write_tar_entry(struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -316,7 +318,7 @@ static int write_tar_entry(struct archiver_args *args,
 		if (buffer)
 			write_blocked(buffer, size);
 		else
-			err = stream_blocked(args->repo, oid);
+			err = stream_blocked(repo, oid);
 	}
 	return err;
 }
@@ -422,12 +424,13 @@ static int git_tar_config(const char *var, const char *value, void *cb)
 }
 
 static int write_tar_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err = 0;
 
 	write_global_extended_header(args);
-	err = write_archive_entries(args, write_tar_entry);
+	err = write_archive_entries(repo, args, write_tar_entry);
 	if (!err)
 		write_trailer();
 	return err;
@@ -462,6 +465,7 @@ static void tgz_write_block(const void *data)
 static const char internal_gzip_command[] = "git archive gzip";
 
 static int write_tar_filter_archive(const struct archiver *ar,
+				    struct repository *repo,
 				    struct archiver_args *args)
 {
 #if ZLIB_VERNUM >= 0x1221
@@ -484,7 +488,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		gzstream.next_out = outbuf;
 		gzstream.avail_out = sizeof(outbuf);
 
-		r = write_tar_archive(ar, args);
+		r = write_tar_archive(ar, repo, args);
 
 		tgz_deflate(Z_FINISH);
 		git_deflate_end(&gzstream);
@@ -506,7 +510,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		die_errno(_("unable to redirect descriptor"));
 	close(filter.in);
 
-	r = write_tar_archive(ar, args);
+	r = write_tar_archive(ar, repo, args);
 
 	close(1);
 	if (finish_command(&filter) != 0)
diff --git a/archive-zip.c b/archive-zip.c
index 0456f1ebf15..6b27d004b5d 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -283,7 +283,8 @@ static int entry_is_binary(struct index_state *istate, const char *path,
 
 #define STREAM_BUFFER_SIZE (1024 * 16)
 
-static int write_zip_entry(struct archiver_args *args,
+static int write_zip_entry(struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -340,7 +341,7 @@ static int write_zip_entry(struct archiver_args *args,
 
 		if (!buffer) {
 			enum object_type type;
-			stream = open_istream(args->repo, oid, &type, &size,
+			stream = open_istream(repo, oid, &type, &size,
 					      NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
@@ -349,7 +350,7 @@ static int write_zip_entry(struct archiver_args *args,
 			out = NULL;
 		} else {
 			crc = crc32(crc, buffer, size);
-			is_binary = entry_is_binary(args->repo->index,
+			is_binary = entry_is_binary(repo->index,
 						    path_without_prefix,
 						    buffer, size);
 			out = buffer;
@@ -426,7 +427,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
@@ -459,7 +460,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 
@@ -619,6 +620,7 @@ static int archive_zip_config(const char *var, const char *value,
 }
 
 static int write_zip_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err;
@@ -629,7 +631,7 @@ static int write_zip_archive(const struct archiver *ar UNUSED,
 
 	strbuf_init(&zip_dir, 0);
 
-	err = write_archive_entries(args, write_zip_entry);
+	err = write_archive_entries(repo, args, write_zip_entry);
 	if (!err)
 		write_zip_trailer(args->commit_oid);
 
diff --git a/archive.c b/archive.c
index 61a79e4a227..2ee7205679d 100644
--- a/archive.c
+++ b/archive.c
@@ -10,6 +10,7 @@
 #include "unpack-trees.h"
 #include "dir.h"
 #include "quote.h"
+#include "submodule.h"
 
 static char const * const archive_usage[] = {
 	N_("git archive [<options>] <tree-ish> [<path>...]"),
@@ -69,6 +70,7 @@ static void format_subst(const struct commit *commit,
 }
 
 static void *object_file_to_archive(const struct archiver_args *args,
+				    struct repository *repo,
 				    const char *path,
 				    const struct object_id *oid,
 				    unsigned int mode,
@@ -84,13 +86,13 @@ static void *object_file_to_archive(const struct archiver_args *args,
 			       (args->tree ? &args->tree->object.oid : NULL), oid);
 
 	path += args->baselen;
-	buffer = read_object_file(oid, type, sizep);
+	buffer = repo_read_object_file(repo, oid, type, sizep);
 	if (buffer && S_ISREG(mode)) {
 		struct strbuf buf = STRBUF_INIT;
 		size_t size = 0;
 
 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
-		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
+		convert_to_working_tree(repo->index, path, buf.buf, buf.len, &buf, &meta);
 		if (commit)
 			format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
 		buffer = strbuf_detach(&buf, &size);
@@ -134,7 +136,7 @@ static int check_attr_export_subst(const struct attr_check *check)
 	return check && ATTR_TRUE(check->items[1].value);
 }
 
-static int write_archive_entry(const struct object_id *oid, const char *base,
+static int write_archive_entry(struct repository *repo, const struct object_id *oid, const char *base,
 		int baselen, const char *filename, unsigned mode,
 		void *context)
 {
@@ -160,7 +162,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	if (!S_ISDIR(mode)) {
 		const struct attr_check *check;
-		check = get_archive_attrs(args->repo->index, path_without_prefix);
+		check = get_archive_attrs(repo->index, path_without_prefix);
 		if (check_attr_export_ignore(check))
 			return 0;
 		args->convert = check_attr_export_subst(check);
@@ -169,10 +171,10 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
+		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
-		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
+		return READ_TREE_RECURSIVE;
 	}
 
 	if (args->verbose)
@@ -180,14 +182,19 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	/* Stream it? */
 	if (S_ISREG(mode) && !args->convert &&
-	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
-	    size > big_file_threshold)
-		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
+	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
+	    size > big_file_threshold) {
+			err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
+			if (err) {
+				die("Failed to write file %.*s", (int)path.len, path.buf);
+			}
+			return err;
+		}
 
-	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+	buffer = object_file_to_archive(args, repo, path.buf, oid, mode, &type, &size);
 	if (!buffer)
 		return error(_("cannot read '%s'"), oid_to_hex(oid));
-	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
+	err = write_entry(repo, args, oid, path.buf, path.len, mode, buffer, size);
 	free(buffer);
 	return err;
 }
@@ -207,7 +214,25 @@ static void queue_directory(const struct object_id *oid,
 	oidcpy(&d->oid, oid);
 }
 
-static int write_directory(struct archiver_context *c)
+static void queue_submodule(struct repository *superproject,
+		const struct object_id *oid,
+		struct strbuf *base, const char *filename,
+		unsigned mode, struct archiver_context *c)
+{
+	struct repository subrepo;
+
+	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
+		return;
+
+	if (repo_read_index(&subrepo) < 0)
+		die("index file corrupt");
+
+    queue_directory(oid, base, filename, mode, c);
+
+	repo_clear(&subrepo);
+}
+
+static int write_directory(struct repository *repo, struct archiver_context *c)
 {
 	struct directory *d = c->bottom;
 	int ret;
@@ -217,15 +242,18 @@ static int write_directory(struct archiver_context *c)
 	c->bottom = d->up;
 	d->path[d->len - 1] = '\0'; /* no trailing slash */
 	ret =
-		write_directory(c) ||
-		write_archive_entry(&d->oid, d->path, d->baselen,
+		write_directory(repo, c) ||
+		write_archive_entry(repo, &d->oid, d->path, d->baselen,
 				    d->path + d->baselen, d->mode,
-				    c) != READ_TREE_RECURSIVE;
+				    c);
 	free(d);
-	return ret ? -1 : 0;
+	if (ret == READ_TREE_RECURSIVE)
+		return 0;
+	return ret;
 }
 
-static int queue_or_write_archive_entry(const struct object_id *oid,
+static int queue_or_write_archive_entry(
+		struct repository *repo, const struct object_id *oid,
 		struct strbuf *base, const char *filename,
 		unsigned mode, void *context)
 {
@@ -246,18 +274,23 @@ static int queue_or_write_archive_entry(const struct object_id *oid,
 		/* Borrow base, but restore its original value when done. */
 		strbuf_addstr(base, filename);
 		strbuf_addch(base, '/');
-		check = get_archive_attrs(c->args->repo->index, base->buf);
+		check = get_archive_attrs(repo->index, base->buf);
 		strbuf_setlen(base, baselen);
 
 		if (check_attr_export_ignore(check))
 			return 0;
 		queue_directory(oid, base, filename, mode, c);
 		return READ_TREE_RECURSIVE;
+	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
+		if (is_submodule_active(repo, filename)) {
+			queue_submodule(repo, oid, base, filename, mode, c);
+			return READ_TREE_RECURSIVE;
+		}
 	}
 
-	if (write_directory(c))
+	if (write_directory(repo, c))
 		return -1;
-	return write_archive_entry(oid, base->buf, base->len, filename, mode,
+	return write_archive_entry(repo, oid, base->buf, base->len, filename, mode,
 				   context);
 }
 
@@ -267,7 +300,8 @@ struct extra_file_info {
 	void *content;
 };
 
-int write_archive_entries(struct archiver_args *args,
+int write_archive_entries(struct repository *repo,
+		struct archiver_args *args,
 		write_archive_entry_fn_t write_entry)
 {
 	struct archiver_context context;
@@ -288,7 +322,7 @@ int write_archive_entries(struct archiver_args *args,
 			len--;
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)len, args->base);
-		err = write_entry(args, &args->tree->object.oid, args->base,
+		err = write_entry(repo, args, &args->tree->object.oid, args->base,
 				  len, 040777, NULL, 0);
 		if (err)
 			return err;
@@ -305,8 +339,8 @@ int write_archive_entries(struct archiver_args *args,
 		memset(&opts, 0, sizeof(opts));
 		opts.index_only = 1;
 		opts.head_idx = -1;
-		opts.src_index = args->repo->index;
-		opts.dst_index = args->repo->index;
+		opts.src_index = repo->index;
+		opts.dst_index = repo->index;
 		opts.fn = oneway_merge;
 		init_tree_desc(&t, args->tree->buffer, args->tree->size);
 		if (unpack_trees(1, &t, &opts))
@@ -314,7 +348,7 @@ int write_archive_entries(struct archiver_args *args,
 		git_attr_set_direction(GIT_ATTR_INDEX);
 	}
 
-	err = read_tree(args->repo, args->tree,
+	err = read_tree(repo, args->tree,
 			&args->pathspec,
 			queue_or_write_archive_entry,
 			&context);
@@ -343,12 +377,12 @@ int write_archive_entries(struct archiver_args *args,
 			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
 				err = error_errno(_("cannot read '%s'"), path);
 			else
-				err = write_entry(args, &fake_oid, path_in_archive.buf,
+				err = write_entry(repo, args, &fake_oid, path_in_archive.buf,
 						  path_in_archive.len,
 						  canon_mode(info->stat.st_mode),
 						  content.buf, content.len);
 		} else {
-			err = write_entry(args, &fake_oid,
+			err = write_entry(repo, args, &fake_oid,
 					  path, strlen(path),
 					  canon_mode(info->stat.st_mode),
 					  info->content, info->stat.st_size);
@@ -382,7 +416,7 @@ struct path_exists_context {
 	struct archiver_args *args;
 };
 
-static int reject_entry(const struct object_id *oid UNUSED,
+static int reject_entry(struct repository *repo, const struct object_id *oid UNUSED,
 			struct strbuf *base,
 			const char *filename, unsigned mode,
 			void *context)
@@ -394,7 +428,7 @@ static int reject_entry(const struct object_id *oid UNUSED,
 		struct strbuf sb = STRBUF_INIT;
 		strbuf_addbuf(&sb, base);
 		strbuf_addstr(&sb, filename);
-		if (!match_pathspec(ctx->args->repo->index,
+		if (!match_pathspec(repo->index,
 				    &ctx->pathspec,
 				    sb.buf, sb.len, 0, NULL, 1))
 			ret = READ_TREE_RECURSIVE;
@@ -431,6 +465,7 @@ static void parse_pathspec_arg(const char **pathspec,
 		       PATHSPEC_PREFER_FULL,
 		       "", pathspec);
 	ar_args->pathspec.recursive = 1;
+	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
 	if (pathspec) {
 		while (*pathspec) {
 			if (**pathspec && !path_exists(ar_args, *pathspec))
@@ -592,6 +627,7 @@ static int parse_archive_args(int argc, const char **argv,
 	int verbose = 0;
 	int i;
 	int list = 0;
+	int recurse_submodules = 0;
 	int worktree_attributes = 0;
 	struct option opts[] = {
 		OPT_GROUP(""),
@@ -606,6 +642,8 @@ static int parse_archive_args(int argc, const char **argv,
 		  add_file_cb, (intptr_t)&base },
 		OPT_STRING('o', "output", &output, N_("file"),
 			N_("write the archive to this file")),
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("include submodules in archive")),
 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
 			N_("read .gitattributes in working directory")),
 		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
@@ -670,6 +708,7 @@ static int parse_archive_args(int argc, const char **argv,
 	args->base = base;
 	args->baselen = strlen(base);
 	args->worktree_attributes = worktree_attributes;
+	args->recurse_submodules = recurse_submodules;
 
 	return argc;
 }
@@ -708,7 +747,7 @@ int write_archive(int argc, const char **argv, const char *prefix,
 	parse_treeish_arg(argv, &args, prefix, remote);
 	parse_pathspec_arg(argv + 1, &args);
 
-	rc = ar->write_archive(ar, &args);
+	rc = ar->write_archive(ar, repo, &args);
 
 	string_list_clear_func(&args.extra_files, extra_file_info_clear);
 	free(args.refname);
diff --git a/archive.h b/archive.h
index 08bed3ed3af..91b318f9ad8 100644
--- a/archive.h
+++ b/archive.h
@@ -19,6 +19,7 @@ struct archiver_args {
 	timestamp_t time;
 	struct pathspec pathspec;
 	unsigned int verbose : 1;
+	unsigned int recurse_submodules : 1;
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
 	int compression_level;
@@ -41,7 +42,7 @@ const char *archive_format_from_filename(const char *filename);
 #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
 struct archiver {
 	const char *name;
-	int (*write_archive)(const struct archiver *, struct archiver_args *);
+	int (*write_archive)(const struct archiver *, struct repository *repo, struct archiver_args *);
 	unsigned flags;
 	char *filter_command;
 };
@@ -51,12 +52,13 @@ void init_tar_archiver(void);
 void init_zip_archiver(void);
 void init_archivers(void);
 
-typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
+typedef int (*write_archive_entry_fn_t)(struct repository *repo,
+					struct archiver_args *args,
 					const struct object_id *oid,
 					const char *path, size_t pathlen,
 					unsigned int mode,
 					void *buffer, unsigned long size);
 
-int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
+int write_archive_entries(struct repository *repo, struct archiver_args *args, write_archive_entry_fn_t write_entry);
 
 #endif	/* ARCHIVE_H */
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 2a132392fbe..1238774b245 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -124,7 +124,7 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
 
 }
 
-static int update_some(const struct object_id *oid, struct strbuf *base,
+static int update_some(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 		       const char *pathname, unsigned mode, void *context UNUSED)
 {
 	int len;
diff --git a/builtin/log.c b/builtin/log.c
index ee19dc5d450..da73bbef836 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -698,7 +698,7 @@ static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 	return 0;
 }
 
-static int show_tree_object(const struct object_id *oid UNUSED,
+static int show_tree_object(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
 			    struct strbuf *base UNUSED,
 			    const char *pathname, unsigned mode,
 			    void *context)
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 4cf8a236483..a08768dbd2a 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -509,7 +509,7 @@ static int get_common_prefix_len(const char *common_prefix)
 	return common_prefix_len;
 }
 
-static int read_one_entry_opt(struct index_state *istate,
+static int read_one_entry_opt(struct repository *repo UNUSED, struct index_state *istate,
 			      const struct object_id *oid,
 			      struct strbuf *base,
 			      const char *pathname,
@@ -533,12 +533,12 @@ static int read_one_entry_opt(struct index_state *istate,
 	return add_index_entry(istate, ce, opt);
 }
 
-static int read_one_entry(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context)
 {
 	struct index_state *istate = context;
-	return read_one_entry_opt(istate, oid, base, pathname,
+	return read_one_entry_opt(repo, istate, oid, base, pathname,
 				  mode,
 				  ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
 }
@@ -547,12 +547,12 @@ static int read_one_entry(const struct object_id *oid, struct strbuf *base,
  * This is used when the caller knows there is no existing entries at
  * the stage that will conflict with the entry being added.
  */
-static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry_quick(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 				const char *pathname, unsigned mode,
 				void *context)
 {
 	struct index_state *istate = context;
-	return read_one_entry_opt(istate, oid, base, pathname,
+	return read_one_entry_opt(repo, istate, oid, base, pathname,
 				  mode, ADD_CACHE_JUST_APPEND);
 }
 
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index c3ea09281af..c8d6ff95ac0 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -141,7 +141,7 @@ static int show_recursive(const char *base, size_t baselen, const char *pathname
 	return 0;
 }
 
-static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
+static int show_tree_fmt(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			 const char *pathname, unsigned mode, void *context UNUSED)
 {
 	size_t baselen;
@@ -211,7 +211,7 @@ static void show_tree_common_default_long(struct strbuf *base,
 	strbuf_setlen(base, baselen);
 }
 
-static int show_tree_default(const struct object_id *oid, struct strbuf *base,
+static int show_tree_default(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			     const char *pathname, unsigned mode,
 			     void *context UNUSED)
 {
@@ -229,7 +229,7 @@ static int show_tree_default(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_long(const struct object_id *oid, struct strbuf *base,
+static int show_tree_long(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context UNUSED)
 {
@@ -244,7 +244,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 
 	if (data.type == OBJ_BLOB) {
 		unsigned long size;
-		if (oid_object_info(the_repository, data.oid, &size) == OBJ_BAD)
+		if (oid_object_info(repo, data.oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
 			xsnprintf(size_text, sizeof(size_text),
@@ -254,12 +254,12 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 	}
 
 	printf("%06o %s %s %7s\t", data.mode, type_name(data.type),
-	       find_unique_abbrev(data.oid, abbrev), size_text);
+	       repo_find_unique_abbrev(repo, data.oid, abbrev), size_text);
 	show_tree_common_default_long(base, pathname, data.base->len);
 	return recurse;
 }
 
-static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
+static int show_tree_name_only(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
 			       const char *pathname, unsigned mode,
 			       void *context UNUSED)
 {
@@ -280,7 +280,7 @@ static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_object(const struct object_id *oid, struct strbuf *base,
+static int show_tree_object(struct repository *repo, const struct object_id *oid, struct strbuf *base,
 			    const char *pathname, unsigned mode,
 			    void *context UNUSED)
 {
@@ -292,7 +292,7 @@ static int show_tree_object(const struct object_id *oid, struct strbuf *base,
 	if (early >= 0)
 		return early;
 
-	printf("%s%c", find_unique_abbrev(oid, abbrev), line_termination);
+	printf("%s%c", repo_find_unique_abbrev(repo, oid, abbrev), line_termination);
 	return recurse;
 }
 
diff --git a/list-objects.c b/list-objects.c
index 250d9de41cb..4f463ae32ad 100644
--- a/list-objects.c
+++ b/list-objects.c
@@ -185,7 +185,7 @@ static void process_tree(struct traversal_context *ctx,
 	    !revs->include_check_obj(&tree->object, revs->include_check_data))
 		return;
 
-	failed_parse = parse_tree_gently(tree, 1);
+	failed_parse = parse_tree_gently(revs->repo, tree, 1);
 	if (failed_parse) {
 		if (revs->ignore_missing_links)
 			return;
diff --git a/merge-recursive.c b/merge-recursive.c
index 4ddd3adea00..119e86090f9 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -456,7 +456,7 @@ static void unpack_trees_finish(struct merge_options *opt)
 	clear_unpack_trees_porcelain(&opt->priv->unpack_opts);
 }
 
-static int save_files_dirs(const struct object_id *oid UNUSED,
+static int save_files_dirs(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
 			   struct strbuf *base, const char *path,
 			   unsigned int mode, void *context)
 {
diff --git a/revision.c b/revision.c
index 36e31942cee..39e023a19ea 100644
--- a/revision.c
+++ b/revision.c
@@ -74,7 +74,7 @@ static void mark_tree_contents_uninteresting(struct repository *r,
 	struct tree_desc desc;
 	struct name_entry entry;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
@@ -181,7 +181,7 @@ static void add_children_by_path(struct repository *r,
 	if (!tree)
 		return;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
diff --git a/sparse-index.c b/sparse-index.c
index e4a54ce1943..05ca8aec1fb 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -232,7 +232,7 @@ static void set_index_entry(struct index_state *istate, int nr, struct cache_ent
 	add_name_hash(istate, ce);
 }
 
-static int add_path_to_index(const struct object_id *oid,
+static int add_path_to_index(struct repository *repo UNUSED, const struct object_id *oid,
 			     struct strbuf *base, const char *path,
 			     unsigned int mode, void *context)
 {
diff --git a/t/t5005-archive-submodules.sh b/t/t5005-archive-submodules.sh
new file mode 100755
index 00000000000..49d5ff74ad5
--- /dev/null
+++ b/t/t5005-archive-submodules.sh
@@ -0,0 +1,84 @@
+#!/bin/sh
+
+test_description='git archive --recurse-submodules test'
+
+. ./test-lib.sh
+. "$TEST_DIRECTORY"/lib-submodule-update.sh
+
+test_expect_success 'setup' '
+	create_lib_submodule_repo &&
+	git -C submodule_update_repo checkout valid_sub1 &&
+	git -C submodule_update_repo submodule update
+'
+
+check_tar() {
+	tarfile=$1.tar
+	listfile=$1.lst
+	dir=$1
+	dir_with_prefix=$dir/$2
+
+	test_expect_success ' extract tar archive' '
+		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
+	'
+}
+
+check_added() {
+	dir=$1
+	path_in_fs=$2
+	path_in_archive=$3
+
+	test_expect_success " validate extra file $path_in_archive" '
+		test -f $dir/$path_in_archive &&
+		diff -r $path_in_fs $dir/$path_in_archive
+	'
+}
+
+check_not_added() {
+	dir=$1
+	path_in_archive=$2
+
+	test_expect_success " validate unpresent file $path_in_archive" '
+		! test -f $dir/$path_in_archive &&
+		! test -d $dir/$path_in_archive
+	'
+}
+
+test_expect_success 'archive without recurse, non-init' '
+	reset_work_tree_to valid_sub1 &&
+	git -C submodule_update archive HEAD >b.tar
+'
+
+check_tar b
+check_added b submodule_update/file1 file1
+check_not_added b sub1/file1
+
+test_expect_success 'archive with recurse, non-init' '
+	reset_work_tree_to valid_sub1 &&
+	! git -C submodule_update archive --recurse-submodules HEAD >b2-err.tar
+'
+
+test_expect_success 'archive with recurse, init' '
+	reset_work_tree_to valid_sub1 &&
+	git -C submodule_update submodule update --init &&
+	git -C submodule_update ls-files --recurse-submodules &&
+	git -C submodule_update ls-tree HEAD &&
+	git -C submodule_update archive --recurse-submodules HEAD >b2.tar
+'
+
+check_tar b2
+check_added b2 submodule_update/sub1/file1 sub1/file1
+
+test_expect_success 'archive with recurse with big files' '
+	reset_work_tree_to valid_sub1 &&
+	test_config core.bigfilethreshold 1 &&
+	git -C submodule_update submodule update --init &&
+	git -C submodule_update ls-files --recurse-submodules &&
+	git -C submodule_update ls-tree HEAD &&
+	git -C submodule_update archive --recurse-submodules HEAD >b3.tar
+'
+
+check_tar b3
+check_added b3 submodule_update/sub1/file1 sub1/file1
+
+
+test_done
diff --git a/tree.c b/tree.c
index 410e3b477e5..c5b5a0ac08f 100644
--- a/tree.c
+++ b/tree.c
@@ -8,6 +8,7 @@
 #include "alloc.h"
 #include "tree-walk.h"
 #include "repository.h"
+#include "pathspec.h"
 
 const char *tree_type = "tree";
 
@@ -22,8 +23,8 @@ int read_tree_at(struct repository *r,
 	int len, oldlen = base->len;
 	enum interesting retval = entry_not_interesting;
 
-	if (parse_tree(tree))
-		return -1;
+	if (repo_parse_tree(r, tree))
+		die("Failed to parse tree");
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
 
@@ -37,7 +38,7 @@ int read_tree_at(struct repository *r,
 				continue;
 		}
 
-		switch (fn(&entry.oid, base,
+		switch (fn(r, &entry.oid, base,
 			   entry.path, entry.mode, context)) {
 		case 0:
 			continue;
@@ -47,36 +48,57 @@ int read_tree_at(struct repository *r,
 			return -1;
 		}
 
-		if (S_ISDIR(entry.mode))
+		if (S_ISDIR(entry.mode)) {
 			oidcpy(&oid, &entry.oid);
-		else if (S_ISGITLINK(entry.mode)) {
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(r, lookup_tree(r, &oid),
+						base, pathspec,
+						fn, context);
+			strbuf_setlen(base, oldlen);
+			if (retval)
+				return -1;
+		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
 			struct commit *commit;
+			struct repository subrepo;
+			struct repository* subrepo_p = &subrepo;
+			struct tree* submodule_tree;
 
-			commit = lookup_commit(r, &entry.oid);
+			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
+				die("couldn't init submodule %s%s", base->buf, entry.path);
+
+			if (repo_read_index(subrepo_p) < 0)
+				die("index file corrupt");
+
+			commit = lookup_commit(subrepo_p, &entry.oid);
 			if (!commit)
 				die("Commit %s in submodule path %s%s not found",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			if (parse_commit(commit))
+			if (repo_parse_commit(subrepo_p, commit))
 				die("Invalid commit %s in submodule path %s%s",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			oidcpy(&oid, get_commit_tree_oid(commit));
+			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
+			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
+
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
+						base, pathspec,
+						fn, context);
+			if (retval) {
+			    die("failed to read tree for %s%s", base->buf, entry.path);
+			    return -1;
+			}
+			strbuf_setlen(base, oldlen);
+			repo_clear(subrepo_p);
 		}
-		else
-			continue;
 
-		len = tree_entry_len(&entry);
-		strbuf_add(base, entry.path, len);
-		strbuf_addch(base, '/');
-		retval = read_tree_at(r, lookup_tree(r, &oid),
-				      base, pathspec,
-				      fn, context);
-		strbuf_setlen(base, oldlen);
-		if (retval)
-			return -1;
 	}
 	return 0;
 }
@@ -121,7 +143,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 	return 0;
 }
 
-int parse_tree_gently(struct tree *item, int quiet_on_missing)
+int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
 {
 	 enum object_type type;
 	 void *buffer;
@@ -129,7 +151,7 @@ int parse_tree_gently(struct tree *item, int quiet_on_missing)
 
 	if (item->object.parsed)
 		return 0;
-	buffer = read_object_file(&item->object.oid, &type, &size);
+	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
 	if (!buffer)
 		return quiet_on_missing ? -1 :
 			error("Could not read %s",
diff --git a/tree.h b/tree.h
index 6efff003e21..3dc8f151760 100644
--- a/tree.h
+++ b/tree.h
@@ -18,11 +18,14 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
 
 int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
 
-int parse_tree_gently(struct tree *tree, int quiet_on_missing);
-static inline int parse_tree(struct tree *tree)
+int parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
+static inline int repo_parse_tree(struct repository *r, struct tree *tree)
 {
-	return parse_tree_gently(tree, 0);
+	return parse_tree_gently(r, tree, 0);
 }
+#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
+#define parse_tree(tree) repo_parse_tree(the_repository, tree)
+#endif
 void free_tree_buffer(struct tree *tree);
 
 /* Parses and returns the tree in the given ent, chasing tags and commits. */
@@ -31,7 +34,7 @@ struct tree *parse_tree_indirect(const struct object_id *oid);
 int cmp_cache_name_compare(const void *a_, const void *b_);
 
 #define READ_TREE_RECURSIVE 1
-typedef int (*read_tree_fn_t)(const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
+typedef int (*read_tree_fn_t)(struct repository *, const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
 
 int read_tree_at(struct repository *r,
 		 struct tree *tree, struct strbuf *base,
diff --git a/wt-status.c b/wt-status.c
index 5813174896c..fefe4c55e1b 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -665,7 +665,7 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	release_revisions(&rev);
 }
 
-static int add_file_to_list(const struct object_id *oid,
+static int add_file_to_list(struct repository *repo UNUSED, const struct object_id *oid,
 			    struct strbuf *base, const char *path,
 			    unsigned int mode, void *context)
 {
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v2 2/2] archive: fix a case of submodule in submodule traversal
  2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
  2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
@ 2022-10-13 11:36   ` Alphadelta14 via GitGitGadget
  2022-10-13 17:53   ` [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command René Scharfe
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
  3 siblings, 0 replies; 48+ messages in thread
From: Alphadelta14 via GitGitGadget @ 2022-10-13 11:36 UTC (permalink / raw)
  To: git; +Cc: Heather Lapointe, Alphadelta14

From: Alphadelta14 <alpha@alphaservcomputing.solutions>

repo_submodule_init actually expects the path relative to submodule_prefix.
We perform a simple strip to get the correct path.

Signed-off-by: Alphadelta14 <alpha@alphaservcomputing.solutions>
---
 tree.c | 35 ++++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/tree.c b/tree.c
index c5b5a0ac08f..c327d210f47 100644
--- a/tree.c
+++ b/tree.c
@@ -64,35 +64,52 @@ int read_tree_at(struct repository *r,
 			struct repository subrepo;
 			struct repository* subrepo_p = &subrepo;
 			struct tree* submodule_tree;
+			char *submodule_rel_path;
+			int name_base_len = 0;
 
-			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
-				die("couldn't init submodule %s%s", base->buf, entry.path);
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			submodule_rel_path = base->buf;
+			// repo_submodule_init expects a path relative to submodule_prefix
+			if (r->submodule_prefix) {
+				name_base_len = strlen(r->submodule_prefix);
+				// we should always expect to start with submodule_prefix
+				assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len));
+				// strip the prefix
+				submodule_rel_path += name_base_len;
+				// if submodule_prefix doesn't end with a /, we want to get rid of that too
+				if (is_dir_sep(submodule_rel_path[0])) {
+					submodule_rel_path++;
+				}
+			}
+
+			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
+				die("couldn't init submodule %s", base->buf);
 
 			if (repo_read_index(subrepo_p) < 0)
 				die("index file corrupt");
 
 			commit = lookup_commit(subrepo_p, &entry.oid);
 			if (!commit)
-				die("Commit %s in submodule path %s%s not found",
+				die("Commit %s in submodule path %s not found",
 				    oid_to_hex(&entry.oid),
-				    base->buf, entry.path);
+				    base->buf);
 
 			if (repo_parse_commit(subrepo_p, commit))
-				die("Invalid commit %s in submodule path %s%s",
+				die("Invalid commit %s in submodule path %s",
 				    oid_to_hex(&entry.oid),
-				    base->buf, entry.path);
+				    base->buf);
 
 			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
 			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
 
-			len = tree_entry_len(&entry);
-			strbuf_add(base, entry.path, len);
 			strbuf_addch(base, '/');
+
 			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
 						base, pathspec,
 						fn, context);
 			if (retval) {
-			    die("failed to read tree for %s%s", base->buf, entry.path);
+			    die("failed to read tree for %s", base->buf);
 			    return -1;
 			}
 			strbuf_setlen(base, oldlen);
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command
  2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
  2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
  2022-10-13 11:36   ` [PATCH v2 2/2] archive: fix a case of submodule in submodule traversal Alphadelta14 via GitGitGadget
@ 2022-10-13 17:53   ` René Scharfe
  2022-10-13 21:23     ` Heather Lapointe
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
  3 siblings, 1 reply; 48+ messages in thread
From: René Scharfe @ 2022-10-13 17:53 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git; +Cc: Heather Lapointe

Am 13.10.22 um 13:35 schrieb Heather Lapointe via GitGitGadget:
> This makes it possible to include submodule contents in an archive command.

Great!

> The inspiration for this change comes from this Github thread,
> https://github.com/dear-github/dear-github/issues/214, with at least 160
> 👍🏻 's at the time of writing. (I stumbled upon it because I wanted it as
> well).
>
> I figured the underlying implementation wouldn't be too difficult with most
> of the plumbing already in place, so I decided to add the relevant logic to
> the client git-archive command.
>
> One of the trickier parts of this implementation involved teaching read_tree
> about submodules. Some of the troublesome areas were still using the
> the_repository references to look up commit or tree or oid information. I
> ended up deciding that read_tree_fn_t would probably be best off having a
> concrete repo reference since it allows changing the context to a subrepo
> where needed (even though some of the usages did not need it specifically).
>
> I am open to feedback since this is all quite new to me :)
>
> TODO:

This list confuses me:

>  * working implementation

What exactly is not working, yet?

>  * valgrind

What's up with it?  Does it report errors or leaks?

>  * add regression tests

This series adds a new test script.  Do you plan to add more checks?

>  * update documentation with new flag

That I can understand: Indeed Documentation/git-archive.txt would need
an update.

>  * submit to mailing list

But you already did submit two iterations of this series to the Git
mailing list!?

>
> Alphadelta14 (2):
>   archive: add --recurse-submodules to git-archive command
>   archive: fix a case of submodule in submodule traversal

We prefer to keep known bugs out of the repo.  It helps when bisecting,
for example.  So it would be better to squash the fix into the patch
that adds the feature.  But...

>  archive-tar.c                 | 14 +++--
>  archive-zip.c                 | 14 ++---
>  archive.c                     | 99 ++++++++++++++++++++++++-----------
>  archive.h                     |  8 +--
>  builtin/checkout.c            |  2 +-
>  builtin/log.c                 |  2 +-
>  builtin/ls-files.c            | 10 ++--
>  builtin/ls-tree.c             | 16 +++---
>  list-objects.c                |  2 +-
>  merge-recursive.c             |  2 +-
>  revision.c                    |  4 +-
>  sparse-index.c                |  2 +-
>  t/t5005-archive-submodules.sh | 84 +++++++++++++++++++++++++++++
>  tree.c                        | 93 ++++++++++++++++++++++----------
>  tree.h                        | 11 ++--
>  wt-status.c                   |  2 +-
>  16 files changed, 269 insertions(+), 96 deletions(-)
>  create mode 100755 t/t5005-archive-submodules.sh

... this is all a bit much for a single patch, I feel.  Giving
parse_tree_gently() a repo parameter, adding repo_parse_tree(), using
it in read_tree_at(), adding a repo parameter to read_tree_fn_t,
letting read_tree_at() recurse into submodules and adding the new
option to git archive all seem like topics worth their own patch and
rationale.

You probably have all of that in your head right now, but at least my
attention span and working memory capacity require smaller morsels.

>
>
> base-commit: e85701b4af5b7c2a9f3a1b07858703318dce365d
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1359%2FAlphadelta14%2Farchive-recurse-submodules-v2
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1359/Alphadelta14/archive-recurse-submodules-v2
> Pull-Request: https://github.com/git/git/pull/1359
>
> Range-diff vs v1:
>
>  1:  41664a59029 = 1:  41664a59029 archive: add --recurse-submodules to git-archive command
>  -:  ----------- > 2:  68f7830c6d9 archive: fix a case of submodule in submodule traversal
>


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 1/2] archive: add --recurse-submodules to git-archive command
  2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
@ 2022-10-13 17:53     ` René Scharfe
  2022-10-13 21:37       ` Heather Lapointe
  0 siblings, 1 reply; 48+ messages in thread
From: René Scharfe @ 2022-10-13 17:53 UTC (permalink / raw)
  To: Alphadelta14 via GitGitGadget, git; +Cc: Heather Lapointe

Am 13.10.22 um 13:35 schrieb Alphadelta14 via GitGitGadget:
> From: Alphadelta14 <alpha@alphaservcomputing.solutions>
>

> diff --git a/tree.c b/tree.c
> index 410e3b477e5..c5b5a0ac08f 100644
> --- a/tree.c
> +++ b/tree.c
> @@ -8,6 +8,7 @@
>  #include "alloc.h"
>  #include "tree-walk.h"
>  #include "repository.h"
> +#include "pathspec.h"
>
>  const char *tree_type = "tree";
>
> @@ -22,8 +23,8 @@ int read_tree_at(struct repository *r,
>  	int len, oldlen = base->len;
>  	enum interesting retval = entry_not_interesting;
>
> -	if (parse_tree(tree))
> -		return -1;
> +	if (repo_parse_tree(r, tree))
> +		die("Failed to parse tree");
>
>  	init_tree_desc(&desc, tree->buffer, tree->size);
>
> @@ -37,7 +38,7 @@ int read_tree_at(struct repository *r,
>  				continue;
>  		}
>
> -		switch (fn(&entry.oid, base,
> +		switch (fn(r, &entry.oid, base,
>  			   entry.path, entry.mode, context)) {
>  		case 0:
>  			continue;
> @@ -47,36 +48,57 @@ int read_tree_at(struct repository *r,
>  			return -1;
>  		}
>
> -		if (S_ISDIR(entry.mode))
> +		if (S_ISDIR(entry.mode)) {
>  			oidcpy(&oid, &entry.oid);
> -		else if (S_ISGITLINK(entry.mode)) {

So you remove the non-recursive handling of submodules here...

> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			strbuf_addch(base, '/');
> +			retval = read_tree_at(r, lookup_tree(r, &oid),
> +						base, pathspec,
> +						fn, context);
> +			strbuf_setlen(base, oldlen);
> +			if (retval)
> +				return -1;
> +		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {

... and add recursive handling here, and there is no further else
branch.  Why do we no longer need the non-recursive variant?

>  			struct commit *commit;
> +			struct repository subrepo;
> +			struct repository* subrepo_p = &subrepo;
> +			struct tree* submodule_tree;
>
> -			commit = lookup_commit(r, &entry.oid);
> +			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
> +				die("couldn't init submodule %s%s", base->buf, entry.path);
> +
> +			if (repo_read_index(subrepo_p) < 0)
> +				die("index file corrupt");
> +
> +			commit = lookup_commit(subrepo_p, &entry.oid);
>  			if (!commit)
>  				die("Commit %s in submodule path %s%s not found",
>  				    oid_to_hex(&entry.oid),
>  				    base->buf, entry.path);
>
> -			if (parse_commit(commit))
> +			if (repo_parse_commit(subrepo_p, commit))
>  				die("Invalid commit %s in submodule path %s%s",
>  				    oid_to_hex(&entry.oid),
>  				    base->buf, entry.path);
>
> -			oidcpy(&oid, get_commit_tree_oid(commit));
> +			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
> +			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
> +
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			strbuf_addch(base, '/');
> +			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
> +						base, pathspec,
> +						fn, context);
> +			if (retval) {
> +			    die("failed to read tree for %s%s", base->buf, entry.path);
> +			    return -1;
> +			}
> +			strbuf_setlen(base, oldlen);
> +			repo_clear(subrepo_p);
>  		}
> -		else
> -			continue;
>
> -		len = tree_entry_len(&entry);
> -		strbuf_add(base, entry.path, len);
> -		strbuf_addch(base, '/');
> -		retval = read_tree_at(r, lookup_tree(r, &oid),
> -				      base, pathspec,
> -				      fn, context);
> -		strbuf_setlen(base, oldlen);
> -		if (retval)
> -			return -1;
>  	}
>  	return 0;
>  }
> @@ -121,7 +143,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
>  	return 0;
>  }
>
> -int parse_tree_gently(struct tree *item, int quiet_on_missing)
> +int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
>  {
>  	 enum object_type type;
>  	 void *buffer;
> @@ -129,7 +151,7 @@ int parse_tree_gently(struct tree *item, int quiet_on_missing)
>
>  	if (item->object.parsed)
>  		return 0;
> -	buffer = read_object_file(&item->object.oid, &type, &size);
> +	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
>  	if (!buffer)
>  		return quiet_on_missing ? -1 :
>  			error("Could not read %s",


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command
  2022-10-13 17:53   ` [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command René Scharfe
@ 2022-10-13 21:23     ` Heather Lapointe
  2022-10-14  9:47       ` René Scharfe
  0 siblings, 1 reply; 48+ messages in thread
From: Heather Lapointe @ 2022-10-13 21:23 UTC (permalink / raw)
  To: "René Scharfe"; +Cc: heather lapointe via gitgitgadget, git

---- On Thu, 13 Oct 2022 10:53:44 -0700 René Scharfe  wrote ---

 > > I am open to feedback since this is all quite new to me :) 
 > > 
 > > TODO: 
 >  
 > This list confuses me: 

I apologize. I'm new to this repo and workflow.
I had been using checkboxes in github, which look like `- [x]` for ones that I have completed.
They all got converted into items that look like they needed doing via GitGitGadget.

The only remaining one was to update documentation.

 >  
 > > 
 > > Alphadelta14 (2): 
 > >   archive: add --recurse-submodules to git-archive command 
 > >   archive: fix a case of submodule in submodule traversal 
 >  
 > We prefer to keep known bugs out of the repo.  It helps when bisecting, 
 > for example.  So it would be better to squash the fix into the patch 
 > that adds the feature.  But... 

Absolutely can do.

 >  
 > >  archive-tar.c                 | 14 +++-- 
 > >  archive-zip.c                 | 14 ++--- 
 > >  archive.c                     | 99 ++++++++++++++++++++++++----------- 
 > >  archive.h                     |  8 +-- 
 > >  builtin/checkout.c            |  2 +- 
 > >  builtin/log.c                 |  2 +- 
 > >  builtin/ls-files.c            | 10 ++-- 
 > >  builtin/ls-tree.c             | 16 +++--- 
 > >  list-objects.c                |  2 +- 
 > >  merge-recursive.c             |  2 +- 
 > >  revision.c                    |  4 +- 
 > >  sparse-index.c                |  2 +- 
 > >  t/t5005-archive-submodules.sh | 84 +++++++++++++++++++++++++++++ 
 > >  tree.c                        | 93 ++++++++++++++++++++++---------- 
 > >  tree.h                        | 11 ++-- 
 > >  wt-status.c                   |  2 +- 
 > >  16 files changed, 269 insertions(+), 96 deletions(-) 
 > >  create mode 100755 t/t5005-archive-submodules.sh 
 >  
 > ... this is all a bit much for a single patch, I feel.  Giving 
 > parse_tree_gently() a repo parameter, adding repo_parse_tree(), using 
 > it in read_tree_at(), adding a repo parameter to read_tree_fn_t, 
 > letting read_tree_at() recurse into submodules and adding the new 
 > option to git archive all seem like topics worth their own patch and 
 > rationale. 
 > You probably have all of that in your head right now, but at least my 
 > attention span and working memory capacity requires smaller morsels. 
 
Does this mean I should create multiple PRs?
Or should they just be split up into individual commits?
I will work off assuming the latter.

I am comfortable rewriting history as long as I understand the direction to go in.

Should each individual patch be completely standalone?
(To the point where each set, with the previous patches should produce a working application?
Or is having the patch broken up by groups of changes, with some level of expecting the final
functionality good?)

But thanks so far. I will get working on this (and review your next set of messages).


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 1/2] archive: add --recurse-submodules to git-archive command
  2022-10-13 17:53     ` René Scharfe
@ 2022-10-13 21:37       ` Heather Lapointe
  0 siblings, 0 replies; 48+ messages in thread
From: Heather Lapointe @ 2022-10-13 21:37 UTC (permalink / raw)
  To: "René Scharfe"; +Cc: alphadelta14 via gitgitgadget, git


---- On Thu, 13 Oct 2022 10:53:46 -0700 René Scharfe  wrote ---

 > Am 13.10.22 um 13:35 schrieb Alphadelta14 via GitGitGadget: 
 > > From: Alphadelta14 alpha@alphaservcomputing.solutions> 
 > > 
 >  
 > > diff --git a/tree.c b/tree.c 
 > > index 410e3b477e5..c5b5a0ac08f 100644 
 > > --- a/tree.c 
 > > +++ b/tree.c 
 > > @@ -8,6 +8,7 @@ 
 > >  #include "alloc.h" 
 > >  #include "tree-walk.h" 
 > >  #include "repository.h" 
 > > +#include "pathspec.h" 
 > > 
 > >  const char *tree_type = "tree"; 
 > > 
 > > @@ -22,8 +23,8 @@ int read_tree_at(struct repository *r, 
 > >      int len, oldlen = base->len; 
 > >      enum interesting retval = entry_not_interesting; 
 > > 
 > > -    if (parse_tree(tree)) 
 > > -        return -1; 
 > > +    if (repo_parse_tree(r, tree)) 
 > > +        die("Failed to parse tree"); 
 > > 
 > >      init_tree_desc(&desc, tree->buffer, tree->size); 
 > > 
 > > @@ -37,7 +38,7 @@ int read_tree_at(struct repository *r, 
 > >                  continue; 
 > >          } 
 > > 
 > > -        switch (fn(&entry.oid, base, 
 > > +        switch (fn(r, &entry.oid, base, 
 > >                 entry.path, entry.mode, context)) { 
 > >          case 0: 
 > >              continue; 
 > > @@ -47,36 +48,57 @@ int read_tree_at(struct repository *r, 
 > >              return -1; 
 > >          } 
 > > 
 > > -        if (S_ISDIR(entry.mode)) 
 > > +        if (S_ISDIR(entry.mode)) { 
 > >              oidcpy(&oid, &entry.oid); 
 > > -        else if (S_ISGITLINK(entry.mode)) { 
 >  
 > So you remove the non-recursive handling of submodules here... 

The original logic looked like it had not been thoroughly used in the past years.
It was performing a commit lookup within the submodule using the superproject
repository, which would not actually work unless there was global state
being contextually pushed where `the_repository` was being replaced
with a submodule repository instance (it appeared to not be the case currently).

 >  
 > > +            len = tree_entry_len(&entry); 
 > > +            strbuf_add(base, entry.path, len); 
 > > +            strbuf_addch(base, '/'); 
 > > +            retval = read_tree_at(r, lookup_tree(r, &oid), 
 > > +                        base, pathspec, 
 > > +                        fn, context); 
 > > +            strbuf_setlen(base, oldlen); 
 > > +            if (retval) 
 > > +                return -1; 
 > > +        } else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) { 
 >  
 > ... and add recursive handling here, and there is no further else 
 > branch.  Why do we no longer need the non-recursive variant? 

The else case is that we have a submodule, but we don't have pathspec->recurse_submodules set.
Both the dir case and the submodule case handle recursion
(but with enough difference that it didn't make sense to use the same logic).
In the other else case (files), we simply do not recurse.

 >  
 > >              struct commit *commit; 
 > > +            struct repository subrepo; 
 > > +            struct repository* subrepo_p = &subrepo; 
 > > +            struct tree* submodule_tree; 
 > > 
 > > -            commit = lookup_commit(r, &entry.oid); 
 > > +            if (repo_submodule_init(subrepo_p, r, entry.path, null_oid())) 
 > > +                die("couldn't init submodule %s%s", base->buf, entry.path); 
 > > + 
 > > +            if (repo_read_index(subrepo_p) < 0) 
 > > +                die("index file corrupt"); 
 > > + 
 > > +            commit = lookup_commit(subrepo_p, &entry.oid); 
 > >              if (!commit) 
 > >                  die("Commit %s in submodule path %s%s not found", 
 > >                      oid_to_hex(&entry.oid), 
 > >                      base->buf, entry.path); 
 > > 
 > > -            if (parse_commit(commit)) 
 > > +            if (repo_parse_commit(subrepo_p, commit)) 
 > >                  die("Invalid commit %s in submodule path %s%s", 
 > >                      oid_to_hex(&entry.oid), 
 > >                      base->buf, entry.path); 
 > > 
 > > -            oidcpy(&oid, get_commit_tree_oid(commit)); 
 > > +            submodule_tree = repo_get_commit_tree(subrepo_p, commit); 
 > > +            oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL); 
 > > + 
 > > +            len = tree_entry_len(&entry); 
 > > +            strbuf_add(base, entry.path, len); 
 > > +            strbuf_addch(base, '/'); 
 > > +            retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid), 
 > > +                        base, pathspec, 
 > > +                        fn, context); 
 > > +            if (retval) { 
 > > +                die("failed to read tree for %s%s", base->buf, entry.path); 
 > > +                return -1; 
 > > +            } 
 > > +            strbuf_setlen(base, oldlen); 
 > > +            repo_clear(subrepo_p); 
 > >          } 
 > > -        else 
 > > -            continue; 
 > > 
 > > -        len = tree_entry_len(&entry); 
 > > -        strbuf_add(base, entry.path, len); 
 > > -        strbuf_addch(base, '/'); 
 > > -        retval = read_tree_at(r, lookup_tree(r, &oid), 
 > > -                      base, pathspec, 
 > > -                      fn, context); 
 > > -        strbuf_setlen(base, oldlen); 
 > > -        if (retval) 
 > > -            return -1; 
 > >      } 
 > >      return 0; 
 > >  } 
 > > @@ -121,7 +143,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size) 
 > >      return 0; 
 > >  } 
 > > 
 > > -int parse_tree_gently(struct tree *item, int quiet_on_missing) 
 > > +int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing) 
 > >  { 
 > >       enum object_type type; 
 > >       void *buffer; 
 > > @@ -129,7 +151,7 @@ int parse_tree_gently(struct tree *item, int quiet_on_missing) 
 > > 
 > >      if (item->object.parsed) 
 > >          return 0; 
 > > -    buffer = read_object_file(&item->object.oid, &type, &size); 
 > > +    buffer = repo_read_object_file(r, &item->object.oid, &type, &size); 
 > >      if (!buffer) 
 > >          return quiet_on_missing ? -1 : 
 > >              error("Could not read %s", 
 >  
 > 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command
  2022-10-13 21:23     ` Heather Lapointe
@ 2022-10-14  9:47       ` René Scharfe
  0 siblings, 0 replies; 48+ messages in thread
From: René Scharfe @ 2022-10-14  9:47 UTC (permalink / raw)
  To: Heather Lapointe; +Cc: heather lapointe via gitgitgadget, git

Am 13.10.22 um 23:23 schrieb Heather Lapointe:
> I had been using checkboxes in github, which look like `- [x]` for
> ones that I have completed. They all got converted into items that
> look like they needed doing via GitGitGadget.
>
> The only remaining one was to update documentation.

I opened https://github.com/html-to-text/node-html-to-text/issues/260 to
see if they are willing to support this kind of to-do lists better.

> Does this mean I should create multiple PRs? Or should they just be
> split up into individual commits. I will work off assuming the
> latter.

Right, I meant multiple commits.

> Should each individual patch be completely standalone? (To the point
> where each set, with the previous patches should produce a working
> application? Or is having the patch broken up by groups of changes,
> with some level of expecting the final functionality good?)

Yes, each commit should leave Git in a working state.  Adding new global
functions that are only used by later commits in the series is fine.

René

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
                     ` (2 preceding siblings ...)
  2022-10-13 17:53   ` [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command René Scharfe
@ 2022-10-17  2:23   ` Heather Lapointe via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
                       ` (11 more replies)
  3 siblings, 12 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe

This makes it possible to include submodule contents in an archive command.

The inspiration for this change comes from this GitHub thread,
https://github.com/dear-github/dear-github/issues/214, with at least 160
👍🏻 reactions at the time of writing. (I stumbled upon it because I wanted
it as well.)

I figured the underlying implementation wouldn't be too difficult with most
of the plumbing already in place, so I decided to add the relevant logic to
the client git-archive command.

One of the trickier parts of this implementation involved teaching read_tree
about submodules. Some of the troublesome areas were still using the
the_repository references to look up commit or tree or oid information. I
ended up deciding that read_tree_fn_t would probably be best off having a
concrete repo reference since it allows changing the context to a subrepo
where needed (even though some of the usages did not need it specifically).

I am open to feedback since this is all quite new to me :)

Alphadelta14 (1):
  tree: do not use the_repository for tree traversal methods.

Heather Lapointe (8):
  tree: update cases to use repo_ tree methods
  tree: increase test coverage for tree.c
  tree: handle submodule case for read_tree_at properly
  tree: add repository parameter to read_tree_fn_t
  archive: pass repo objects to write_archive handlers
  archive: remove global repository from archive_args
  archive: add --recurse-submodules to git-archive command
  archive: add tests for git archive --recurse-submodules

 Documentation/git-archive.txt     |   6 +-
 Makefile                          |   1 +
 archive-tar.c                     |  15 ++--
 archive-zip.c                     |  15 ++--
 archive.c                         | 138 ++++++++++++++++++++----------
 archive.h                         |  16 +++-
 builtin/checkout.c                |   4 +-
 builtin/log.c                     |   4 +-
 builtin/ls-files.c                |   8 +-
 builtin/ls-tree.c                 |  34 +++++---
 merge-recursive.c                 |   4 +-
 merge.c                           |   4 +-
 reset.c                           |   2 +-
 revision.c                        |   4 +-
 sequencer.c                       |   6 +-
 sparse-index.c                    |   4 +-
 t/helper/test-tool.c              |   1 +
 t/helper/test-tool.h              |   1 +
 t/helper/test-tree-read-tree-at.c |  41 +++++++++
 t/t1023-tree-read-tree-at.sh      |  65 ++++++++++++++
 t/t5005-archive-submodules.sh     |  83 ++++++++++++++++++
 tree.c                            |  93 ++++++++++++++------
 tree.h                            |  16 ++--
 wt-status.c                       |   4 +-
 24 files changed, 448 insertions(+), 121 deletions(-)
 create mode 100644 t/helper/test-tree-read-tree-at.c
 create mode 100755 t/t1023-tree-read-tree-at.sh
 create mode 100755 t/t5005-archive-submodules.sh


base-commit: e85701b4af5b7c2a9f3a1b07858703318dce365d
Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1359%2FAlphadelta14%2Farchive-recurse-submodules-v3
Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1359/Alphadelta14/archive-recurse-submodules-v3
Pull-Request: https://github.com/git/git/pull/1359

Range-diff vs v2:

  -:  ----------- >  1:  79959a54eb4 tree: do not use the_repository for tree traversal methods.
  -:  ----------- >  2:  2291e0f9b5c tree: update cases to use repo_ tree methods
  -:  ----------- >  3:  9a07c6932f4 tree: increase test coverage for tree.c
  2:  68f7830c6d9 !  4:  d3d1738e670 archive: fix a case of submodule in submodule traversal
     @@
       ## Metadata ##
     -Author: Alphadelta14 <alpha@alphaservcomputing.solutions>
     +Author: Heather Lapointe <alpha@alphaservcomputing.solutions>
      
       ## Commit message ##
     -    archive: fix a case of submodule in submodule traversal
     +    tree: handle submodule case for read_tree_at properly
      
     -    repo_submodule_init actually expects the path relative to submodule_prefix.
     -    We preform a simple strip to the correct path.
     +    This supports traversal into an actual submodule for read_tree_at.
     +    The logic is blocked on pathspec->recurse_submodules now,
     +    but previously hadn't been executed due to all fn() cases
     +    returning early for submodules.
      
     -    Signed-off-by: Alphadelta14 <alpha@alphaservcomputing.solutions>
     +    Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
      
       ## tree.c ##
     +@@
     + #include "alloc.h"
     + #include "tree-walk.h"
     + #include "repository.h"
     ++#include "pathspec.h"
     + 
     + const char *tree_type = "tree";
     + 
      @@ tree.c: int read_tree_at(struct repository *r,
     - 			struct repository subrepo;
     - 			struct repository* subrepo_p = &subrepo;
     - 			struct tree* submodule_tree;
     + 			return -1;
     + 		}
     + 
     +-		if (S_ISDIR(entry.mode))
     ++		if (S_ISDIR(entry.mode)) {
     + 			oidcpy(&oid, &entry.oid);
     +-		else if (S_ISGITLINK(entry.mode)) {
     +-			struct commit *commit;
     + 
     +-			commit = lookup_commit(r, &entry.oid);
     ++			len = tree_entry_len(&entry);
     ++			strbuf_add(base, entry.path, len);
     ++			strbuf_addch(base, '/');
     ++			retval = read_tree_at(r, lookup_tree(r, &oid),
     ++						base, pathspec,
     ++						fn, context);
     ++			strbuf_setlen(base, oldlen);
     ++			if (retval)
     ++				return -1;
     ++		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
     ++			struct commit *commit;
     ++			struct repository subrepo;
     ++			struct repository* subrepo_p = &subrepo;
     ++			struct tree* submodule_tree;
      +			char *submodule_rel_path;
      +			int name_base_len = 0;
     - 
     --			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
     --				die("couldn't init submodule %s%s", base->buf, entry.path);
     ++
      +			len = tree_entry_len(&entry);
      +			strbuf_add(base, entry.path, len);
      +			submodule_rel_path = base->buf;
     @@ tree.c: int read_tree_at(struct repository *r,
      +
      +			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
      +				die("couldn't init submodule %s", base->buf);
     - 
     - 			if (repo_read_index(subrepo_p) < 0)
     - 				die("index file corrupt");
     - 
     - 			commit = lookup_commit(subrepo_p, &entry.oid);
     ++
     ++			if (repo_read_index(subrepo_p) < 0)
     ++				die("index file corrupt");
     ++
     ++			commit = lookup_commit(subrepo_p, &entry.oid);
       			if (!commit)
      -				die("Commit %s in submodule path %s%s not found",
      +				die("Commit %s in submodule path %s not found",
       				    oid_to_hex(&entry.oid),
      -				    base->buf, entry.path);
     -+				    base->buf);
     - 
     - 			if (repo_parse_commit(subrepo_p, commit))
     +-
     +-			// FIXME: This is the wrong repo instance (it refers to the superproject)
     +-			// it will always fail as is (will fix in later patch)
     +-			// This current codepath isn't executed by any existing callbacks
     +-			// so it wouldn't show up as an issue at this time.
     +-			if (repo_parse_commit(r, commit))
      -				die("Invalid commit %s in submodule path %s%s",
     ++				    base->buf);
     ++
     ++			if (repo_parse_commit(subrepo_p, commit))
      +				die("Invalid commit %s in submodule path %s",
       				    oid_to_hex(&entry.oid),
      -				    base->buf, entry.path);
      +				    base->buf);
       
     - 			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
     - 			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
     +-			oidcpy(&oid, get_commit_tree_oid(commit));
     +-		}
     +-		else
     +-			continue;
     ++			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
     ++			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
       
     --			len = tree_entry_len(&entry);
     --			strbuf_add(base, entry.path, len);
     - 			strbuf_addch(base, '/');
     +-		len = tree_entry_len(&entry);
     +-		strbuf_add(base, entry.path, len);
     +-		strbuf_addch(base, '/');
     +-		retval = read_tree_at(r, lookup_tree(r, &oid),
     +-				      base, pathspec,
     +-				      fn, context);
     +-		strbuf_setlen(base, oldlen);
     +-		if (retval)
     +-			return -1;
     ++			strbuf_addch(base, '/');
      +
     - 			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
     - 						base, pathspec,
     - 						fn, context);
     - 			if (retval) {
     --			    die("failed to read tree for %s%s", base->buf, entry.path);
     ++			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
     ++						base, pathspec,
     ++						fn, context);
     ++			if (retval)
      +			    die("failed to read tree for %s", base->buf);
     - 			    return -1;
     - 			}
     - 			strbuf_setlen(base, oldlen);
     ++			strbuf_setlen(base, oldlen);
     ++			repo_clear(subrepo_p);
     ++		}
     ++		// else, this is a file (or a submodule, but no pathspec->recurse_submodules)
     + 	}
     + 	return 0;
     + }
  -:  ----------- >  5:  376345fdf66 tree: add repository parameter to read_tree_fn_t
  1:  41664a59029 !  6:  1b9b049d64f archive: add --recurse-submodules to git-archive command
     @@
       ## Metadata ##
     -Author: Alphadelta14 <alpha@alphaservcomputing.solutions>
     +Author: Heather Lapointe <alpha@alphaservcomputing.solutions>
      
       ## Commit message ##
     -    archive: add --recurse-submodules to git-archive command
     +    archive: pass repo objects to write_archive handlers
      
     -    This makes it possible to include submodule contents in an archive command.
     -
     -    This required updating the general read_tree callbacks to support sub-repos
     -    by not using the_repository global references where possible.
     -
     -    archive: update streaming to use target repo
     -    archive: add test cases for git archive --recurse-submodules
     +    Use contextual repos instead of the_repository or args->repo
     +    to ensure that submodules will be handled correctly
     +    since they use multiple repo instances.
      
          Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
      
     @@ archive-tar.c: static unsigned long offset;
       static int tar_umask = 002;
       
       static int write_tar_filter_archive(const struct archiver *ar,
     -+				    struct repository *repo,
     ++					struct repository *repo,
       				    struct archiver_args *args);
       
       /*
     @@ archive-tar.c: static void write_extended_header(struct archiver_args *args,
       }
       
      -static int write_tar_entry(struct archiver_args *args,
     -+static int write_tar_entry(struct repository *repo,
     ++static int write_tar_entry(
     ++			   struct repository *repo,
      +			   struct archiver_args *args,
       			   const struct object_id *oid,
       			   const char *path, size_t pathlen,
     @@ archive-tar.c: static void tgz_write_block(const void *data)
       static const char internal_gzip_command[] = "git archive gzip";
       
       static int write_tar_filter_archive(const struct archiver *ar,
     -+				    struct repository *repo,
     ++					struct repository *repo,
       				    struct archiver_args *args)
       {
       #if ZLIB_VERNUM >= 0x1221
     @@ archive-zip.c: static int entry_is_binary(struct index_state *istate, const char
       #define STREAM_BUFFER_SIZE (1024 * 16)
       
      -static int write_zip_entry(struct archiver_args *args,
     -+static int write_zip_entry(struct repository *repo,
     ++static int write_zip_entry(
     ++			   struct repository *repo,
      +			   struct archiver_args *args,
       			   const struct object_id *oid,
       			   const char *path, size_t pathlen,
     @@ archive-zip.c: static int archive_zip_config(const char *var, const char *value,
       }
       
       static int write_zip_archive(const struct archiver *ar UNUSED,
     -+			     struct repository *repo,
     ++				 struct repository *repo,
       			     struct archiver_args *args)
       {
       	int err;
     @@ archive-zip.c: static int write_zip_archive(const struct archiver *ar UNUSED,
       
      
       ## archive.c ##
     -@@
     - #include "unpack-trees.h"
     - #include "dir.h"
     - #include "quote.h"
     -+#include "submodule.h"
     - 
     - static char const * const archive_usage[] = {
     - 	N_("git archive [<options>] <tree-ish> [<path>...]"),
     -@@ archive.c: static void format_subst(const struct commit *commit,
     - }
     - 
     - static void *object_file_to_archive(const struct archiver_args *args,
     -+				    struct repository *repo,
     - 				    const char *path,
     - 				    const struct object_id *oid,
     - 				    unsigned int mode,
     -@@ archive.c: static void *object_file_to_archive(const struct archiver_args *args,
     - 			       (args->tree ? &args->tree->object.oid : NULL), oid);
     - 
     - 	path += args->baselen;
     --	buffer = read_object_file(oid, type, sizep);
     -+	buffer = repo_read_object_file(repo, oid, type, sizep);
     - 	if (buffer && S_ISREG(mode)) {
     - 		struct strbuf buf = STRBUF_INIT;
     - 		size_t size = 0;
     - 
     - 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
     --		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
     -+		convert_to_working_tree(repo->index, path, buf.buf, buf.len, &buf, &meta);
     - 		if (commit)
     - 			format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
     - 		buffer = strbuf_detach(&buf, &size);
      @@ archive.c: static int check_attr_export_subst(const struct attr_check *check)
       	return check && ATTR_TRUE(check->items[1].value);
       }
       
      -static int write_archive_entry(const struct object_id *oid, const char *base,
     -+static int write_archive_entry(struct repository *repo, const struct object_id *oid, const char *base,
     ++static int write_archive_entry(
     ++		struct repository *repo,
     ++		const struct object_id *oid, const char *base,
       		int baselen, const char *filename, unsigned mode,
       		void *context)
       {
     @@ archive.c: static int write_archive_entry(const struct object_id *oid, const cha
      +		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
       		if (err)
       			return err;
     --		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
     -+		return READ_TREE_RECURSIVE;
     - 	}
     - 
     - 	if (args->verbose)
     + 		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
      @@ archive.c: static int write_archive_entry(const struct object_id *oid, const char *base,
       
       	/* Stream it? */
       	if (S_ISREG(mode) && !args->convert &&
      -	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
     --	    size > big_file_threshold)
     --		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
      +	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
     -+	    size > big_file_threshold) {
     -+			err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
     -+			if (err) {
     -+				die("Failed to write file %.*s", (int)path.len, path.buf);
     -+			}
     -+			return err;
     -+		}
     + 	    size > big_file_threshold)
     +-		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
     ++		return write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
       
     --	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
     -+	buffer = object_file_to_archive(args, repo, path.buf, oid, mode, &type, &size);
     + 	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
       	if (!buffer)
       		return error(_("cannot read '%s'"), oid_to_hex(oid));
      -	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
     @@ archive.c: static void queue_directory(const struct object_id *oid,
       }
       
      -static int write_directory(struct archiver_context *c)
     -+static void queue_submodule(struct repository *superproject,
     -+		const struct object_id *oid,
     -+		struct strbuf *base, const char *filename,
     -+		unsigned mode, struct archiver_context *c)
     -+{
     -+	struct repository subrepo;
     -+
     -+	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
     -+		return;
     -+
     -+	if (repo_read_index(&subrepo) < 0)
     -+		die("index file corrupt");
     -+
     -+    queue_directory(oid, base, filename, mode, c);
     -+
     -+	repo_clear(&subrepo);
     -+}
     -+
     -+static int write_directory(struct repository *repo, struct archiver_context *c)
     ++static int write_directory(
     ++		struct repository *repo,
     ++		struct archiver_context *c)
       {
       	struct directory *d = c->bottom;
       	int ret;
     @@ archive.c: static int write_directory(struct archiver_context *c)
      +		write_directory(repo, c) ||
      +		write_archive_entry(repo, &d->oid, d->path, d->baselen,
       				    d->path + d->baselen, d->mode,
     --				    c) != READ_TREE_RECURSIVE;
     -+				    c);
     + 				    c) != READ_TREE_RECURSIVE;
       	free(d);
     --	return ret ? -1 : 0;
     -+	if (ret == READ_TREE_RECURSIVE)
     -+		return 0;
     -+	return ret;
     - }
     - 
     --static int queue_or_write_archive_entry(const struct object_id *oid,
     -+static int queue_or_write_archive_entry(
     -+		struct repository *repo, const struct object_id *oid,
     - 		struct strbuf *base, const char *filename,
     - 		unsigned mode, void *context)
     - {
     -@@ archive.c: static int queue_or_write_archive_entry(const struct object_id *oid,
     - 		/* Borrow base, but restore its original value when done. */
     - 		strbuf_addstr(base, filename);
     - 		strbuf_addch(base, '/');
     --		check = get_archive_attrs(c->args->repo->index, base->buf);
     -+		check = get_archive_attrs(repo->index, base->buf);
     - 		strbuf_setlen(base, baselen);
     - 
     - 		if (check_attr_export_ignore(check))
     - 			return 0;
     - 		queue_directory(oid, base, filename, mode, c);
     +@@ archive.c: static int queue_or_write_archive_entry(
       		return READ_TREE_RECURSIVE;
     -+	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
     -+		if (is_submodule_active(repo, filename)) {
     -+			queue_submodule(repo, oid, base, filename, mode, c);
     -+			return READ_TREE_RECURSIVE;
     -+		}
       	}
       
      -	if (write_directory(c))
     -+	if (write_directory(repo, c))
     ++	if (write_directory(r, c))
       		return -1;
      -	return write_archive_entry(oid, base->buf, base->len, filename, mode,
     -+	return write_archive_entry(repo, oid, base->buf, base->len, filename, mode,
     ++	return write_archive_entry(r, oid, base->buf, base->len, filename, mode,
       				   context);
       }
       
     @@ archive.c: struct extra_file_info {
       };
       
      -int write_archive_entries(struct archiver_args *args,
     -+int write_archive_entries(struct repository *repo,
     ++int write_archive_entries(
     ++		struct repository *repo,
      +		struct archiver_args *args,
       		write_archive_entry_fn_t write_entry)
       {
     @@ archive.c: int write_archive_entries(struct archiver_args *args,
       				  len, 040777, NULL, 0);
       		if (err)
       			return err;
     -@@ archive.c: int write_archive_entries(struct archiver_args *args,
     - 		memset(&opts, 0, sizeof(opts));
     - 		opts.index_only = 1;
     - 		opts.head_idx = -1;
     --		opts.src_index = args->repo->index;
     --		opts.dst_index = args->repo->index;
     -+		opts.src_index = repo->index;
     -+		opts.dst_index = repo->index;
     - 		opts.fn = oneway_merge;
     - 		init_tree_desc(&t, args->tree->buffer, args->tree->size);
     - 		if (unpack_trees(1, &t, &opts))
     -@@ archive.c: int write_archive_entries(struct archiver_args *args,
     - 		git_attr_set_direction(GIT_ATTR_INDEX);
     - 	}
     - 
     --	err = read_tree(args->repo, args->tree,
     -+	err = read_tree(repo, args->tree,
     - 			&args->pathspec,
     - 			queue_or_write_archive_entry,
     - 			&context);
      @@ archive.c: int write_archive_entries(struct archiver_args *args,
       			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
       				err = error_errno(_("cannot read '%s'"), path);
     @@ archive.c: int write_archive_entries(struct archiver_args *args,
       					  path, strlen(path),
       					  canon_mode(info->stat.st_mode),
       					  info->content, info->stat.st_size);
     -@@ archive.c: struct path_exists_context {
     - 	struct archiver_args *args;
     - };
     - 
     --static int reject_entry(const struct object_id *oid UNUSED,
     -+static int reject_entry(struct repository *repo, const struct object_id *oid UNUSED,
     - 			struct strbuf *base,
     - 			const char *filename, unsigned mode,
     - 			void *context)
     -@@ archive.c: static int reject_entry(const struct object_id *oid UNUSED,
     - 		struct strbuf sb = STRBUF_INIT;
     - 		strbuf_addbuf(&sb, base);
     - 		strbuf_addstr(&sb, filename);
     --		if (!match_pathspec(ctx->args->repo->index,
     -+		if (!match_pathspec(repo->index,
     - 				    &ctx->pathspec,
     - 				    sb.buf, sb.len, 0, NULL, 1))
     - 			ret = READ_TREE_RECURSIVE;
     -@@ archive.c: static void parse_pathspec_arg(const char **pathspec,
     - 		       PATHSPEC_PREFER_FULL,
     - 		       "", pathspec);
     - 	ar_args->pathspec.recursive = 1;
     -+	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
     - 	if (pathspec) {
     - 		while (*pathspec) {
     - 			if (**pathspec && !path_exists(ar_args, *pathspec))
     -@@ archive.c: static int parse_archive_args(int argc, const char **argv,
     - 	int verbose = 0;
     - 	int i;
     - 	int list = 0;
     -+	int recurse_submodules = 0;
     - 	int worktree_attributes = 0;
     - 	struct option opts[] = {
     - 		OPT_GROUP(""),
     -@@ archive.c: static int parse_archive_args(int argc, const char **argv,
     - 		  add_file_cb, (intptr_t)&base },
     - 		OPT_STRING('o', "output", &output, N_("file"),
     - 			N_("write the archive to this file")),
     -+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
     -+			N_("include submodules in archive")),
     - 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
     - 			N_("read .gitattributes in working directory")),
     - 		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
     -@@ archive.c: static int parse_archive_args(int argc, const char **argv,
     - 	args->base = base;
     - 	args->baselen = strlen(base);
     - 	args->worktree_attributes = worktree_attributes;
     -+	args->recurse_submodules = recurse_submodules;
     - 
     - 	return argc;
     - }
      @@ archive.c: int write_archive(int argc, const char **argv, const char *prefix,
       	parse_treeish_arg(argv, &args, prefix, remote);
       	parse_pathspec_arg(argv + 1, &args);
     @@ archive.c: int write_archive(int argc, const char **argv, const char *prefix,
       	free(args.refname);
      
       ## archive.h ##
     -@@ archive.h: struct archiver_args {
     - 	timestamp_t time;
     - 	struct pathspec pathspec;
     - 	unsigned int verbose : 1;
     -+	unsigned int recurse_submodules : 1;
     - 	unsigned int worktree_attributes : 1;
     - 	unsigned int convert : 1;
     - 	int compression_level;
      @@ archive.h: const char *archive_format_from_filename(const char *filename);
       #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
       struct archiver {
       	const char *name;
      -	int (*write_archive)(const struct archiver *, struct archiver_args *);
     -+	int (*write_archive)(const struct archiver *, struct repository *repo, struct archiver_args *);
     ++	int (*write_archive)(
     ++		const struct archiver *,
     ++		struct repository *,
     ++		struct archiver_args *);
       	unsigned flags;
       	char *filter_command;
       };
     @@ archive.h: void init_tar_archiver(void);
       void init_archivers(void);
       
      -typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
     -+typedef int (*write_archive_entry_fn_t)(struct repository *repo,
     ++typedef int (*write_archive_entry_fn_t)(
     ++					struct repository *repo,
      +					struct archiver_args *args,
       					const struct object_id *oid,
       					const char *path, size_t pathlen,
     @@ archive.h: void init_tar_archiver(void);
       					void *buffer, unsigned long size);
       
      -int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
     -+int write_archive_entries(struct repository *repo, struct archiver_args *args, write_archive_entry_fn_t write_entry);
     ++int write_archive_entries(
     ++	struct repository *repo,
     ++	struct archiver_args *args,
     ++	write_archive_entry_fn_t write_entry);
       
       #endif	/* ARCHIVE_H */
     -
     - ## builtin/checkout.c ##
     -@@ builtin/checkout.c: static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
     - 
     - }
     - 
     --static int update_some(const struct object_id *oid, struct strbuf *base,
     -+static int update_some(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
     - 		       const char *pathname, unsigned mode, void *context UNUSED)
     - {
     - 	int len;
     -
     - ## builtin/log.c ##
     -@@ builtin/log.c: static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
     - 	return 0;
     - }
     - 
     --static int show_tree_object(const struct object_id *oid UNUSED,
     -+static int show_tree_object(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
     - 			    struct strbuf *base UNUSED,
     - 			    const char *pathname, unsigned mode,
     - 			    void *context)
     -
     - ## builtin/ls-files.c ##
     -@@ builtin/ls-files.c: static int get_common_prefix_len(const char *common_prefix)
     - 	return common_prefix_len;
     - }
     - 
     --static int read_one_entry_opt(struct index_state *istate,
     -+static int read_one_entry_opt(struct repository *repo UNUSED, struct index_state *istate,
     - 			      const struct object_id *oid,
     - 			      struct strbuf *base,
     - 			      const char *pathname,
     -@@ builtin/ls-files.c: static int read_one_entry_opt(struct index_state *istate,
     - 	return add_index_entry(istate, ce, opt);
     - }
     - 
     --static int read_one_entry(const struct object_id *oid, struct strbuf *base,
     -+static int read_one_entry(struct repository *repo, const struct object_id *oid, struct strbuf *base,
     - 			  const char *pathname, unsigned mode,
     - 			  void *context)
     - {
     - 	struct index_state *istate = context;
     --	return read_one_entry_opt(istate, oid, base, pathname,
     -+	return read_one_entry_opt(repo, istate, oid, base, pathname,
     - 				  mode,
     - 				  ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
     - }
     -@@ builtin/ls-files.c: static int read_one_entry(const struct object_id *oid, struct strbuf *base,
     -  * This is used when the caller knows there is no existing entries at
     -  * the stage that will conflict with the entry being added.
     -  */
     --static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
     -+static int read_one_entry_quick(struct repository *repo, const struct object_id *oid, struct strbuf *base,
     - 				const char *pathname, unsigned mode,
     - 				void *context)
     - {
     - 	struct index_state *istate = context;
     --	return read_one_entry_opt(istate, oid, base, pathname,
     -+	return read_one_entry_opt(repo, istate, oid, base, pathname,
     - 				  mode, ADD_CACHE_JUST_APPEND);
     - }
     - 
     -
     - ## builtin/ls-tree.c ##
     -@@ builtin/ls-tree.c: static int show_recursive(const char *base, size_t baselen, const char *pathname
     - 	return 0;
     - }
     - 
     --static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
     -+static int show_tree_fmt(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
     - 			 const char *pathname, unsigned mode, void *context UNUSED)
     - {
     - 	size_t baselen;
     -@@ builtin/ls-tree.c: static void show_tree_common_default_long(struct strbuf *base,
     - 	strbuf_setlen(base, baselen);
     - }
     - 
     --static int show_tree_default(const struct object_id *oid, struct strbuf *base,
     -+static int show_tree_default(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
     - 			     const char *pathname, unsigned mode,
     - 			     void *context UNUSED)
     - {
     -@@ builtin/ls-tree.c: static int show_tree_default(const struct object_id *oid, struct strbuf *base,
     - 	return recurse;
     - }
     - 
     --static int show_tree_long(const struct object_id *oid, struct strbuf *base,
     -+static int show_tree_long(struct repository *repo, const struct object_id *oid, struct strbuf *base,
     - 			  const char *pathname, unsigned mode,
     - 			  void *context UNUSED)
     - {
     -@@ builtin/ls-tree.c: static int show_tree_long(const struct object_id *oid, struct strbuf *base,
     - 
     - 	if (data.type == OBJ_BLOB) {
     - 		unsigned long size;
     --		if (oid_object_info(the_repository, data.oid, &size) == OBJ_BAD)
     -+		if (oid_object_info(repo, data.oid, &size) == OBJ_BAD)
     - 			xsnprintf(size_text, sizeof(size_text), "BAD");
     - 		else
     - 			xsnprintf(size_text, sizeof(size_text),
     -@@ builtin/ls-tree.c: static int show_tree_long(const struct object_id *oid, struct strbuf *base,
     - 	}
     - 
     - 	printf("%06o %s %s %7s\t", data.mode, type_name(data.type),
     --	       find_unique_abbrev(data.oid, abbrev), size_text);
     -+	       repo_find_unique_abbrev(repo, data.oid, abbrev), size_text);
     - 	show_tree_common_default_long(base, pathname, data.base->len);
     - 	return recurse;
     - }
     - 
     --static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
     -+static int show_tree_name_only(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
     - 			       const char *pathname, unsigned mode,
     - 			       void *context UNUSED)
     - {
     -@@ builtin/ls-tree.c: static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
     - 	return recurse;
     - }
     - 
     --static int show_tree_object(const struct object_id *oid, struct strbuf *base,
     -+static int show_tree_object(struct repository *repo, const struct object_id *oid, struct strbuf *base,
     - 			    const char *pathname, unsigned mode,
     - 			    void *context UNUSED)
     - {
     -@@ builtin/ls-tree.c: static int show_tree_object(const struct object_id *oid, struct strbuf *base,
     - 	if (early >= 0)
     - 		return early;
     - 
     --	printf("%s%c", find_unique_abbrev(oid, abbrev), line_termination);
     -+	printf("%s%c", repo_find_unique_abbrev(repo, oid, abbrev), line_termination);
     - 	return recurse;
     - }
     - 
     -
     - ## list-objects.c ##
     -@@ list-objects.c: static void process_tree(struct traversal_context *ctx,
     - 	    !revs->include_check_obj(&tree->object, revs->include_check_data))
     - 		return;
     - 
     --	failed_parse = parse_tree_gently(tree, 1);
     -+	failed_parse = parse_tree_gently(revs->repo, tree, 1);
     - 	if (failed_parse) {
     - 		if (revs->ignore_missing_links)
     - 			return;
     -
     - ## merge-recursive.c ##
     -@@ merge-recursive.c: static void unpack_trees_finish(struct merge_options *opt)
     - 	clear_unpack_trees_porcelain(&opt->priv->unpack_opts);
     - }
     - 
     --static int save_files_dirs(const struct object_id *oid UNUSED,
     -+static int save_files_dirs(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
     - 			   struct strbuf *base, const char *path,
     - 			   unsigned int mode, void *context)
     - {
     -
     - ## revision.c ##
     -@@ revision.c: static void mark_tree_contents_uninteresting(struct repository *r,
     - 	struct tree_desc desc;
     - 	struct name_entry entry;
     - 
     --	if (parse_tree_gently(tree, 1) < 0)
     -+	if (parse_tree_gently(r, tree, 1) < 0)
     - 		return;
     - 
     - 	init_tree_desc(&desc, tree->buffer, tree->size);
     -@@ revision.c: static void add_children_by_path(struct repository *r,
     - 	if (!tree)
     - 		return;
     - 
     --	if (parse_tree_gently(tree, 1) < 0)
     -+	if (parse_tree_gently(r, tree, 1) < 0)
     - 		return;
     - 
     - 	init_tree_desc(&desc, tree->buffer, tree->size);
     -
     - ## sparse-index.c ##
     -@@ sparse-index.c: static void set_index_entry(struct index_state *istate, int nr, struct cache_ent
     - 	add_name_hash(istate, ce);
     - }
     - 
     --static int add_path_to_index(const struct object_id *oid,
     -+static int add_path_to_index(struct repository *repo UNUSED, const struct object_id *oid,
     - 			     struct strbuf *base, const char *path,
     - 			     unsigned int mode, void *context)
     - {
     -
     - ## t/t5005-archive-submodules.sh (new) ##
     -@@
     -+#!/bin/sh
     -+
     -+test_description='git archive --recurse-submodules test'
     -+
     -+. ./test-lib.sh
     -+. "$TEST_DIRECTORY"/lib-submodule-update.sh
     -+
     -+test_expect_success 'setup' '
     -+	create_lib_submodule_repo &&
     -+	git -C submodule_update_repo checkout valid_sub1 &&
     -+	git -C submodule_update_repo submodule update
     -+'
     -+
     -+check_tar() {
     -+	tarfile=$1.tar
     -+	listfile=$1.lst
     -+	dir=$1
     -+	dir_with_prefix=$dir/$2
     -+
     -+	test_expect_success ' extract tar archive' '
     -+		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
     -+	'
     -+}
     -+
     -+check_added() {
     -+	dir=$1
     -+	path_in_fs=$2
     -+	path_in_archive=$3
     -+
     -+	test_expect_success " validate extra file $path_in_archive" '
     -+		test -f $dir/$path_in_archive &&
     -+		diff -r $path_in_fs $dir/$path_in_archive
     -+	'
     -+}
     -+
     -+check_not_added() {
     -+	dir=$1
     -+	path_in_archive=$2
     -+
     -+	test_expect_success " validate unpresent file $path_in_archive" '
     -+		! test -f $dir/$path_in_archive &&
     -+		! test -d $dir/$path_in_archive
     -+	'
     -+}
     -+
     -+test_expect_success 'archive without recurse, non-init' '
     -+	reset_work_tree_to valid_sub1 &&
     -+	git -C submodule_update archive HEAD >b.tar
     -+'
     -+
     -+check_tar b
     -+check_added b submodule_update/file1 file1
     -+check_not_added b sub1/file1
     -+
     -+test_expect_success 'archive with recurse, non-init' '
     -+	reset_work_tree_to valid_sub1 &&
     -+	! git -C submodule_update archive --recurse-submodules HEAD >b2-err.tar
     -+'
     -+
     -+test_expect_success 'archive with recurse, init' '
     -+	reset_work_tree_to valid_sub1 &&
     -+	git -C submodule_update submodule update --init &&
     -+	git -C submodule_update ls-files --recurse-submodules &&
     -+	git -C submodule_update ls-tree HEAD &&
     -+	git -C submodule_update archive --recurse-submodules HEAD >b2.tar
     -+'
     -+
     -+check_tar b2
     -+check_added b2 submodule_update/sub1/file1 sub1/file1
     -+
     -+test_expect_success 'archive with recurse with big files' '
     -+	reset_work_tree_to valid_sub1 &&
     -+	test_config core.bigfilethreshold 1 &&
     -+	git -C submodule_update submodule update --init &&
     -+	git -C submodule_update ls-files --recurse-submodules &&
     -+	git -C submodule_update ls-tree HEAD &&
     -+	git -C submodule_update archive --recurse-submodules HEAD >b3.tar
     -+'
     -+
     -+check_tar b3
     -+check_added b3 submodule_update/sub1/file1 sub1/file1
     -+
     -+
     -+test_done
     -
     - ## tree.c ##
     -@@
     - #include "alloc.h"
     - #include "tree-walk.h"
     - #include "repository.h"
     -+#include "pathspec.h"
     - 
     - const char *tree_type = "tree";
     - 
     -@@ tree.c: int read_tree_at(struct repository *r,
     - 	int len, oldlen = base->len;
     - 	enum interesting retval = entry_not_interesting;
     - 
     --	if (parse_tree(tree))
     --		return -1;
     -+	if (repo_parse_tree(r, tree))
     -+		die("Failed to parse tree");
     - 
     - 	init_tree_desc(&desc, tree->buffer, tree->size);
     - 
     -@@ tree.c: int read_tree_at(struct repository *r,
     - 				continue;
     - 		}
     - 
     --		switch (fn(&entry.oid, base,
     -+		switch (fn(r, &entry.oid, base,
     - 			   entry.path, entry.mode, context)) {
     - 		case 0:
     - 			continue;
     -@@ tree.c: int read_tree_at(struct repository *r,
     - 			return -1;
     - 		}
     - 
     --		if (S_ISDIR(entry.mode))
     -+		if (S_ISDIR(entry.mode)) {
     - 			oidcpy(&oid, &entry.oid);
     --		else if (S_ISGITLINK(entry.mode)) {
     -+			len = tree_entry_len(&entry);
     -+			strbuf_add(base, entry.path, len);
     -+			strbuf_addch(base, '/');
     -+			retval = read_tree_at(r, lookup_tree(r, &oid),
     -+						base, pathspec,
     -+						fn, context);
     -+			strbuf_setlen(base, oldlen);
     -+			if (retval)
     -+				return -1;
     -+		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
     - 			struct commit *commit;
     -+			struct repository subrepo;
     -+			struct repository* subrepo_p = &subrepo;
     -+			struct tree* submodule_tree;
     - 
     --			commit = lookup_commit(r, &entry.oid);
     -+			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
     -+				die("couldn't init submodule %s%s", base->buf, entry.path);
     -+
     -+			if (repo_read_index(subrepo_p) < 0)
     -+				die("index file corrupt");
     -+
     -+			commit = lookup_commit(subrepo_p, &entry.oid);
     - 			if (!commit)
     - 				die("Commit %s in submodule path %s%s not found",
     - 				    oid_to_hex(&entry.oid),
     - 				    base->buf, entry.path);
     - 
     --			if (parse_commit(commit))
     -+			if (repo_parse_commit(subrepo_p, commit))
     - 				die("Invalid commit %s in submodule path %s%s",
     - 				    oid_to_hex(&entry.oid),
     - 				    base->buf, entry.path);
     - 
     --			oidcpy(&oid, get_commit_tree_oid(commit));
     -+			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
     -+			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
     -+
     -+			len = tree_entry_len(&entry);
     -+			strbuf_add(base, entry.path, len);
     -+			strbuf_addch(base, '/');
     -+			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
     -+						base, pathspec,
     -+						fn, context);
     -+			if (retval) {
     -+			    die("failed to read tree for %s%s", base->buf, entry.path);
     -+			    return -1;
     -+			}
     -+			strbuf_setlen(base, oldlen);
     -+			repo_clear(subrepo_p);
     - 		}
     --		else
     --			continue;
     - 
     --		len = tree_entry_len(&entry);
     --		strbuf_add(base, entry.path, len);
     --		strbuf_addch(base, '/');
     --		retval = read_tree_at(r, lookup_tree(r, &oid),
     --				      base, pathspec,
     --				      fn, context);
     --		strbuf_setlen(base, oldlen);
     --		if (retval)
     --			return -1;
     - 	}
     - 	return 0;
     - }
     -@@ tree.c: int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
     - 	return 0;
     - }
     - 
     --int parse_tree_gently(struct tree *item, int quiet_on_missing)
     -+int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
     - {
     - 	 enum object_type type;
     - 	 void *buffer;
     -@@ tree.c: int parse_tree_gently(struct tree *item, int quiet_on_missing)
     - 
     - 	if (item->object.parsed)
     - 		return 0;
     --	buffer = read_object_file(&item->object.oid, &type, &size);
     -+	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
     - 	if (!buffer)
     - 		return quiet_on_missing ? -1 :
     - 			error("Could not read %s",
     -
     - ## tree.h ##
     -@@ tree.h: struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
     - 
     - int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
     - 
     --int parse_tree_gently(struct tree *tree, int quiet_on_missing);
     --static inline int parse_tree(struct tree *tree)
     -+int parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
     -+static inline int repo_parse_tree(struct repository *r, struct tree *tree)
     - {
     --	return parse_tree_gently(tree, 0);
     -+	return parse_tree_gently(r, tree, 0);
     - }
     -+#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
     -+#define parse_tree(tree) repo_parse_tree(the_repository, tree)
     -+#endif
     - void free_tree_buffer(struct tree *tree);
     - 
     - /* Parses and returns the tree in the given ent, chasing tags and commits. */
     -@@ tree.h: struct tree *parse_tree_indirect(const struct object_id *oid);
     - int cmp_cache_name_compare(const void *a_, const void *b_);
     - 
     - #define READ_TREE_RECURSIVE 1
     --typedef int (*read_tree_fn_t)(const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
     -+typedef int (*read_tree_fn_t)(struct repository *, const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
     - 
     - int read_tree_at(struct repository *r,
     - 		 struct tree *tree, struct strbuf *base,
     -
     - ## wt-status.c ##
     -@@ wt-status.c: static void wt_status_collect_changes_index(struct wt_status *s)
     - 	release_revisions(&rev);
     - }
     - 
     --static int add_file_to_list(const struct object_id *oid,
     -+static int add_file_to_list(struct repository *repo UNUSED, const struct object_id *oid,
     - 			    struct strbuf *base, const char *path,
     - 			    unsigned int mode, void *context)
     - {
  -:  ----------- >  7:  2443c9b1b6e archive: remove global repository from archive_args
  -:  ----------- >  8:  4672e3d9586 archive: add --recurse-submodules to git-archive command
  -:  ----------- >  9:  f88ebbaf17c archive: add tests for git archive --recurse-submodules

-- 
gitgitgadget

^ permalink raw reply	[flat|nested] 48+ messages in thread

* [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods.
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Alphadelta14 via GitGitGadget
  2022-10-17 13:26       ` Junio C Hamano
                         ` (2 more replies)
  2022-10-17  2:23     ` [PATCH v3 2/9] tree: update cases to use repo_ tree methods Heather Lapointe via GitGitGadget
                       ` (10 subsequent siblings)
  11 siblings, 3 replies; 48+ messages in thread
From: Alphadelta14 via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Alphadelta14

From: Alphadelta14 <alpha@alphaservcomputing.solutions>

Expect that tree walking may switch repository contexts for cases
such as submodules.
Added compatibility macros for existing cases.

Annotate an existing issue where repo is wrong when traversing.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 tree.c | 15 +++++++++------
 tree.h | 14 ++++++++++----
 2 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/tree.c b/tree.c
index 410e3b477e5..13f9173d45e 100644
--- a/tree.c
+++ b/tree.c
@@ -22,7 +22,7 @@ int read_tree_at(struct repository *r,
 	int len, oldlen = base->len;
 	enum interesting retval = entry_not_interesting;
 
-	if (parse_tree(tree))
+	if (repo_parse_tree(r, tree))
 		return -1;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
@@ -58,7 +58,11 @@ int read_tree_at(struct repository *r,
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
 
-			if (parse_commit(commit))
+			// FIXME: This is the wrong repo instance (it refers to the superproject)
+			// it will always fail as is (will fix in later patch)
+			// This current codepath isn't executed by any existing callbacks
+			// so it wouldn't show up as an issue at this time.
+			if (repo_parse_commit(r, commit))
 				die("Invalid commit %s in submodule path %s%s",
 				    oid_to_hex(&entry.oid),
 				    base->buf, entry.path);
@@ -121,7 +125,7 @@ int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
 	return 0;
 }
 
-int parse_tree_gently(struct tree *item, int quiet_on_missing)
+int repo_parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
 {
 	 enum object_type type;
 	 void *buffer;
@@ -129,7 +133,7 @@ int parse_tree_gently(struct tree *item, int quiet_on_missing)
 
 	if (item->object.parsed)
 		return 0;
-	buffer = read_object_file(&item->object.oid, &type, &size);
+	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
 	if (!buffer)
 		return quiet_on_missing ? -1 :
 			error("Could not read %s",
@@ -149,9 +153,8 @@ void free_tree_buffer(struct tree *tree)
 	tree->object.parsed = 0;
 }
 
-struct tree *parse_tree_indirect(const struct object_id *oid)
+struct tree *repo_parse_tree_indirect(struct repository *r, const struct object_id *oid)
 {
-	struct repository *r = the_repository;
 	struct object *obj = parse_object(r, oid);
 	return (struct tree *)repo_peel_to_type(r, NULL, 0, obj, OBJ_TREE);
 }
diff --git a/tree.h b/tree.h
index 6efff003e21..cc6402e4738 100644
--- a/tree.h
+++ b/tree.h
@@ -18,15 +18,21 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
 
 int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
 
-int parse_tree_gently(struct tree *tree, int quiet_on_missing);
-static inline int parse_tree(struct tree *tree)
+int repo_parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
+static inline int repo_parse_tree(struct repository *r, struct tree *tree)
 {
-	return parse_tree_gently(tree, 0);
+	return repo_parse_tree_gently(r, tree, 0);
 }
+
+#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
+#define parse_tree(tree) repo_parse_tree(the_repository, tree)
+#define parse_tree_gently(tree, quiet_on_missing) repo_parse_tree_gently(the_repository, tree, quiet_on_missing)
+#define parse_tree_indirect(oid) repo_parse_tree_indirect(the_repository, oid)
+#endif
 void free_tree_buffer(struct tree *tree);
 
 /* Parses and returns the tree in the given ent, chasing tags and commits. */
-struct tree *parse_tree_indirect(const struct object_id *oid);
+struct tree *repo_parse_tree_indirect(struct repository *r, const struct object_id *oid);
 
 int cmp_cache_name_compare(const void *a_, const void *b_);
 
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 2/9] tree: update cases to use repo_ tree methods
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
                       ` (9 subsequent siblings)
  11 siblings, 0 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

For cases which already had a repository instance,
update those to use the repo_parse_tree* methods.

Leave the remaining invocations that were already using the_repository
alone.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 merge.c     | 4 ++--
 reset.c     | 2 +-
 revision.c  | 4 ++--
 sequencer.c | 6 +++---
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/merge.c b/merge.c
index 2382ff66d35..1efc4440c03 100644
--- a/merge.c
+++ b/merge.c
@@ -63,12 +63,12 @@ int checkout_fast_forward(struct repository *r,
 	memset(&trees, 0, sizeof(trees));
 	memset(&t, 0, sizeof(t));
 
-	trees[nr_trees] = parse_tree_indirect(head);
+	trees[nr_trees] = repo_parse_tree_indirect(r, head);
 	if (!trees[nr_trees++]) {
 		rollback_lock_file(&lock_file);
 		return -1;
 	}
-	trees[nr_trees] = parse_tree_indirect(remote);
+	trees[nr_trees] = repo_parse_tree_indirect(r, remote);
 	if (!trees[nr_trees++]) {
 		rollback_lock_file(&lock_file);
 		return -1;
diff --git a/reset.c b/reset.c
index e3383a93343..a0ac5e8a684 100644
--- a/reset.c
+++ b/reset.c
@@ -153,7 +153,7 @@ int reset_head(struct repository *r, const struct reset_head_opts *opts)
 		goto leave_reset_head;
 	}
 
-	tree = parse_tree_indirect(oid);
+	tree = repo_parse_tree_indirect(r, oid);
 	prime_cache_tree(r, r->index, tree);
 
 	if (write_locked_index(r->index, &lock, COMMIT_LOCK) < 0) {
diff --git a/revision.c b/revision.c
index 36e31942cee..dab5ddaf039 100644
--- a/revision.c
+++ b/revision.c
@@ -74,7 +74,7 @@ static void mark_tree_contents_uninteresting(struct repository *r,
 	struct tree_desc desc;
 	struct name_entry entry;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (repo_parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
@@ -181,7 +181,7 @@ static void add_children_by_path(struct repository *r,
 	if (!tree)
 		return;
 
-	if (parse_tree_gently(tree, 1) < 0)
+	if (repo_parse_tree_gently(r, tree, 1) < 0)
 		return;
 
 	init_tree_desc(&desc, tree->buffer, tree->size);
diff --git a/sequencer.c b/sequencer.c
index a4d85f1fbdd..a4c09dfa182 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -685,9 +685,9 @@ static int do_recursive_merge(struct repository *r,
 		o.buffer_output = 2;
 	o.show_rename_progress = 1;
 
-	head_tree = parse_tree_indirect(head);
-	next_tree = next ? get_commit_tree(next) : empty_tree(r);
-	base_tree = base ? get_commit_tree(base) : empty_tree(r);
+	head_tree = repo_parse_tree_indirect(r, head);
+	next_tree = next ? repo_get_commit_tree(r, next) : empty_tree(r);
+	base_tree = base ? repo_get_commit_tree(r, base) : empty_tree(r);
 
 	for (i = 0; i < opts->xopts_nr; i++)
 		parse_merge_opt(&o, opts->xopts[i]);
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 3/9] tree: increase test coverage for tree.c
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 2/9] tree: update cases to use repo_ tree methods Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-17 13:34       ` Phillip Wood
                         ` (2 more replies)
  2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
                       ` (8 subsequent siblings)
  11 siblings, 3 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

This highlights some buggy behavior from read_tree for submodules that
was not being executed.

This introduces a test-tool tree-read-tree-at command
(the complex name is because it is not related to the read-tree command).

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 Makefile                          |  1 +
 t/helper/test-tool.c              |  1 +
 t/helper/test-tool.h              |  1 +
 t/helper/test-tree-read-tree-at.c | 40 +++++++++++++++++++
 t/t1023-tree-read-tree-at.sh      | 65 +++++++++++++++++++++++++++++++
 5 files changed, 108 insertions(+)
 create mode 100644 t/helper/test-tree-read-tree-at.c
 create mode 100755 t/t1023-tree-read-tree-at.sh

diff --git a/Makefile b/Makefile
index 6bfb62cbe94..52d17ca7276 100644
--- a/Makefile
+++ b/Makefile
@@ -788,6 +788,7 @@ TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
 TEST_BUILTINS_OBJS += test-submodule.o
 TEST_BUILTINS_OBJS += test-subprocess.o
 TEST_BUILTINS_OBJS += test-trace2.o
+TEST_BUILTINS_OBJS += test-tree-read-tree-at.o
 TEST_BUILTINS_OBJS += test-urlmatch-normalization.o
 TEST_BUILTINS_OBJS += test-userdiff.o
 TEST_BUILTINS_OBJS += test-wildmatch.o
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index d1d013bcd92..a8a9bedec5f 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -82,6 +82,7 @@ static struct test_cmd cmds[] = {
 	{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
 	{ "subprocess", cmd__subprocess },
 	{ "trace2", cmd__trace2 },
+	{ "tree-read-tree-at", cmd__tree_read_tree_at },
 	{ "userdiff", cmd__userdiff },
 	{ "urlmatch-normalization", cmd__urlmatch_normalization },
 	{ "xml-encode", cmd__xml_encode },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index 6b46b6444b6..409fddfaeb8 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -76,6 +76,7 @@ int cmd__submodule_config(int argc, const char **argv);
 int cmd__submodule_nested_repo_config(int argc, const char **argv);
 int cmd__subprocess(int argc, const char **argv);
 int cmd__trace2(int argc, const char **argv);
+int cmd__tree_read_tree_at(int argc, const char **argv);
 int cmd__userdiff(int argc, const char **argv);
 int cmd__urlmatch_normalization(int argc, const char **argv);
 int cmd__xml_encode(int argc, const char **argv);
diff --git a/t/helper/test-tree-read-tree-at.c b/t/helper/test-tree-read-tree-at.c
new file mode 100644
index 00000000000..bba759bb264
--- /dev/null
+++ b/t/helper/test-tree-read-tree-at.c
@@ -0,0 +1,40 @@
+/* This tests tree.c's read_tree / read_tree_at.
+We call it tree-read-tree-at to disambiguate with the read-tree tool.
+*/
+#include "cache.h"
+#include "pathspec.h"
+#include "test-tool.h"
+#include "tree.h"
+
+static int test_handle_entry(const struct object_id *oid,
+		struct strbuf *base, const char *filename,
+		unsigned mode, void *context UNUSED) {
+	printf("%i %s %s%s\n", mode, oid_to_hex(oid), base->buf, filename);
+	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
+		return READ_TREE_RECURSIVE;
+	}
+	return 0;
+}
+
+int cmd__tree_read_tree_at(int argc UNUSED, const char **argv)
+{
+	struct pathspec pathspec;
+	struct tree *tree;
+	struct repository *repo;
+	struct object_id oid;
+
+	setup_git_directory();
+	repo = the_repository;
+	assert(repo);
+
+	parse_pathspec(&pathspec, 0,
+		       PATHSPEC_PREFER_FULL,
+		       "", argv);
+
+	assert(repo_get_oid(repo, "HEAD", &oid) == 0);
+	tree = repo_parse_tree_indirect(repo, &oid);
+	assert(tree);
+	pathspec.recurse_submodules = 1;
+	read_tree(repo, tree, &pathspec, test_handle_entry, NULL);
+	return 0;
+}
diff --git a/t/t1023-tree-read-tree-at.sh b/t/t1023-tree-read-tree-at.sh
new file mode 100755
index 00000000000..9e5ce3abb4b
--- /dev/null
+++ b/t/t1023-tree-read-tree-at.sh
@@ -0,0 +1,65 @@
+#!/bin/sh
+
+# tests for tree.c (not read-tree.c)
+test_description='Test read_tree / read_tree_at'
+. ./test-lib.sh
+
+test_expect_success 'read_tree basic' '
+	rm -rf walk_tree_basic &&
+	git init walk_tree_basic &&
+	(
+		cd walk_tree_basic &&
+		set -x &&
+
+		mkdir -p dir1/dirA &&
+		mkdir -p dir1/dirB &&
+		mkdir -p dir2 &&
+		echo "file1" > file1.txt &&
+		echo "file2" > file2.txt &&
+		# uncommitted
+		echo "file3" > file3.txt &&
+
+		echo "file1A1" > dir1/dirA/file1.txt &&
+		git add file1.txt file2.txt dir1/dirA/file1.txt &&
+		git commit -m "initial commit" &&
+
+		test-tool tree-read-tree-at . > walk1.txt &&
+		grep " file1.txt" walk1.txt &&
+		! grep " file3.txt" walk1.txt &&
+		! grep " dir1/dirB" walk1.txt &&
+		grep " dir1/dirA/file1.txt" walk1.txt
+	)
+'
+
+test_expect_success 'read_tree submodules' '
+	rm -rf walk_tree_submodules &&
+	git init submodule1 &&
+	(
+		cd submodule1 &&
+		mkdir -p dir1/dirA &&
+		echo "dir2/sub1/file1.txt" > file1.txt &&
+		echo "dir2/sub1/file1A1.txt" > dir1/dirA/file1.txt &&
+		git add file1.txt dir1/dirA/file1.txt &&
+		git commit -m "initial commit"
+	) &&
+	git init walk_tree_submodules &&
+	(
+		cd walk_tree_submodules &&
+
+		mkdir -p dir2 &&
+		echo "file1" > file1.txt &&
+		echo "dir2/file2" > dir2/file2.txt &&
+		git add file1.txt dir2/file2.txt &&
+		git commit -m "initial commit" &&
+
+		git submodule add ../submodule1 dir2/sub1 &&
+		git commit -m "add submodule1" &&
+
+		test-tool tree-read-tree-at . > walk2.txt &&
+		grep " file1.txt" walk2.txt &&
+		grep " dir2/sub1/file1.txt" walk2.txt &&
+		grep " dir2/sub1/dir1/dirA/file1.txt" walk2.txt
+	)
+'
+
+test_done
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (2 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-17 13:48       ` Phillip Wood
                         ` (3 more replies)
  2022-10-17  2:23     ` [PATCH v3 5/9] tree: add repository parameter to read_tree_fn_t Heather Lapointe via GitGitGadget
                       ` (7 subsequent siblings)
  11 siblings, 4 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

This supports traversal into an actual submodule for read_tree_at.
The logic is now gated on pathspec->recurse_submodules,
but previously hadn't been executed due to all fn() cases
returning early for submodules.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 tree.c | 88 ++++++++++++++++++++++++++++++++++++++++------------------
 1 file changed, 61 insertions(+), 27 deletions(-)

diff --git a/tree.c b/tree.c
index 13f9173d45e..2a087c010f9 100644
--- a/tree.c
+++ b/tree.c
@@ -8,6 +8,7 @@
 #include "alloc.h"
 #include "tree-walk.h"
 #include "repository.h"
+#include "pathspec.h"
 
 const char *tree_type = "tree";
 
@@ -47,40 +48,73 @@ int read_tree_at(struct repository *r,
 			return -1;
 		}
 
-		if (S_ISDIR(entry.mode))
+		if (S_ISDIR(entry.mode)) {
 			oidcpy(&oid, &entry.oid);
-		else if (S_ISGITLINK(entry.mode)) {
-			struct commit *commit;
 
-			commit = lookup_commit(r, &entry.oid);
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			strbuf_addch(base, '/');
+			retval = read_tree_at(r, lookup_tree(r, &oid),
+						base, pathspec,
+						fn, context);
+			strbuf_setlen(base, oldlen);
+			if (retval)
+				return -1;
+		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
+			struct commit *commit;
+			struct repository subrepo;
+			struct repository* subrepo_p = &subrepo;
+			struct tree* submodule_tree;
+			char *submodule_rel_path;
+			int name_base_len = 0;
+
+			len = tree_entry_len(&entry);
+			strbuf_add(base, entry.path, len);
+			submodule_rel_path = base->buf;
+			// repo_submodule_init expects a path relative to submodule_prefix
+			if (r->submodule_prefix) {
+				name_base_len = strlen(r->submodule_prefix);
+				// we should always expect to start with submodule_prefix
+				assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len));
+				// strip the prefix
+				submodule_rel_path += name_base_len;
+				// if submodule_prefix doesn't end with a /, we want to get rid of that too
+				if (is_dir_sep(submodule_rel_path[0])) {
+					submodule_rel_path++;
+				}
+			}
+
+			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
+				die("couldn't init submodule %s", base->buf);
+
+			if (repo_read_index(subrepo_p) < 0)
+				die("index file corrupt");
+
+			commit = lookup_commit(subrepo_p, &entry.oid);
 			if (!commit)
-				die("Commit %s in submodule path %s%s not found",
+				die("Commit %s in submodule path %s not found",
 				    oid_to_hex(&entry.oid),
-				    base->buf, entry.path);
-
-			// FIXME: This is the wrong repo instance (it refers to the superproject)
-			// it will always fail as is (will fix in later patch)
-			// This current codepath isn't executed by any existing callbacks
-			// so it wouldn't show up as an issue at this time.
-			if (repo_parse_commit(r, commit))
-				die("Invalid commit %s in submodule path %s%s",
+				    base->buf);
+
+			if (repo_parse_commit(subrepo_p, commit))
+				die("Invalid commit %s in submodule path %s",
 				    oid_to_hex(&entry.oid),
-				    base->buf, entry.path);
+				    base->buf);
 
-			oidcpy(&oid, get_commit_tree_oid(commit));
-		}
-		else
-			continue;
+			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
+			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
 
-		len = tree_entry_len(&entry);
-		strbuf_add(base, entry.path, len);
-		strbuf_addch(base, '/');
-		retval = read_tree_at(r, lookup_tree(r, &oid),
-				      base, pathspec,
-				      fn, context);
-		strbuf_setlen(base, oldlen);
-		if (retval)
-			return -1;
+			strbuf_addch(base, '/');
+
+			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
+						base, pathspec,
+						fn, context);
+			if (retval)
+			    die("failed to read tree for %s", base->buf);
+			strbuf_setlen(base, oldlen);
+			repo_clear(subrepo_p);
+		}
+		// else, this is a file (or a submodule, but no pathspec->recurse_submodules)
 	}
 	return 0;
 }
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 5/9] tree: add repository parameter to read_tree_fn_t
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (3 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 6/9] archive: pass repo objects to write_archive handlers Heather Lapointe via GitGitGadget
                       ` (6 subsequent siblings)
  11 siblings, 0 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

Add a repo parameter to read_tree_fn_t because most callbacks do
need some repo instance.
This avoids having to use the_repository functions otherwise
and improves repo context switching for submodules.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 archive.c                         | 11 ++++++----
 builtin/checkout.c                |  4 +++-
 builtin/log.c                     |  4 +++-
 builtin/ls-files.c                |  8 ++++++--
 builtin/ls-tree.c                 | 34 ++++++++++++++++++++-----------
 merge-recursive.c                 |  4 +++-
 sparse-index.c                    |  4 +++-
 t/helper/test-tree-read-tree-at.c |  3 ++-
 tree.c                            |  2 +-
 tree.h                            |  2 +-
 wt-status.c                       |  4 +++-
 11 files changed, 54 insertions(+), 26 deletions(-)

diff --git a/archive.c b/archive.c
index 61a79e4a227..15f3ac92dfc 100644
--- a/archive.c
+++ b/archive.c
@@ -225,7 +225,9 @@ static int write_directory(struct archiver_context *c)
 	return ret ? -1 : 0;
 }
 
-static int queue_or_write_archive_entry(const struct object_id *oid,
+static int queue_or_write_archive_entry(
+		struct repository *r,
+		const struct object_id *oid,
 		struct strbuf *base, const char *filename,
 		unsigned mode, void *context)
 {
@@ -246,7 +248,7 @@ static int queue_or_write_archive_entry(const struct object_id *oid,
 		/* Borrow base, but restore its original value when done. */
 		strbuf_addstr(base, filename);
 		strbuf_addch(base, '/');
-		check = get_archive_attrs(c->args->repo->index, base->buf);
+		check = get_archive_attrs(r->index, base->buf);
 		strbuf_setlen(base, baselen);
 
 		if (check_attr_export_ignore(check))
@@ -382,7 +384,8 @@ struct path_exists_context {
 	struct archiver_args *args;
 };
 
-static int reject_entry(const struct object_id *oid UNUSED,
+static int reject_entry(
+			struct repository *r, const struct object_id *oid UNUSED,
 			struct strbuf *base,
 			const char *filename, unsigned mode,
 			void *context)
@@ -394,7 +397,7 @@ static int reject_entry(const struct object_id *oid UNUSED,
 		struct strbuf sb = STRBUF_INIT;
 		strbuf_addbuf(&sb, base);
 		strbuf_addstr(&sb, filename);
-		if (!match_pathspec(ctx->args->repo->index,
+		if (!match_pathspec(r->index,
 				    &ctx->pathspec,
 				    sb.buf, sb.len, 0, NULL, 1))
 			ret = READ_TREE_RECURSIVE;
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 2a132392fbe..ee98858afe6 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -124,7 +124,9 @@ static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
 
 }
 
-static int update_some(const struct object_id *oid, struct strbuf *base,
+static int update_some(
+		       struct repository *r UNUSED,
+			   const struct object_id *oid, struct strbuf *base,
 		       const char *pathname, unsigned mode, void *context UNUSED)
 {
 	int len;
diff --git a/builtin/log.c b/builtin/log.c
index ee19dc5d450..608a448fe4d 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -698,7 +698,9 @@ static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
 	return 0;
 }
 
-static int show_tree_object(const struct object_id *oid UNUSED,
+static int show_tree_object(
+			    struct repository *r UNUSED,
+			    const struct object_id *oid UNUSED,
 			    struct strbuf *base UNUSED,
 			    const char *pathname, unsigned mode,
 			    void *context)
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 4cf8a236483..fbb07fa08c2 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -533,7 +533,9 @@ static int read_one_entry_opt(struct index_state *istate,
 	return add_index_entry(istate, ce, opt);
 }
 
-static int read_one_entry(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry(
+			  struct repository *r UNUSED,
+			  const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context)
 {
@@ -547,7 +549,9 @@ static int read_one_entry(const struct object_id *oid, struct strbuf *base,
  * This is used when the caller knows there is no existing entries at
  * the stage that will conflict with the entry being added.
  */
-static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
+static int read_one_entry_quick(
+				struct repository *r UNUSED,
+				const struct object_id *oid, struct strbuf *base,
 				const char *pathname, unsigned mode,
 				void *context)
 {
diff --git a/builtin/ls-tree.c b/builtin/ls-tree.c
index c3ea09281af..dd571abad1c 100644
--- a/builtin/ls-tree.c
+++ b/builtin/ls-tree.c
@@ -141,8 +141,10 @@ static int show_recursive(const char *base, size_t baselen, const char *pathname
 	return 0;
 }
 
-static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
-			 const char *pathname, unsigned mode, void *context UNUSED)
+static int show_tree_fmt(
+			struct repository *r UNUSED,
+			const struct object_id *oid, struct strbuf *base,
+			const char *pathname, unsigned mode, void *context UNUSED)
 {
 	size_t baselen;
 	int recurse = 0;
@@ -211,9 +213,11 @@ static void show_tree_common_default_long(struct strbuf *base,
 	strbuf_setlen(base, baselen);
 }
 
-static int show_tree_default(const struct object_id *oid, struct strbuf *base,
-			     const char *pathname, unsigned mode,
-			     void *context UNUSED)
+static int show_tree_default(
+		struct repository *r,
+		const struct object_id *oid, struct strbuf *base,
+		const char *pathname, unsigned mode,
+		void *context UNUSED)
 {
 	int early;
 	int recurse;
@@ -224,12 +228,14 @@ static int show_tree_default(const struct object_id *oid, struct strbuf *base,
 		return early;
 
 	printf("%06o %s %s\t", data.mode, type_name(data.type),
-	       find_unique_abbrev(data.oid, abbrev));
+	       repo_find_unique_abbrev(r, data.oid, abbrev));
 	show_tree_common_default_long(base, pathname, data.base->len);
 	return recurse;
 }
 
-static int show_tree_long(const struct object_id *oid, struct strbuf *base,
+static int show_tree_long(
+			  struct repository *r,
+			  const struct object_id *oid, struct strbuf *base,
 			  const char *pathname, unsigned mode,
 			  void *context UNUSED)
 {
@@ -244,7 +250,7 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 
 	if (data.type == OBJ_BLOB) {
 		unsigned long size;
-		if (oid_object_info(the_repository, data.oid, &size) == OBJ_BAD)
+		if (oid_object_info(r, data.oid, &size) == OBJ_BAD)
 			xsnprintf(size_text, sizeof(size_text), "BAD");
 		else
 			xsnprintf(size_text, sizeof(size_text),
@@ -254,12 +260,14 @@ static int show_tree_long(const struct object_id *oid, struct strbuf *base,
 	}
 
 	printf("%06o %s %s %7s\t", data.mode, type_name(data.type),
-	       find_unique_abbrev(data.oid, abbrev), size_text);
+	       repo_find_unique_abbrev(r, data.oid, abbrev), size_text);
 	show_tree_common_default_long(base, pathname, data.base->len);
 	return recurse;
 }
 
-static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
+static int show_tree_name_only(
+			       struct repository *r UNUSED,
+			       const struct object_id *oid, struct strbuf *base,
 			       const char *pathname, unsigned mode,
 			       void *context UNUSED)
 {
@@ -280,7 +288,9 @@ static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
 	return recurse;
 }
 
-static int show_tree_object(const struct object_id *oid, struct strbuf *base,
+static int show_tree_object(
+			    struct repository *r,
+				const struct object_id *oid, struct strbuf *base,
 			    const char *pathname, unsigned mode,
 			    void *context UNUSED)
 {
@@ -292,7 +302,7 @@ static int show_tree_object(const struct object_id *oid, struct strbuf *base,
 	if (early >= 0)
 		return early;
 
-	printf("%s%c", find_unique_abbrev(oid, abbrev), line_termination);
+	printf("%s%c", repo_find_unique_abbrev(r, oid, abbrev), line_termination);
 	return recurse;
 }
 
diff --git a/merge-recursive.c b/merge-recursive.c
index 4ddd3adea00..dccde276655 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -456,7 +456,9 @@ static void unpack_trees_finish(struct merge_options *opt)
 	clear_unpack_trees_porcelain(&opt->priv->unpack_opts);
 }
 
-static int save_files_dirs(const struct object_id *oid UNUSED,
+static int save_files_dirs(
+			   struct repository *r UNUSED,
+			   const struct object_id *oid UNUSED,
 			   struct strbuf *base, const char *path,
 			   unsigned int mode, void *context)
 {
diff --git a/sparse-index.c b/sparse-index.c
index e4a54ce1943..4187c7ce9c4 100644
--- a/sparse-index.c
+++ b/sparse-index.c
@@ -232,7 +232,9 @@ static void set_index_entry(struct index_state *istate, int nr, struct cache_ent
 	add_name_hash(istate, ce);
 }
 
-static int add_path_to_index(const struct object_id *oid,
+static int add_path_to_index(
+			     struct repository *r UNUSED,
+			     const struct object_id *oid,
 			     struct strbuf *base, const char *path,
 			     unsigned int mode, void *context)
 {
diff --git a/t/helper/test-tree-read-tree-at.c b/t/helper/test-tree-read-tree-at.c
index bba759bb264..d2bcc8c849a 100644
--- a/t/helper/test-tree-read-tree-at.c
+++ b/t/helper/test-tree-read-tree-at.c
@@ -6,7 +6,8 @@ We call it tree-read-tree-at to disambiguate with the read-tree tool.
 #include "test-tool.h"
 #include "tree.h"
 
-static int test_handle_entry(const struct object_id *oid,
+static int test_handle_entry(
+		struct repository *r UNUSED, const struct object_id *oid,
 		struct strbuf *base, const char *filename,
 		unsigned mode, void *context UNUSED) {
 	printf("%i %s %s%s\n", mode, oid_to_hex(oid), base->buf, filename);
diff --git a/tree.c b/tree.c
index 2a087c010f9..17c3af819e2 100644
--- a/tree.c
+++ b/tree.c
@@ -38,7 +38,7 @@ int read_tree_at(struct repository *r,
 				continue;
 		}
 
-		switch (fn(&entry.oid, base,
+		switch (fn(r, &entry.oid, base,
 			   entry.path, entry.mode, context)) {
 		case 0:
 			continue;
diff --git a/tree.h b/tree.h
index cc6402e4738..94b1e11d9eb 100644
--- a/tree.h
+++ b/tree.h
@@ -37,7 +37,7 @@ struct tree *repo_parse_tree_indirect(struct repository *r, const struct object_
 int cmp_cache_name_compare(const void *a_, const void *b_);
 
 #define READ_TREE_RECURSIVE 1
-typedef int (*read_tree_fn_t)(const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
+typedef int (*read_tree_fn_t)(struct repository *r, const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
 
 int read_tree_at(struct repository *r,
 		 struct tree *tree, struct strbuf *base,
diff --git a/wt-status.c b/wt-status.c
index 5813174896c..cff2a780f32 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -665,7 +665,9 @@ static void wt_status_collect_changes_index(struct wt_status *s)
 	release_revisions(&rev);
 }
 
-static int add_file_to_list(const struct object_id *oid,
+static int add_file_to_list(
+			    struct repository *r UNUSED,
+			    const struct object_id *oid,
 			    struct strbuf *base, const char *path,
 			    unsigned int mode, void *context)
 {
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 6/9] archive: pass repo objects to write_archive handlers
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (4 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 5/9] tree: add repository parameter to read_tree_fn_t Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-17 13:50       ` Phillip Wood
  2022-10-17  2:23     ` [PATCH v3 7/9] archive: remove global repository from archive_args Heather Lapointe via GitGitGadget
                       ` (5 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

Use contextual repos instead of the_repository or args->repo
to ensure that submodules will be handled correctly
since they use multiple repo instances.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 archive-tar.c | 15 ++++++++++-----
 archive-zip.c | 15 +++++++++------
 archive.c     | 38 ++++++++++++++++++++++----------------
 archive.h     | 14 +++++++++++---
 4 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 3e4822b6840..5a2d42ff229 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -18,6 +18,7 @@ static unsigned long offset;
 static int tar_umask = 002;
 
 static int write_tar_filter_archive(const struct archiver *ar,
+					struct repository *repo,
 				    struct archiver_args *args);
 
 /*
@@ -246,7 +247,9 @@ static void write_extended_header(struct archiver_args *args,
 	write_blocked(buffer, size);
 }
 
-static int write_tar_entry(struct archiver_args *args,
+static int write_tar_entry(
+			   struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -316,7 +319,7 @@ static int write_tar_entry(struct archiver_args *args,
 		if (buffer)
 			write_blocked(buffer, size);
 		else
-			err = stream_blocked(args->repo, oid);
+			err = stream_blocked(repo, oid);
 	}
 	return err;
 }
@@ -422,12 +425,13 @@ static int git_tar_config(const char *var, const char *value, void *cb)
 }
 
 static int write_tar_archive(const struct archiver *ar UNUSED,
+			     struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err = 0;
 
 	write_global_extended_header(args);
-	err = write_archive_entries(args, write_tar_entry);
+	err = write_archive_entries(repo, args, write_tar_entry);
 	if (!err)
 		write_trailer();
 	return err;
@@ -462,6 +466,7 @@ static void tgz_write_block(const void *data)
 static const char internal_gzip_command[] = "git archive gzip";
 
 static int write_tar_filter_archive(const struct archiver *ar,
+					struct repository *repo,
 				    struct archiver_args *args)
 {
 #if ZLIB_VERNUM >= 0x1221
@@ -484,7 +489,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		gzstream.next_out = outbuf;
 		gzstream.avail_out = sizeof(outbuf);
 
-		r = write_tar_archive(ar, args);
+		r = write_tar_archive(ar, repo, args);
 
 		tgz_deflate(Z_FINISH);
 		git_deflate_end(&gzstream);
@@ -506,7 +511,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
 		die_errno(_("unable to redirect descriptor"));
 	close(filter.in);
 
-	r = write_tar_archive(ar, args);
+	r = write_tar_archive(ar, repo, args);
 
 	close(1);
 	if (finish_command(&filter) != 0)
diff --git a/archive-zip.c b/archive-zip.c
index 0456f1ebf15..2c1f943a6cc 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -283,7 +283,9 @@ static int entry_is_binary(struct index_state *istate, const char *path,
 
 #define STREAM_BUFFER_SIZE (1024 * 16)
 
-static int write_zip_entry(struct archiver_args *args,
+static int write_zip_entry(
+			   struct repository *repo,
+			   struct archiver_args *args,
 			   const struct object_id *oid,
 			   const char *path, size_t pathlen,
 			   unsigned int mode,
@@ -340,7 +342,7 @@ static int write_zip_entry(struct archiver_args *args,
 
 		if (!buffer) {
 			enum object_type type;
-			stream = open_istream(args->repo, oid, &type, &size,
+			stream = open_istream(repo, oid, &type, &size,
 					      NULL);
 			if (!stream)
 				return error(_("cannot stream blob %s"),
@@ -349,7 +351,7 @@ static int write_zip_entry(struct archiver_args *args,
 			out = NULL;
 		} else {
 			crc = crc32(crc, buffer, size);
-			is_binary = entry_is_binary(args->repo->index,
+			is_binary = entry_is_binary(repo->index,
 						    path_without_prefix,
 						    buffer, size);
 			out = buffer;
@@ -426,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 			write_or_die(1, buf, readlen);
@@ -459,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args,
 				break;
 			crc = crc32(crc, buf, readlen);
 			if (is_binary == -1)
-				is_binary = entry_is_binary(args->repo->index,
+				is_binary = entry_is_binary(repo->index,
 							    path_without_prefix,
 							    buf, readlen);
 
@@ -619,6 +621,7 @@ static int archive_zip_config(const char *var, const char *value,
 }
 
 static int write_zip_archive(const struct archiver *ar UNUSED,
+				 struct repository *repo,
 			     struct archiver_args *args)
 {
 	int err;
@@ -629,7 +632,7 @@ static int write_zip_archive(const struct archiver *ar UNUSED,
 
 	strbuf_init(&zip_dir, 0);
 
-	err = write_archive_entries(args, write_zip_entry);
+	err = write_archive_entries(repo, args, write_zip_entry);
 	if (!err)
 		write_zip_trailer(args->commit_oid);
 
diff --git a/archive.c b/archive.c
index 15f3ac92dfc..2cca7bc5c8a 100644
--- a/archive.c
+++ b/archive.c
@@ -134,7 +134,9 @@ static int check_attr_export_subst(const struct attr_check *check)
 	return check && ATTR_TRUE(check->items[1].value);
 }
 
-static int write_archive_entry(const struct object_id *oid, const char *base,
+static int write_archive_entry(
+		struct repository *repo,
+		const struct object_id *oid, const char *base,
 		int baselen, const char *filename, unsigned mode,
 		void *context)
 {
@@ -160,7 +162,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	if (!S_ISDIR(mode)) {
 		const struct attr_check *check;
-		check = get_archive_attrs(args->repo->index, path_without_prefix);
+		check = get_archive_attrs(repo->index, path_without_prefix);
 		if (check_attr_export_ignore(check))
 			return 0;
 		args->convert = check_attr_export_subst(check);
@@ -169,7 +171,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
-		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
+		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
 		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -180,14 +182,14 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
 
 	/* Stream it? */
 	if (S_ISREG(mode) && !args->convert &&
-	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
+	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
 	    size > big_file_threshold)
-		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
+		return write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
 
 	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
 	if (!buffer)
 		return error(_("cannot read '%s'"), oid_to_hex(oid));
-	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
+	err = write_entry(repo, args, oid, path.buf, path.len, mode, buffer, size);
 	free(buffer);
 	return err;
 }
@@ -207,7 +209,9 @@ static void queue_directory(const struct object_id *oid,
 	oidcpy(&d->oid, oid);
 }
 
-static int write_directory(struct archiver_context *c)
+static int write_directory(
+		struct repository *repo,
+		struct archiver_context *c)
 {
 	struct directory *d = c->bottom;
 	int ret;
@@ -217,8 +221,8 @@ static int write_directory(struct archiver_context *c)
 	c->bottom = d->up;
 	d->path[d->len - 1] = '\0'; /* no trailing slash */
 	ret =
-		write_directory(c) ||
-		write_archive_entry(&d->oid, d->path, d->baselen,
+		write_directory(repo, c) ||
+		write_archive_entry(repo, &d->oid, d->path, d->baselen,
 				    d->path + d->baselen, d->mode,
 				    c) != READ_TREE_RECURSIVE;
 	free(d);
@@ -257,9 +261,9 @@ static int queue_or_write_archive_entry(
 		return READ_TREE_RECURSIVE;
 	}
 
-	if (write_directory(c))
+	if (write_directory(r, c))
 		return -1;
-	return write_archive_entry(oid, base->buf, base->len, filename, mode,
+	return write_archive_entry(r, oid, base->buf, base->len, filename, mode,
 				   context);
 }
 
@@ -269,7 +273,9 @@ struct extra_file_info {
 	void *content;
 };
 
-int write_archive_entries(struct archiver_args *args,
+int write_archive_entries(
+		struct repository *repo,
+		struct archiver_args *args,
 		write_archive_entry_fn_t write_entry)
 {
 	struct archiver_context context;
@@ -290,7 +296,7 @@ int write_archive_entries(struct archiver_args *args,
 			len--;
 		if (args->verbose)
 			fprintf(stderr, "%.*s\n", (int)len, args->base);
-		err = write_entry(args, &args->tree->object.oid, args->base,
+		err = write_entry(repo, args, &args->tree->object.oid, args->base,
 				  len, 040777, NULL, 0);
 		if (err)
 			return err;
@@ -345,12 +351,12 @@ int write_archive_entries(struct archiver_args *args,
 			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
 				err = error_errno(_("cannot read '%s'"), path);
 			else
-				err = write_entry(args, &fake_oid, path_in_archive.buf,
+				err = write_entry(repo, args, &fake_oid, path_in_archive.buf,
 						  path_in_archive.len,
 						  canon_mode(info->stat.st_mode),
 						  content.buf, content.len);
 		} else {
-			err = write_entry(args, &fake_oid,
+			err = write_entry(repo, args, &fake_oid,
 					  path, strlen(path),
 					  canon_mode(info->stat.st_mode),
 					  info->content, info->stat.st_size);
@@ -711,7 +717,7 @@ int write_archive(int argc, const char **argv, const char *prefix,
 	parse_treeish_arg(argv, &args, prefix, remote);
 	parse_pathspec_arg(argv + 1, &args);
 
-	rc = ar->write_archive(ar, &args);
+	rc = ar->write_archive(ar, repo, &args);
 
 	string_list_clear_func(&args.extra_files, extra_file_info_clear);
 	free(args.refname);
diff --git a/archive.h b/archive.h
index 08bed3ed3af..bfbbd3274bd 100644
--- a/archive.h
+++ b/archive.h
@@ -41,7 +41,10 @@ const char *archive_format_from_filename(const char *filename);
 #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
 struct archiver {
 	const char *name;
-	int (*write_archive)(const struct archiver *, struct archiver_args *);
+	int (*write_archive)(
+		const struct archiver *,
+		struct repository *,
+		struct archiver_args *);
 	unsigned flags;
 	char *filter_command;
 };
@@ -51,12 +54,17 @@ void init_tar_archiver(void);
 void init_zip_archiver(void);
 void init_archivers(void);
 
-typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
+typedef int (*write_archive_entry_fn_t)(
+					struct repository *repo,
+					struct archiver_args *args,
 					const struct object_id *oid,
 					const char *path, size_t pathlen,
 					unsigned int mode,
 					void *buffer, unsigned long size);
 
-int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
+int write_archive_entries(
+	struct repository *repo,
+	struct archiver_args *args,
+	write_archive_entry_fn_t write_entry);
 
 #endif	/* ARCHIVE_H */
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 7/9] archive: remove global repository from archive_args
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (5 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 6/9] archive: pass repo objects to write_archive handlers Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-17  2:23     ` [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
                       ` (4 subsequent siblings)
  11 siblings, 0 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

Remove archiver_args.repo to ensure all functions are using local
repository instances.
Since all functions now have access to repo, this access isn't
needed anymore.

The main issue is that submodules do not use the same repo
as the subproject repo that is being passed around contextually.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 archive.c | 51 +++++++++++++++++++++++++++++----------------------
 archive.h |  1 -
 2 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/archive.c b/archive.c
index 2cca7bc5c8a..34549d849f1 100644
--- a/archive.c
+++ b/archive.c
@@ -36,7 +36,9 @@ void init_archivers(void)
 	init_zip_archiver();
 }
 
-static void format_subst(const struct commit *commit,
+static void format_subst(
+			 struct repository *repo,
+			 const struct commit *commit,
 			 const char *src, size_t len,
 			 struct strbuf *buf, struct pretty_print_context *ctx)
 {
@@ -59,7 +61,7 @@ static void format_subst(const struct commit *commit,
 		strbuf_add(&fmt, b + 8, c - b - 8);
 
 		strbuf_add(buf, src, b - src);
-		format_commit_message(commit, fmt.buf, buf, ctx);
+		repo_format_commit_message(repo, commit, fmt.buf, buf, ctx);
 		len -= c + 1 - src;
 		src  = c + 1;
 	}
@@ -68,7 +70,9 @@ static void format_subst(const struct commit *commit,
 	free(to_free);
 }
 
-static void *object_file_to_archive(const struct archiver_args *args,
+static void *object_file_to_archive(
+				    struct repository *repo,
+				    const struct archiver_args *args,
 				    const char *path,
 				    const struct object_id *oid,
 				    unsigned int mode,
@@ -84,15 +88,15 @@ static void *object_file_to_archive(const struct archiver_args *args,
 			       (args->tree ? &args->tree->object.oid : NULL), oid);
 
 	path += args->baselen;
-	buffer = read_object_file(oid, type, sizep);
+	buffer = repo_read_object_file(repo, oid, type, sizep);
 	if (buffer && S_ISREG(mode)) {
 		struct strbuf buf = STRBUF_INIT;
 		size_t size = 0;
 
 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
-		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
+		convert_to_working_tree(repo->index, path, buf.buf, buf.len, &buf, &meta);
 		if (commit)
-			format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
+			format_subst(repo, commit, buf.buf, buf.len, &buf, args->pretty_ctx);
 		buffer = strbuf_detach(&buf, &size);
 		*sizep = size;
 	}
@@ -186,7 +190,7 @@ static int write_archive_entry(
 	    size > big_file_threshold)
 		return write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
 
-	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
+	buffer = object_file_to_archive(repo, args, path.buf, oid, mode, &type, &size);
 	if (!buffer)
 		return error(_("cannot read '%s'"), oid_to_hex(oid));
 	err = write_entry(repo, args, oid, path.buf, path.len, mode, buffer, size);
@@ -313,8 +317,8 @@ int write_archive_entries(
 		memset(&opts, 0, sizeof(opts));
 		opts.index_only = 1;
 		opts.head_idx = -1;
-		opts.src_index = args->repo->index;
-		opts.dst_index = args->repo->index;
+		opts.src_index = repo->index;
+		opts.dst_index = repo->index;
 		opts.fn = oneway_merge;
 		init_tree_desc(&t, args->tree->buffer, args->tree->size);
 		if (unpack_trees(1, &t, &opts))
@@ -322,7 +326,7 @@ int write_archive_entries(
 		git_attr_set_direction(GIT_ATTR_INDEX);
 	}
 
-	err = read_tree(args->repo, args->tree,
+	err = read_tree(repo, args->tree,
 			&args->pathspec,
 			queue_or_write_archive_entry,
 			&context);
@@ -412,7 +416,7 @@ static int reject_entry(
 	return ret;
 }
 
-static int path_exists(struct archiver_args *args, const char *path)
+static int path_exists(struct repository *repo, struct archiver_args *args, const char *path)
 {
 	const char *paths[] = { path, NULL };
 	struct path_exists_context ctx;
@@ -421,14 +425,16 @@ static int path_exists(struct archiver_args *args, const char *path)
 	ctx.args = args;
 	parse_pathspec(&ctx.pathspec, 0, 0, "", paths);
 	ctx.pathspec.recursive = 1;
-	ret = read_tree(args->repo, args->tree,
+	ret = read_tree(repo, args->tree,
 			&ctx.pathspec,
 			reject_entry, &ctx);
 	clear_pathspec(&ctx.pathspec);
 	return ret != 0;
 }
 
-static void parse_pathspec_arg(const char **pathspec,
+static void parse_pathspec_arg(
+		struct repository *repo,
+		const char **pathspec,
 		struct archiver_args *ar_args)
 {
 	/*
@@ -442,14 +448,16 @@ static void parse_pathspec_arg(const char **pathspec,
 	ar_args->pathspec.recursive = 1;
 	if (pathspec) {
 		while (*pathspec) {
-			if (**pathspec && !path_exists(ar_args, *pathspec))
+			if (**pathspec && !path_exists(repo, ar_args, *pathspec))
 				die(_("pathspec '%s' did not match any files"), *pathspec);
 			pathspec++;
 		}
 	}
 }
 
-static void parse_treeish_arg(const char **argv,
+static void parse_treeish_arg(
+		struct repository *repo,
+		const char **argv,
 		struct archiver_args *ar_args, const char *prefix,
 		int remote)
 {
@@ -475,7 +483,7 @@ static void parse_treeish_arg(const char **argv,
 	if (get_oid(name, &oid))
 		die(_("not a valid object name: %s"), name);
 
-	commit = lookup_commit_reference_gently(ar_args->repo, &oid, 1);
+	commit = lookup_commit_reference_gently(repo, &oid, 1);
 	if (commit) {
 		commit_oid = &commit->object.oid;
 		archive_time = commit->date;
@@ -484,7 +492,7 @@ static void parse_treeish_arg(const char **argv,
 		archive_time = time(NULL);
 	}
 
-	tree = parse_tree_indirect(&oid);
+	tree = repo_parse_tree_indirect(repo, &oid);
 	if (!tree)
 		die(_("not a tree object: %s"), oid_to_hex(&oid));
 
@@ -493,14 +501,14 @@ static void parse_treeish_arg(const char **argv,
 		unsigned short mode;
 		int err;
 
-		err = get_tree_entry(ar_args->repo,
+		err = get_tree_entry(repo,
 				     &tree->object.oid,
 				     prefix, &tree_oid,
 				     &mode);
 		if (err || !S_ISDIR(mode))
 			die(_("current working directory is untracked"));
 
-		tree = parse_tree_indirect(&tree_oid);
+		tree = repo_parse_tree_indirect(repo, &tree_oid);
 	}
 	ar_args->refname = ref;
 	ar_args->tree = tree;
@@ -701,7 +709,6 @@ int write_archive(int argc, const char **argv, const char *prefix,
 	ctx.abbrev = DEFAULT_ABBREV;
 	ctx.describe_status = &describe_status;
 	args.pretty_ctx = &ctx;
-	args.repo = repo;
 	args.prefix = prefix;
 	string_list_init_dup(&args.extra_files);
 	argc = parse_archive_args(argc, argv, &ar, &args, name_hint, remote);
@@ -714,8 +721,8 @@ int write_archive(int argc, const char **argv, const char *prefix,
 		setup_git_directory();
 	}
 
-	parse_treeish_arg(argv, &args, prefix, remote);
-	parse_pathspec_arg(argv + 1, &args);
+	parse_treeish_arg(repo, argv, &args, prefix, remote);
+	parse_pathspec_arg(repo, argv + 1, &args);
 
 	rc = ar->write_archive(ar, repo, &args);
 
diff --git a/archive.h b/archive.h
index bfbbd3274bd..540a3b12130 100644
--- a/archive.h
+++ b/archive.h
@@ -8,7 +8,6 @@ struct repository;
 struct pretty_print_context;
 
 struct archiver_args {
-	struct repository *repo;
 	char *refname;
 	const char *prefix;
 	const char *base;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (6 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 7/9] archive: remove global repository from archive_args Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-26 23:34       ` Glen Choo
  2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
                       ` (3 subsequent siblings)
  11 siblings, 1 reply; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

This makes it possible to include submodule contents in an archive command.

The default behavior remains the same: submodule contents are not written
to the resulting archive.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 Documentation/git-archive.txt |  6 +++++-
 archive.c                     | 36 +++++++++++++++++++++++++++++++++--
 archive.h                     |  1 +
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-archive.txt b/Documentation/git-archive.txt
index 60c040988bb..22f54428b98 100644
--- a/Documentation/git-archive.txt
+++ b/Documentation/git-archive.txt
@@ -10,7 +10,8 @@ SYNOPSIS
 --------
 [verse]
 'git archive' [--format=<fmt>] [--list] [--prefix=<prefix>/] [<extra>]
-	      [-o <file> | --output=<file>] [--worktree-attributes]
+	      [-o <file> | --output=<file>]
+	      [--recurse-submodules] [--worktree-attributes]
 	      [--remote=<repo> [--exec=<git-upload-archive>]] <tree-ish>
 	      [<path>...]
 
@@ -82,6 +83,9 @@ The file mode is limited to a regular file, and the option may be
 subject to platform-dependent command-line limits. For non-trivial
 cases, write an untracked file and use `--add-file` instead.
 
+--recurse-submodules::
+	Include submodules recursively in archive.
+
 --worktree-attributes::
 	Look for attributes in .gitattributes files in the working tree
 	as well (see <<ATTRIBUTES>>).
diff --git a/archive.c b/archive.c
index 34549d849f1..f81ef741487 100644
--- a/archive.c
+++ b/archive.c
@@ -10,6 +10,7 @@
 #include "unpack-trees.h"
 #include "dir.h"
 #include "quote.h"
+#include "submodule.h"
 
 static char const * const archive_usage[] = {
 	N_("git archive [<options>] <tree-ish> [<path>...]"),
@@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
 	oidcpy(&d->oid, oid);
 }
 
+static void queue_submodule(
+		struct repository *superproject,
+		const struct object_id *oid,
+		struct strbuf *base, const char *filename,
+		unsigned mode, struct archiver_context *c)
+{
+	struct repository subrepo;
+
+	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
+		return;
+
+	if (repo_read_index(&subrepo) < 0)
+		die("index file corrupt");
+
+    queue_directory(oid, base, filename, mode, c);
+
+	repo_clear(&subrepo);
+}
+
 static int write_directory(
 		struct repository *repo,
 		struct archiver_context *c)
@@ -228,9 +248,11 @@ static int write_directory(
 		write_directory(repo, c) ||
 		write_archive_entry(repo, &d->oid, d->path, d->baselen,
 				    d->path + d->baselen, d->mode,
-				    c) != READ_TREE_RECURSIVE;
+				    c);
 	free(d);
-	return ret ? -1 : 0;
+	if (ret == READ_TREE_RECURSIVE)
+		return 0;
+	return ret;
 }
 
 static int queue_or_write_archive_entry(
@@ -263,6 +285,11 @@ static int queue_or_write_archive_entry(
 			return 0;
 		queue_directory(oid, base, filename, mode, c);
 		return READ_TREE_RECURSIVE;
+	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
+		if (is_submodule_active(r, filename)) {
+			queue_submodule(r, oid, base, filename, mode, c);
+			return READ_TREE_RECURSIVE;
+		}
 	}
 
 	if (write_directory(r, c))
@@ -446,6 +473,7 @@ static void parse_pathspec_arg(
 		       PATHSPEC_PREFER_FULL,
 		       "", pathspec);
 	ar_args->pathspec.recursive = 1;
+	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
 	if (pathspec) {
 		while (*pathspec) {
 			if (**pathspec && !path_exists(repo, ar_args, *pathspec))
@@ -609,6 +637,7 @@ static int parse_archive_args(int argc, const char **argv,
 	int verbose = 0;
 	int i;
 	int list = 0;
+	int recurse_submodules = 0;
 	int worktree_attributes = 0;
 	struct option opts[] = {
 		OPT_GROUP(""),
@@ -623,6 +652,8 @@ static int parse_archive_args(int argc, const char **argv,
 		  add_file_cb, (intptr_t)&base },
 		OPT_STRING('o', "output", &output, N_("file"),
 			N_("write the archive to this file")),
+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
+			N_("include submodules in archive")),
 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
 			N_("read .gitattributes in working directory")),
 		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
@@ -686,6 +717,7 @@ static int parse_archive_args(int argc, const char **argv,
 	args->verbose = verbose;
 	args->base = base;
 	args->baselen = strlen(base);
+	args->recurse_submodules = recurse_submodules;
 	args->worktree_attributes = worktree_attributes;
 
 	return argc;
diff --git a/archive.h b/archive.h
index 540a3b12130..1b21484dda6 100644
--- a/archive.h
+++ b/archive.h
@@ -18,6 +18,7 @@ struct archiver_args {
 	timestamp_t time;
 	struct pathspec pathspec;
 	unsigned int verbose : 1;
+	unsigned int recurse_submodules : 1;
 	unsigned int worktree_attributes : 1;
 	unsigned int convert : 1;
 	int compression_level;
-- 
gitgitgadget


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (7 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
@ 2022-10-17  2:23     ` Heather Lapointe via GitGitGadget
  2022-10-27 18:54       ` Jonathan Tan
  2022-10-28  0:17       ` Ævar Arnfjörð Bjarmason
  2022-10-17 13:57     ` [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command Phillip Wood
                       ` (2 subsequent siblings)
  11 siblings, 2 replies; 48+ messages in thread
From: Heather Lapointe via GitGitGadget @ 2022-10-17  2:23 UTC (permalink / raw)
  To: git; +Cc: René Scharfe, Heather Lapointe, Heather Lapointe

From: Heather Lapointe <alpha@alphaservcomputing.solutions>

Ensure the functionality works both with and without submodules.
We expect --recurse-submodules to fail if there are uninitialized submodules
present.

Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
---
 archive.c                     |  2 +-
 t/t5005-archive-submodules.sh | 83 +++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+), 1 deletion(-)
 create mode 100755 t/t5005-archive-submodules.sh

diff --git a/archive.c b/archive.c
index f81ef741487..b0a3181f7f5 100644
--- a/archive.c
+++ b/archive.c
@@ -179,7 +179,7 @@ static int write_archive_entry(
 		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
 		if (err)
 			return err;
-		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
+		return READ_TREE_RECURSIVE;
 	}
 
 	if (args->verbose)
diff --git a/t/t5005-archive-submodules.sh b/t/t5005-archive-submodules.sh
new file mode 100755
index 00000000000..aad6cfd1082
--- /dev/null
+++ b/t/t5005-archive-submodules.sh
@@ -0,0 +1,83 @@
+#!/bin/sh
+
+test_description='git archive --recurse-submodules test'
+
+. ./test-lib.sh
+
+check_tar() {
+	tarfile=$1.tar
+	listfile=$1.lst
+	dir=$1
+	dir_with_prefix=$dir/$2
+
+	test_expect_success ' extract tar archive' '
+		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
+	'
+}
+
+check_added() {
+	dir=$1
+	path_in_fs=$2
+	path_in_archive=$3
+
+	test_expect_success " validate extra file $path_in_archive" '
+		test -f $dir/$path_in_archive &&
+		diff -r $path_in_fs $dir/$path_in_archive
+	'
+}
+
+check_not_added() {
+	dir=$1
+	path_in_archive=$2
+
+	test_expect_success " validate unpresent file $path_in_archive" '
+		! test -f $dir/$path_in_archive &&
+		! test -d $dir/$path_in_archive
+	'
+}
+
+test_expect_success 'setup' '
+	rm -rf repo_with_submodules submodule1 uninited_repo_with_submodules &&
+	git init repo_with_submodules &&
+	git init submodule1 &&
+	(
+		cd submodule1 &&
+		echo "dir1/sub1/file1.txt" > "file1.txt" &&
+		git add file1.txt &&
+		git commit -m "initialize with file1.txt"
+	) &&
+	(
+	    cd repo_with_submodules &&
+	    echo "file2" > file2.txt &&
+	    git add file2.txt &&
+	    git commit -m "initialize with file2.txt" &&
+	    mkdir -p dir1 &&
+	    git submodule add ../submodule1 dir1/sub1 &&
+	    git commit -m "add submodule1"
+	) &&
+	git clone repo_with_submodules uninited_repo_with_submodules
+'
+
+test_expect_success 'archive without recurse, non-init' '
+	git -C uninited_repo_with_submodules archive -v HEAD >b.tar
+'
+
+check_tar b
+check_added b uninited_repo_with_submodules/file2.txt file2.txt
+check_not_added b uninited_repo_with_submodules/dir1/sub1/file1.txt
+
+# It is expected that --recurse-submodules will not work if submodules are not
+# initialized.
+test_expect_success 'archive with recurse, non-init' '
+	! git -C uninited_repo_with_submodules archive --recurse-submodules -v HEAD >b2-err.tar
+'
+
+test_expect_success 'archive with recurse, init' '
+	git -C repo_with_submodules archive --recurse-submodules -v HEAD >b3.tar
+'
+
+check_tar b3
+check_added b3 repo_with_submodules/file2.txt file2.txt
+check_added b3 repo_with_submodules/dir1/sub1/file1.txt dir1/sub1/file1.txt
+
+test_done
-- 
gitgitgadget

^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods.
  2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
@ 2022-10-17 13:26       ` Junio C Hamano
  2022-10-26 22:33       ` Glen Choo
  2022-10-27 18:09       ` Jonathan Tan
  2 siblings, 0 replies; 48+ messages in thread
From: Junio C Hamano @ 2022-10-17 13:26 UTC (permalink / raw)
  To: Alphadelta14 via GitGitGadget; +Cc: git, René Scharfe, Heather Lapointe

"Alphadelta14 via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Alphadelta14 <alpha@alphaservcomputing.solutions>

I'll fix this line to match all the other patches in the series
before applying.

> Expect that tree walking may switch repository contexts for cases
> such as submodules.
> Added compatibility macros for existing cases.
>
> Annotate an existing issue where repo is wrong when traversing.
>
> Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
> ---


> @@ -58,7 +58,11 @@ int read_tree_at(struct repository *r,
>  				    oid_to_hex(&entry.oid),
>  				    base->buf, entry.path);
>  
> -			if (parse_commit(commit))
> +			// FIXME: This is the wrong repo instance (it refers to the superproject)
> +			// it will always fail as is (will fix in later patch)
> +			// This current codepath isn't executed by any existing callbacks
> +			// so it wouldn't show up as an issue at this time.

	/*
	 * We write our multi-line comments
	 * this way.
	 */

My suspicion is that the if/else if/ cascade for GITLINK assumes
that the caller earlier did add_submodule_odb() to make sure any
object it needs should be available via the_repository->objects
object store.  If your caller (presumably "archive that is trying to
learn the --recurse-submodules option") hasn't learned to do so yet
at this step, it is understandable if it fails.

> +			if (repo_parse_commit(r, commit))
>  				die("Invalid commit %s in submodule path %s%s",
>  				    oid_to_hex(&entry.oid),
>  				    base->buf, entry.path);

> +#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
> +#define parse_tree(tree) repo_parse_tree(the_repository, tree)
> +#define parse_tree_gently(tree, quiet_on_missing) repo_parse_tree_gently(the_repository, tree, quiet_on_missing)
> +#define parse_tree_indirect(oid) repo_parse_tree_indirect(the_repository, oid)
> +#endif

Good.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 3/9] tree: increase test coverage for tree.c
  2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
@ 2022-10-17 13:34       ` Phillip Wood
  2022-10-17 13:36       ` Junio C Hamano
  2022-10-27 18:28       ` Jonathan Tan
  2 siblings, 0 replies; 48+ messages in thread
From: Phillip Wood @ 2022-10-17 13:34 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

Hi Heather

On 17/10/2022 03:23, Heather Lapointe via GitGitGadget wrote:
> From: Heather Lapointe <alpha@alphaservcomputing.solutions>
> 
> This highlights some buggy behavior from read_tree for submodules that
> was not being executed.

The commit message should explain the reason behind the change being 
made. In this case it would be helpful to give an overview of what the 
bug is you're testing for. Given the description I was expecting to see 
some failing tests that are fixed by a later patch but that doesn't seem 
to be the case, so I'm wondering what these tests do.

> This introduces a test-tool tree-read-tree-at command
> (the complex name is because it is not related to the read-tree command).

It would also be helpful to explain why we cannot reproduce the bug with 
"git read-tree --recurse-submodules"

Best Wishes

Phillip

> Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
> ---
>   Makefile                          |  1 +
>   t/helper/test-tool.c              |  1 +
>   t/helper/test-tool.h              |  1 +
>   t/helper/test-tree-read-tree-at.c | 40 +++++++++++++++++++
>   t/t1023-tree-read-tree-at.sh      | 65 +++++++++++++++++++++++++++++++
>   5 files changed, 108 insertions(+)
>   create mode 100644 t/helper/test-tree-read-tree-at.c
>   create mode 100755 t/t1023-tree-read-tree-at.sh
> 
> diff --git a/Makefile b/Makefile
> index 6bfb62cbe94..52d17ca7276 100644
> --- a/Makefile
> +++ b/Makefile
> @@ -788,6 +788,7 @@ TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
>   TEST_BUILTINS_OBJS += test-submodule.o
>   TEST_BUILTINS_OBJS += test-subprocess.o
>   TEST_BUILTINS_OBJS += test-trace2.o
> +TEST_BUILTINS_OBJS += test-tree-read-tree-at.o
>   TEST_BUILTINS_OBJS += test-urlmatch-normalization.o
>   TEST_BUILTINS_OBJS += test-userdiff.o
>   TEST_BUILTINS_OBJS += test-wildmatch.o
> diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
> index d1d013bcd92..a8a9bedec5f 100644
> --- a/t/helper/test-tool.c
> +++ b/t/helper/test-tool.c
> @@ -82,6 +82,7 @@ static struct test_cmd cmds[] = {
>   	{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
>   	{ "subprocess", cmd__subprocess },
>   	{ "trace2", cmd__trace2 },
> +	{ "tree-read-tree-at", cmd__tree_read_tree_at },
>   	{ "userdiff", cmd__userdiff },
>   	{ "urlmatch-normalization", cmd__urlmatch_normalization },
>   	{ "xml-encode", cmd__xml_encode },
> diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
> index 6b46b6444b6..409fddfaeb8 100644
> --- a/t/helper/test-tool.h
> +++ b/t/helper/test-tool.h
> @@ -76,6 +76,7 @@ int cmd__submodule_config(int argc, const char **argv);
>   int cmd__submodule_nested_repo_config(int argc, const char **argv);
>   int cmd__subprocess(int argc, const char **argv);
>   int cmd__trace2(int argc, const char **argv);
> +int cmd__tree_read_tree_at(int argc, const char **argv);
>   int cmd__userdiff(int argc, const char **argv);
>   int cmd__urlmatch_normalization(int argc, const char **argv);
>   int cmd__xml_encode(int argc, const char **argv);
> diff --git a/t/helper/test-tree-read-tree-at.c b/t/helper/test-tree-read-tree-at.c
> new file mode 100644
> index 00000000000..bba759bb264
> --- /dev/null
> +++ b/t/helper/test-tree-read-tree-at.c
> @@ -0,0 +1,40 @@
> +/* This tests tree.c's read_tree / read_tree_at.
> +We call it tree-read-tree-at to disambiguate with the read-tree tool.
> +*/
> +#include "cache.h"
> +#include "pathspec.h"
> +#include "test-tool.h"
> +#include "tree.h"
> +
> +static int test_handle_entry(const struct object_id *oid,
> +		struct strbuf *base, const char *filename,
> +		unsigned mode, void *context UNUSED) {
> +	printf("%i %s %s%s\n", mode, oid_to_hex(oid), base->buf, filename);
> +	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
> +		return READ_TREE_RECURSIVE;
> +	}
> +	return 0;
> +}
> +
> +int cmd__tree_read_tree_at(int argc UNUSED, const char **argv)
> +{
> +	struct pathspec pathspec;
> +	struct tree *tree;
> +	struct repository *repo;
> +	struct object_id oid;
> +
> +	setup_git_directory();
> +	repo = the_repository;
> +	assert(repo);
> +
> +	parse_pathspec(&pathspec, 0,
> +		       PATHSPEC_PREFER_FULL,
> +		       "", argv);
> +
> +	assert(repo_get_oid(repo, "HEAD", &oid) == 0);
> +	tree = repo_parse_tree_indirect(repo, &oid);
> +	assert(tree);
> +	pathspec.recurse_submodules = 1;
> +	read_tree(repo, tree, &pathspec, test_handle_entry, NULL);
> +	return 0;
> +}
> diff --git a/t/t1023-tree-read-tree-at.sh b/t/t1023-tree-read-tree-at.sh
> new file mode 100755
> index 00000000000..9e5ce3abb4b
> --- /dev/null
> +++ b/t/t1023-tree-read-tree-at.sh
> @@ -0,0 +1,65 @@
> +#!/bin/sh
> +
> +# tests for tree.c (not read-tree.c)
> +test_description='Test read_tree / read_tree_at'
> +. ./test-lib.sh
> +
> +test_expect_success 'read_tree basic' '
> +	rm -rf walk_tree_basic &&
> +	git init walk_tree_basic &&
> +	(
> +		cd walk_tree_basic &&
> +		set -x &&
> +
> +		mkdir -p dir1/dirA &&
> +		mkdir -p dir1/dirB &&
> +		mkdir -p dir2 &&
> +		echo "file1" > file1.txt &&
> +		echo "file2" > file2.txt &&
> +		# uncommitted
> +		echo "file3" > file3.txt &&
> +
> +		echo "file1A1" > dir1/dirA/file1.txt &&
> +		git add file1.txt file2.txt dir1/dirA/file1.txt &&
> +		git commit -m "initial commit" &&
> +
> +		test-tool tree-read-tree-at . > walk1.txt &&
> +		grep " file1.txt" walk1.txt &&
> +		! grep " file3.txt" walk1.txt &&
> +		! grep " dir1/dirB" walk1.txt &&
> +		grep " dir1/dirA/file1.txt" walk1.txt
> +	)
> +'
> +
> +test_expect_success 'read_tree submodules' '
> +	rm -rf walk_tree_submodules &&
> +	git init submodule1 &&
> +	(
> +		cd submodule1 &&
> +		mkdir -p dir1/dirA &&
> +		echo "dir2/sub1/file1.txt" > file1.txt &&
> +		echo "dir2/sub1/file1A1.txt" > dir1/dirA/file1.txt &&
> +		git add file1.txt dir1/dirA/file1.txt &&
> +		git commit -m "initial commit"
> +	) &&
> +	git init walk_tree_submodules &&
> +	(
> +		cd walk_tree_submodules &&
> +
> +		mkdir -p dir2 &&
> +		echo "file1" > file1.txt &&
> +		echo "dir2/file2" > dir2/file2.txt &&
> +		git add file1.txt dir2/file2.txt &&
> +		git commit -m "initial commit" &&
> +
> +		git submodule add ../submodule1 dir2/sub1 &&
> +		git commit -m "add submodule1" &&
> +
> +		test-tool tree-read-tree-at . > walk2.txt &&
> +		grep " file1.txt" walk2.txt &&
> +		grep " dir2/sub1/file1.txt" walk2.txt &&
> +		grep " dir2/sub1/dir1/dirA/file1.txt" walk2.txt
> +	)
> +'
> +
> +test_done

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 3/9] tree: increase test coverage for tree.c
  2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
  2022-10-17 13:34       ` Phillip Wood
@ 2022-10-17 13:36       ` Junio C Hamano
  2022-10-27 18:28       ` Jonathan Tan
  2 siblings, 0 replies; 48+ messages in thread
From: Junio C Hamano @ 2022-10-17 13:36 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: git, René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:

> diff --git a/t/t1023-tree-read-tree-at.sh b/t/t1023-tree-read-tree-at.sh
> new file mode 100755
> index 00000000000..9e5ce3abb4b
> --- /dev/null
> +++ b/t/t1023-tree-read-tree-at.sh
> @@ -0,0 +1,65 @@
> +#!/bin/sh
> +
> +# tests for tree.c (not read-tree.c)
> +test_description='Test read_tree / read_tree_at'
> +. ./test-lib.sh
> +
> +test_expect_success 'read_tree basic' '
> +	rm -rf walk_tree_basic &&
> +	git init walk_tree_basic &&
> +	(
> +		cd walk_tree_basic &&
> +		set -x &&

Do we need this, when we already have the '-x' option, alongside '-v',
'-i', and '-d', available in the test harness?

> +		mkdir -p dir1/dirA &&
> +		mkdir -p dir1/dirB &&
> +		mkdir -p dir2 &&

Can't we have these three done by the same single "mkdir -p" process?

> +		echo "file1" > file1.txt &&
> +		echo "file2" > file2.txt &&

Lose the SP between redirection operator ">" and its target, i.e.

		echo file1 >file1.txt

cf. Documentation/CodingGuidelines

Also you do not necessarily have to have dq around a single token.

> +		# uncommitted
> +		echo "file3" > file3.txt &&
> +
> +		echo "file1A1" > dir1/dirA/file1.txt &&
> +		git add file1.txt file2.txt dir1/dirA/file1.txt &&
> +		git commit -m "initial commit" &&
> +
> +		test-tool tree-read-tree-at . > walk1.txt &&
> +		grep " file1.txt" walk1.txt &&
> +		! grep " file3.txt" walk1.txt &&
> +		! grep " dir1/dirB" walk1.txt &&
> +		grep " dir1/dirA/file1.txt" walk1.txt
> +	)
> +'
> +
> +test_expect_success 'read_tree submodules' '
> +	rm -rf walk_tree_submodules &&

Curious why the above does not clean "submodule1", too.  After all,
all the "rm -rf" we saw in this script above are removing what its
earlier steps would never have created (but will create), just in
case.  Why not do the same?

If the pattern is to "remove what we will need to create immediately
before we actually try to create, just in case", then shouldn't the
above be removing "submodule1", and we should have another "rm -rf"
for "walk_tree_submodules" immediately before we do "git init" on
it several lines below?

> +	git init submodule1 &&
> +	(
> +		cd submodule1 &&
> +		mkdir -p dir1/dirA &&
> +		echo "dir2/sub1/file1.txt" > file1.txt &&
> +		echo "dir2/sub1/file1A1.txt" > dir1/dirA/file1.txt &&
> +		git add file1.txt dir1/dirA/file1.txt &&
> +		git commit -m "initial commit"
> +	) &&
> +	git init walk_tree_submodules &&
> +	(
> +		cd walk_tree_submodules &&
> +
> +		mkdir -p dir2 &&
> +		echo "file1" > file1.txt &&
> +		echo "dir2/file2" > dir2/file2.txt &&
> +		git add file1.txt dir2/file2.txt &&
> +		git commit -m "initial commit" &&
> +
> +		git submodule add ../submodule1 dir2/sub1 &&
> +		git commit -m "add submodule1" &&
> +
> +		test-tool tree-read-tree-at . > walk2.txt &&
> +		grep " file1.txt" walk2.txt &&
> +		grep " dir2/sub1/file1.txt" walk2.txt &&
> +		grep " dir2/sub1/dir1/dirA/file1.txt" walk2.txt
> +	)
> +'
> +
> +test_done

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly
  2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
@ 2022-10-17 13:48       ` Phillip Wood
  2022-10-17 13:56       ` Junio C Hamano
                         ` (2 subsequent siblings)
  3 siblings, 0 replies; 48+ messages in thread
From: Phillip Wood @ 2022-10-17 13:48 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

Hi Heather

On 17/10/2022 03:23, Heather Lapointe via GitGitGadget wrote:
> From: Heather Lapointe <alpha@alphaservcomputing.solutions>
> 
> This supports traversal into an actual submodule for read_tree_at.
> The logic is blocked on pathspec->recurse_submodules now,

I'm struggling to understand what this is saying.

> but previously hadn't been executed due to all fn() cases
> returning early for submodules.
> 
> Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
> ---
>   tree.c | 88 ++++++++++++++++++++++++++++++++++++++++------------------
>   1 file changed, 61 insertions(+), 27 deletions(-)
> 
> diff --git a/tree.c b/tree.c
> index 13f9173d45e..2a087c010f9 100644
> --- a/tree.c
> +++ b/tree.c
> @@ -8,6 +8,7 @@
>   #include "alloc.h"
>   #include "tree-walk.h"
>   #include "repository.h"
> +#include "pathspec.h"
>   
>   const char *tree_type = "tree";
>   
> @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r,
>   			return -1;
>   		}
>   
> -		if (S_ISDIR(entry.mode))
> +		if (S_ISDIR(entry.mode)) {
>   			oidcpy(&oid, &entry.oid);
> -		else if (S_ISGITLINK(entry.mode)) {
> -			struct commit *commit;
>   
> -			commit = lookup_commit(r, &entry.oid);
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			strbuf_addch(base, '/');
> +			retval = read_tree_at(r, lookup_tree(r, &oid),
> +						base, pathspec,
> +						fn, context);
> +			strbuf_setlen(base, oldlen);
> +			if (retval)
> +				return -1;
> +		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
> +			struct commit *commit;
> +			struct repository subrepo;
> +			struct repository* subrepo_p = &subrepo;

Normally we'd just use &subrepo wherever we want a pointer rather than 
defining an alias like this. For example it is common to see

	struct strbuf buf = STRBUF_INIT;

	strbuf_add(&buf, "hello world");

we don't define a buf_p variable

> +			struct tree* submodule_tree;
> +			char *submodule_rel_path;
> +			int name_base_len = 0;
> +
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			submodule_rel_path = base->buf;
> +			// repo_submodule_init expects a path relative to submodule_prefix

I found the comments in this section of code helpful, but single-line 
comments should be formatted as
	/* single line comment */

> +			if (r->submodule_prefix) {
> +				name_base_len = strlen(r->submodule_prefix);
> +				// we should always expect to start with submodule_prefix
> +				assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len));

Rather than using assert() we tend to use BUG() as that then provides a 
grep-able message. It also means that we won't have an out-of-bounds 
access if the invariant is violated when compiling with NDEBUG. So we 
could drop the comment and write

	if (strncmp(submodule_rel_path, r->submodule_prefix, name_base_len))
		BUG("missing submodule path prefix");

> +				// strip the prefix
> +				submodule_rel_path += name_base_len;
> +				// if submodule_prefix doesn't end with a /, we want to get rid of that too

I think there is a typo here - if the prefix does end with a / then 
we're dropping it.

> +				if (is_dir_sep(submodule_rel_path[0])) {
> +					submodule_rel_path++;
> +				}
> +			}
> +
> +			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
> +				die("couldn't init submodule %s", base->buf);
> +
> +			if (repo_read_index(subrepo_p) < 0)
> +				die("index file corrupt");
> +
> +			commit = lookup_commit(subrepo_p, &entry.oid);
>   			if (!commit)
> -				die("Commit %s in submodule path %s%s not found",
> +				die("Commit %s in submodule path %s not found",
>   				    oid_to_hex(&entry.oid),
> -				    base->buf, entry.path);
> -
> -			// FIXME: This is the wrong repo instance (it refers to the superproject)
> -			// it will always fail as is (will fix in later patch)
> -			// This current codepath isn't executed by any existing callbacks
> -			// so it wouldn't show up as an issue at this time.
> -			if (repo_parse_commit(r, commit))

Style comment for the patch that added this code. Multi-line comments 
should be formatted as
	/*
	 * Multi-line
	 * comment
	 */

Best Wishes

Phillip

> -				die("Invalid commit %s in submodule path %s%s",
> +				    base->buf);
> +
> +			if (repo_parse_commit(subrepo_p, commit))
> +				die("Invalid commit %s in submodule path %s",
>   				    oid_to_hex(&entry.oid),
> -				    base->buf, entry.path);
> +				    base->buf);
>   
> -			oidcpy(&oid, get_commit_tree_oid(commit));
> -		}
> -		else
> -			continue;
> +			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
> +			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
>   
> -		len = tree_entry_len(&entry);
> -		strbuf_add(base, entry.path, len);
> -		strbuf_addch(base, '/');
> -		retval = read_tree_at(r, lookup_tree(r, &oid),
> -				      base, pathspec,
> -				      fn, context);
> -		strbuf_setlen(base, oldlen);
> -		if (retval)
> -			return -1;
> +			strbuf_addch(base, '/');
> +
> +			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
> +						base, pathspec,
> +						fn, context);
> +			if (retval)
> +			    die("failed to read tree for %s", base->buf);
> +			strbuf_setlen(base, oldlen);
> +			repo_clear(subrepo_p);
> +		}
> +		// else, this is a file (or a submodule, but no pathspec->recurse_submodules)
>   	}
>   	return 0;
>   }

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 6/9] archive: pass repo objects to write_archive handlers
  2022-10-17  2:23     ` [PATCH v3 6/9] archive: pass repo objects to write_archive handlers Heather Lapointe via GitGitGadget
@ 2022-10-17 13:50       ` Phillip Wood
  0 siblings, 0 replies; 48+ messages in thread
From: Phillip Wood @ 2022-10-17 13:50 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

Hi Heather

On 17/10/2022 03:23, Heather Lapointe via GitGitGadget wrote:
> From: Heather Lapointe <alpha@alphaservcomputing.solutions>
> 
> Use contextual repos instead of the_repository or args->repo
> to ensure that submodules will be handled correctly
> since they use multiple repo instances.

Reading this I wondered whether we could get rid of args->repo. Having 
read ahead to the next patch, the answer is yes. Perhaps you could expand 
this commit message to mention that there are still uses of args->repo 
after this change but they will be removed in the next patch at which 
point we can remove that member.

> 
> Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
> ---
>   archive-tar.c | 15 ++++++++++-----
>   archive-zip.c | 15 +++++++++------
>   archive.c     | 38 ++++++++++++++++++++++----------------
>   archive.h     | 14 +++++++++++---
>   4 files changed, 52 insertions(+), 30 deletions(-)
> 
> diff --git a/archive-tar.c b/archive-tar.c
> index 3e4822b6840..5a2d42ff229 100644
> --- a/archive-tar.c
> +++ b/archive-tar.c
> @@ -18,6 +18,7 @@ static unsigned long offset;
>   static int tar_umask = 002;
>   
>   static int write_tar_filter_archive(const struct archiver *ar,
> +					struct repository *repo,
>   				    struct archiver_args *args);
>   
>   /*
> @@ -246,7 +247,9 @@ static void write_extended_header(struct archiver_args *args,
>   	write_blocked(buffer, size);
>   }
>   
> -static int write_tar_entry(struct archiver_args *args,
> +static int write_tar_entry(
> +			   struct repository *repo,

This argument should be on the same line as the function name

Best Wishes

Phillip

> +			   struct archiver_args *args,
>   			   const struct object_id *oid,
>   			   const char *path, size_t pathlen,
>   			   unsigned int mode,
> @@ -316,7 +319,7 @@ static int write_tar_entry(struct archiver_args *args,
>   		if (buffer)
>   			write_blocked(buffer, size);
>   		else
> -			err = stream_blocked(args->repo, oid);
> +			err = stream_blocked(repo, oid);
>   	}
>   	return err;
>   }
> @@ -422,12 +425,13 @@ static int git_tar_config(const char *var, const char *value, void *cb)
>   }
>   
>   static int write_tar_archive(const struct archiver *ar UNUSED,
> +			     struct repository *repo,
>   			     struct archiver_args *args)
>   {
>   	int err = 0;
>   
>   	write_global_extended_header(args);
> -	err = write_archive_entries(args, write_tar_entry);
> +	err = write_archive_entries(repo, args, write_tar_entry);
>   	if (!err)
>   		write_trailer();
>   	return err;
> @@ -462,6 +466,7 @@ static void tgz_write_block(const void *data)
>   static const char internal_gzip_command[] = "git archive gzip";
>   
>   static int write_tar_filter_archive(const struct archiver *ar,
> +					struct repository *repo,
>   				    struct archiver_args *args)
>   {
>   #if ZLIB_VERNUM >= 0x1221
> @@ -484,7 +489,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
>   		gzstream.next_out = outbuf;
>   		gzstream.avail_out = sizeof(outbuf);
>   
> -		r = write_tar_archive(ar, args);
> +		r = write_tar_archive(ar, repo, args);
>   
>   		tgz_deflate(Z_FINISH);
>   		git_deflate_end(&gzstream);
> @@ -506,7 +511,7 @@ static int write_tar_filter_archive(const struct archiver *ar,
>   		die_errno(_("unable to redirect descriptor"));
>   	close(filter.in);
>   
> -	r = write_tar_archive(ar, args);
> +	r = write_tar_archive(ar, repo, args);
>   
>   	close(1);
>   	if (finish_command(&filter) != 0)
> diff --git a/archive-zip.c b/archive-zip.c
> index 0456f1ebf15..2c1f943a6cc 100644
> --- a/archive-zip.c
> +++ b/archive-zip.c
> @@ -283,7 +283,9 @@ static int entry_is_binary(struct index_state *istate, const char *path,
>   
>   #define STREAM_BUFFER_SIZE (1024 * 16)
>   
> -static int write_zip_entry(struct archiver_args *args,
> +static int write_zip_entry(
> +			   struct repository *repo,
> +			   struct archiver_args *args,
>   			   const struct object_id *oid,
>   			   const char *path, size_t pathlen,
>   			   unsigned int mode,
> @@ -340,7 +342,7 @@ static int write_zip_entry(struct archiver_args *args,
>   
>   		if (!buffer) {
>   			enum object_type type;
> -			stream = open_istream(args->repo, oid, &type, &size,
> +			stream = open_istream(repo, oid, &type, &size,
>   					      NULL);
>   			if (!stream)
>   				return error(_("cannot stream blob %s"),
> @@ -349,7 +351,7 @@ static int write_zip_entry(struct archiver_args *args,
>   			out = NULL;
>   		} else {
>   			crc = crc32(crc, buffer, size);
> -			is_binary = entry_is_binary(args->repo->index,
> +			is_binary = entry_is_binary(repo->index,
>   						    path_without_prefix,
>   						    buffer, size);
>   			out = buffer;
> @@ -426,7 +428,7 @@ static int write_zip_entry(struct archiver_args *args,
>   				break;
>   			crc = crc32(crc, buf, readlen);
>   			if (is_binary == -1)
> -				is_binary = entry_is_binary(args->repo->index,
> +				is_binary = entry_is_binary(repo->index,
>   							    path_without_prefix,
>   							    buf, readlen);
>   			write_or_die(1, buf, readlen);
> @@ -459,7 +461,7 @@ static int write_zip_entry(struct archiver_args *args,
>   				break;
>   			crc = crc32(crc, buf, readlen);
>   			if (is_binary == -1)
> -				is_binary = entry_is_binary(args->repo->index,
> +				is_binary = entry_is_binary(repo->index,
>   							    path_without_prefix,
>   							    buf, readlen);
>   
> @@ -619,6 +621,7 @@ static int archive_zip_config(const char *var, const char *value,
>   }
>   
>   static int write_zip_archive(const struct archiver *ar UNUSED,
> +				 struct repository *repo,
>   			     struct archiver_args *args)
>   {
>   	int err;
> @@ -629,7 +632,7 @@ static int write_zip_archive(const struct archiver *ar UNUSED,
>   
>   	strbuf_init(&zip_dir, 0);
>   
> -	err = write_archive_entries(args, write_zip_entry);
> +	err = write_archive_entries(repo, args, write_zip_entry);
>   	if (!err)
>   		write_zip_trailer(args->commit_oid);
>   
> diff --git a/archive.c b/archive.c
> index 15f3ac92dfc..2cca7bc5c8a 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -134,7 +134,9 @@ static int check_attr_export_subst(const struct attr_check *check)
>   	return check && ATTR_TRUE(check->items[1].value);
>   }
>   
> -static int write_archive_entry(const struct object_id *oid, const char *base,
> +static int write_archive_entry(
> +		struct repository *repo,
> +		const struct object_id *oid, const char *base,
>   		int baselen, const char *filename, unsigned mode,
>   		void *context)
>   {
> @@ -160,7 +162,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
>   
>   	if (!S_ISDIR(mode)) {
>   		const struct attr_check *check;
> -		check = get_archive_attrs(args->repo->index, path_without_prefix);
> +		check = get_archive_attrs(repo->index, path_without_prefix);
>   		if (check_attr_export_ignore(check))
>   			return 0;
>   		args->convert = check_attr_export_subst(check);
> @@ -169,7 +171,7 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
>   	if (S_ISDIR(mode) || S_ISGITLINK(mode)) {
>   		if (args->verbose)
>   			fprintf(stderr, "%.*s\n", (int)path.len, path.buf);
> -		err = write_entry(args, oid, path.buf, path.len, mode, NULL, 0);
> +		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
>   		if (err)
>   			return err;
>   		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
> @@ -180,14 +182,14 @@ static int write_archive_entry(const struct object_id *oid, const char *base,
>   
>   	/* Stream it? */
>   	if (S_ISREG(mode) && !args->convert &&
> -	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
> +	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
>   	    size > big_file_threshold)
> -		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
> +		return write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
>   
>   	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
>   	if (!buffer)
>   		return error(_("cannot read '%s'"), oid_to_hex(oid));
> -	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
> +	err = write_entry(repo, args, oid, path.buf, path.len, mode, buffer, size);
>   	free(buffer);
>   	return err;
>   }
> @@ -207,7 +209,9 @@ static void queue_directory(const struct object_id *oid,
>   	oidcpy(&d->oid, oid);
>   }
>   
> -static int write_directory(struct archiver_context *c)
> +static int write_directory(
> +		struct repository *repo,
> +		struct archiver_context *c)
>   {
>   	struct directory *d = c->bottom;
>   	int ret;
> @@ -217,8 +221,8 @@ static int write_directory(struct archiver_context *c)
>   	c->bottom = d->up;
>   	d->path[d->len - 1] = '\0'; /* no trailing slash */
>   	ret =
> -		write_directory(c) ||
> -		write_archive_entry(&d->oid, d->path, d->baselen,
> +		write_directory(repo, c) ||
> +		write_archive_entry(repo, &d->oid, d->path, d->baselen,
>   				    d->path + d->baselen, d->mode,
>   				    c) != READ_TREE_RECURSIVE;
>   	free(d);
> @@ -257,9 +261,9 @@ static int queue_or_write_archive_entry(
>   		return READ_TREE_RECURSIVE;
>   	}
>   
> -	if (write_directory(c))
> +	if (write_directory(r, c))
>   		return -1;
> -	return write_archive_entry(oid, base->buf, base->len, filename, mode,
> +	return write_archive_entry(r, oid, base->buf, base->len, filename, mode,
>   				   context);
>   }
>   
> @@ -269,7 +273,9 @@ struct extra_file_info {
>   	void *content;
>   };
>   
> -int write_archive_entries(struct archiver_args *args,
> +int write_archive_entries(
> +		struct repository *repo,
> +		struct archiver_args *args,
>   		write_archive_entry_fn_t write_entry)
>   {
>   	struct archiver_context context;
> @@ -290,7 +296,7 @@ int write_archive_entries(struct archiver_args *args,
>   			len--;
>   		if (args->verbose)
>   			fprintf(stderr, "%.*s\n", (int)len, args->base);
> -		err = write_entry(args, &args->tree->object.oid, args->base,
> +		err = write_entry(repo, args, &args->tree->object.oid, args->base,
>   				  len, 040777, NULL, 0);
>   		if (err)
>   			return err;
> @@ -345,12 +351,12 @@ int write_archive_entries(struct archiver_args *args,
>   			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
>   				err = error_errno(_("cannot read '%s'"), path);
>   			else
> -				err = write_entry(args, &fake_oid, path_in_archive.buf,
> +				err = write_entry(repo, args, &fake_oid, path_in_archive.buf,
>   						  path_in_archive.len,
>   						  canon_mode(info->stat.st_mode),
>   						  content.buf, content.len);
>   		} else {
> -			err = write_entry(args, &fake_oid,
> +			err = write_entry(repo, args, &fake_oid,
>   					  path, strlen(path),
>   					  canon_mode(info->stat.st_mode),
>   					  info->content, info->stat.st_size);
> @@ -711,7 +717,7 @@ int write_archive(int argc, const char **argv, const char *prefix,
>   	parse_treeish_arg(argv, &args, prefix, remote);
>   	parse_pathspec_arg(argv + 1, &args);
>   
> -	rc = ar->write_archive(ar, &args);
> +	rc = ar->write_archive(ar, repo, &args);
>   
>   	string_list_clear_func(&args.extra_files, extra_file_info_clear);
>   	free(args.refname);
> diff --git a/archive.h b/archive.h
> index 08bed3ed3af..bfbbd3274bd 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -41,7 +41,10 @@ const char *archive_format_from_filename(const char *filename);
>   #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
>   struct archiver {
>   	const char *name;
> -	int (*write_archive)(const struct archiver *, struct archiver_args *);
> +	int (*write_archive)(
> +		const struct archiver *,
> +		struct repository *,
> +		struct archiver_args *);
>   	unsigned flags;
>   	char *filter_command;
>   };
> @@ -51,12 +54,17 @@ void init_tar_archiver(void);
>   void init_zip_archiver(void);
>   void init_archivers(void);
>   
> -typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
> +typedef int (*write_archive_entry_fn_t)(
> +					struct repository *repo,
> +					struct archiver_args *args,
>   					const struct object_id *oid,
>   					const char *path, size_t pathlen,
>   					unsigned int mode,
>   					void *buffer, unsigned long size);
>   
> -int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
> +int write_archive_entries(
> +	struct repository *repo,
> +	struct archiver_args *args,
> +	write_archive_entry_fn_t write_entry);
>   
>   #endif	/* ARCHIVE_H */

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly
  2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
  2022-10-17 13:48       ` Phillip Wood
@ 2022-10-17 13:56       ` Junio C Hamano
  2022-10-26 22:48       ` Glen Choo
  2022-10-27 18:43       ` Jonathan Tan
  3 siblings, 0 replies; 48+ messages in thread
From: Junio C Hamano @ 2022-10-17 13:56 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: git, René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:

> From: Heather Lapointe <alpha@alphaservcomputing.solutions>
>
> This supports traversal into an actual submodule for read_tree_at.
> The logic is blocked on pathspec->recurse_submodules now,
> but previously hadn't been executed due to all fn() cases
> returning early for submodules.
>
> Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
> ---
>  tree.c | 88 ++++++++++++++++++++++++++++++++++++++++------------------
>  1 file changed, 61 insertions(+), 27 deletions(-)
>
> diff --git a/tree.c b/tree.c
> index 13f9173d45e..2a087c010f9 100644
> --- a/tree.c
> +++ b/tree.c
> @@ -8,6 +8,7 @@
>  #include "alloc.h"
>  #include "tree-walk.h"
>  #include "repository.h"
> +#include "pathspec.h"
>  
>  const char *tree_type = "tree";
>  
> @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r,
>  			return -1;
>  		}
>  
> -		if (S_ISDIR(entry.mode))
> +		if (S_ISDIR(entry.mode)) {
>  			oidcpy(&oid, &entry.oid);
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			strbuf_addch(base, '/');
> +			retval = read_tree_at(r, lookup_tree(r, &oid),
> +						base, pathspec,
> +						fn, context);
> +			strbuf_setlen(base, oldlen);
> +			if (retval)
> +				return -1;

The diff output makes it appear as if we are now adding a lot of extra
processing to a normal directory case, but it actually folds the
code that was originally outside the if/else if/ cascade here.  So
I think this is not breaking the normal directory case.

> +		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
> +			struct commit *commit;
> +			struct repository subrepo;
> +			struct repository* subrepo_p = &subrepo;
> +			struct tree* submodule_tree;

In our codebase, star/asterisk for a pointer declaration sticks to
the variable, not the type.

cf. Documentation/CodingGuidelines

> +			char *submodule_rel_path;

Funny that the new code sometimes gets it right ;-)

> +			int name_base_len = 0;
> +
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			submodule_rel_path = base->buf;
> +			// repo_submodule_init expects a path relative to submodule_prefix

We avoid // comments.

> +			if (r->submodule_prefix) {
> +				name_base_len = strlen(r->submodule_prefix);
> +				// we should always expect to start with submodule_prefix
> +				assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len));
> +				// strip the prefix
> +				submodule_rel_path += name_base_len;
> +				// if submodule_prefix doesn't end with a /, we want to get rid of that too
> +				if (is_dir_sep(submodule_rel_path[0])) {
> +					submodule_rel_path++;
> +				}
> +			}
> +
> +			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
> +				die("couldn't init submodule %s", base->buf);
> +
> +			if (repo_read_index(subrepo_p) < 0)
> +				die("index file corrupt");

Why?  You are going to ask the object store of the submodule
repository, and to do so you do not need to have its index read into
the core.

> +			commit = lookup_commit(subrepo_p, &entry.oid);
>  			if (!commit)
> -				die("Commit %s in submodule path %s%s not found",
> +				die("Commit %s in submodule path %s not found",
>  				    oid_to_hex(&entry.oid),
> -				    base->buf, entry.path);
> -
> -			// FIXME: This is the wrong repo instance (it refers to the superproject)
> -			// it will always fail as is (will fix in later patch)
> -			// This current codepath isn't executed by any existing callbacks
> -			// so it wouldn't show up as an issue at this time.
> -			if (repo_parse_commit(r, commit))
> -				die("Invalid commit %s in submodule path %s%s",
> +				    base->buf);
> +
> +			if (repo_parse_commit(subrepo_p, commit))
> +				die("Invalid commit %s in submodule path %s",
>  				    oid_to_hex(&entry.oid),
> -				    base->buf, entry.path);
> +				    base->buf);
>  
> -			oidcpy(&oid, get_commit_tree_oid(commit));
> -		}
> -		else
> -			continue;
> +			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
> +			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
>  
> -		len = tree_entry_len(&entry);
> -		strbuf_add(base, entry.path, len);
> -		strbuf_addch(base, '/');
> -		retval = read_tree_at(r, lookup_tree(r, &oid),
> -				      base, pathspec,
> -				      fn, context);
> -		strbuf_setlen(base, oldlen);
> -		if (retval)
> -			return -1;
> +			strbuf_addch(base, '/');
> +
> +			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
> +						base, pathspec,
> +						fn, context);
> +			if (retval)
> +			    die("failed to read tree for %s", base->buf);
> +			strbuf_setlen(base, oldlen);
> +			repo_clear(subrepo_p);

This is a lot of new code, which must be done correctly.  An easier
way out, using the add_submodule_odb() trick that the original code
assumed, becomes somewhat tempting (I guess we would do that in fn()
that would tell us to recurse into this codepath upon seeing a gitlink
entry).  Then we wouldn't have had to touch any tree() calls that were
taught to take "struct repository *" in earlier steps in this series.

But at some point, we would need to bite the bullet and plumb the
repository pointer through the callchain of more APIs, and this may
be that point.  I dunno.

> +		}
> +		// else, this is a file (or a submodule, but no pathspec->recurse_submodules)
>  	}
>  	return 0;
>  }

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (8 preceding siblings ...)
  2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
@ 2022-10-17 13:57     ` Phillip Wood
  2022-10-18 18:34     ` Junio C Hamano
  2022-10-26 22:14     ` Glen Choo
  11 siblings, 0 replies; 48+ messages in thread
From: Phillip Wood @ 2022-10-17 13:57 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

Hi Heather

On 17/10/2022 03:23, Heather Lapointe via GitGitGadget wrote:
> This makes it possible to include submodule contents in an archive command.
> 
> The inspiration for this change comes from this Github thread,
> https://github.com/dear-github/dear-github/issues/214, with at least 160
> 👍🏻 's at the time of writing. (I stumbled upon it because I wanted it as
> well).
> 
> I figured the underlying implementation wouldn't be too difficult with most
> of the plumbing already in place, so I decided to add the relevant logic to
> the client git-archive command.
> 
> One of the trickier parts of this implementation involved teaching read_tree
> about submodules. Some of the troublesome areas were still using the
> the_repository references to look up commit or tree or oid information. I
> ended up deciding that read_tree_fn_t would probably be best off having a
> concrete repo reference since it allows changing the context to a subrepo
> where needed (even though some of the usages did not need it specifically).
> 
> I am open to feedback since this is all quite new to me :)

I've had a quick read through and I thought this patch series was well 
structured and easy to follow for someone like me who is not familiar 
with the archive code. I've left a few mostly high-level comments on 
some of the individual patches. The commit messages are all pretty brief 
and would benefit from a bit more explanation of why the changes are 
being made. From a brief read through, the code changes themselves all 
looked pretty good.

Best Wishes

Phillip


> Alphadelta14 (1):
>    tree: do not use the_repository for tree traversal methods.
>
> Heather Lapointe (8):
>    tree: update cases to use repo_ tree methods
>    tree: increase test coverage for tree.c
>    tree: handle submodule case for read_tree_at properly
>    tree: add repository parameter to read_tree_fn_t
>    archive: pass repo objects to write_archive handlers
>    archive: remove global repository from archive_args
>    archive: add --recurse-submodules to git-archive command
>    archive: add tests for git archive --recurse-submodules
> 
>   Documentation/git-archive.txt     |   6 +-
>   Makefile                          |   1 +
>   archive-tar.c                     |  15 ++--
>   archive-zip.c                     |  15 ++--
>   archive.c                         | 138 ++++++++++++++++++++----------
>   archive.h                         |  16 +++-
>   builtin/checkout.c                |   4 +-
>   builtin/log.c                     |   4 +-
>   builtin/ls-files.c                |   8 +-
>   builtin/ls-tree.c                 |  34 +++++---
>   merge-recursive.c                 |   4 +-
>   merge.c                           |   4 +-
>   reset.c                           |   2 +-
>   revision.c                        |   4 +-
>   sequencer.c                       |   6 +-
>   sparse-index.c                    |   4 +-
>   t/helper/test-tool.c              |   1 +
>   t/helper/test-tool.h              |   1 +
>   t/helper/test-tree-read-tree-at.c |  41 +++++++++
>   t/t1023-tree-read-tree-at.sh      |  65 ++++++++++++++
>   t/t5005-archive-submodules.sh     |  83 ++++++++++++++++++
>   tree.c                            |  93 ++++++++++++++------
>   tree.h                            |  16 ++--
>   wt-status.c                       |   4 +-
>   24 files changed, 448 insertions(+), 121 deletions(-)
>   create mode 100644 t/helper/test-tree-read-tree-at.c
>   create mode 100755 t/t1023-tree-read-tree-at.sh
>   create mode 100755 t/t5005-archive-submodules.sh
> 
> 
> base-commit: e85701b4af5b7c2a9f3a1b07858703318dce365d
> Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1359%2FAlphadelta14%2Farchive-recurse-submodules-v3
> Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1359/Alphadelta14/archive-recurse-submodules-v3
> Pull-Request: https://github.com/git/git/pull/1359
> 
> Range-diff vs v2:
> 
>    -:  ----------- >  1:  79959a54eb4 tree: do not use the_repository for tree traversal methods.
>    -:  ----------- >  2:  2291e0f9b5c tree: update cases to use repo_ tree methods
>    -:  ----------- >  3:  9a07c6932f4 tree: increase test coverage for tree.c
>    2:  68f7830c6d9 !  4:  d3d1738e670 archive: fix a case of submodule in submodule traversal
>       @@
>         ## Metadata ##
>       -Author: Alphadelta14 <alpha@alphaservcomputing.solutions>
>       +Author: Heather Lapointe <alpha@alphaservcomputing.solutions>
>        
>         ## Commit message ##
>       -    archive: fix a case of submodule in submodule traversal
>       +    tree: handle submodule case for read_tree_at properly
>        
>       -    repo_submodule_init actually expects the path relative to submodule_prefix.
>       -    We preform a simple strip to the correct path.
>       +    This supports traversal into an actual submodule for read_tree_at.
>       +    The logic is blocked on pathspec->recurse_submodules now,
>       +    but previously hadn't been executed due to all fn() cases
>       +    returning early for submodules.
>        
>       -    Signed-off-by: Alphadelta14 <alpha@alphaservcomputing.solutions>
>       +    Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
>        
>         ## tree.c ##
>       +@@
>       + #include "alloc.h"
>       + #include "tree-walk.h"
>       + #include "repository.h"
>       ++#include "pathspec.h"
>       +
>       + const char *tree_type = "tree";
>       +
>        @@ tree.c: int read_tree_at(struct repository *r,
>       - 			struct repository subrepo;
>       - 			struct repository* subrepo_p = &subrepo;
>       - 			struct tree* submodule_tree;
>       + 			return -1;
>       + 		}
>       +
>       +-		if (S_ISDIR(entry.mode))
>       ++		if (S_ISDIR(entry.mode)) {
>       + 			oidcpy(&oid, &entry.oid);
>       +-		else if (S_ISGITLINK(entry.mode)) {
>       +-			struct commit *commit;
>       +
>       +-			commit = lookup_commit(r, &entry.oid);
>       ++			len = tree_entry_len(&entry);
>       ++			strbuf_add(base, entry.path, len);
>       ++			strbuf_addch(base, '/');
>       ++			retval = read_tree_at(r, lookup_tree(r, &oid),
>       ++						base, pathspec,
>       ++						fn, context);
>       ++			strbuf_setlen(base, oldlen);
>       ++			if (retval)
>       ++				return -1;
>       ++		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
>       ++			struct commit *commit;
>       ++			struct repository subrepo;
>       ++			struct repository* subrepo_p = &subrepo;
>       ++			struct tree* submodule_tree;
>        +			char *submodule_rel_path;
>        +			int name_base_len = 0;
>       -
>       --			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
>       --				die("couldn't init submodule %s%s", base->buf, entry.path);
>       ++
>        +			len = tree_entry_len(&entry);
>        +			strbuf_add(base, entry.path, len);
>        +			submodule_rel_path = base->buf;
>       @@ tree.c: int read_tree_at(struct repository *r,
>        +
>        +			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
>        +				die("couldn't init submodule %s", base->buf);
>       -
>       - 			if (repo_read_index(subrepo_p) < 0)
>       - 				die("index file corrupt");
>       -
>       - 			commit = lookup_commit(subrepo_p, &entry.oid);
>       ++
>       ++			if (repo_read_index(subrepo_p) < 0)
>       ++				die("index file corrupt");
>       ++
>       ++			commit = lookup_commit(subrepo_p, &entry.oid);
>         			if (!commit)
>        -				die("Commit %s in submodule path %s%s not found",
>        +				die("Commit %s in submodule path %s not found",
>         				    oid_to_hex(&entry.oid),
>        -				    base->buf, entry.path);
>       -+				    base->buf);
>       -
>       - 			if (repo_parse_commit(subrepo_p, commit))
>       +-
>       +-			// FIXME: This is the wrong repo instance (it refers to the superproject)
>       +-			// it will always fail as is (will fix in later patch)
>       +-			// This current codepath isn't executed by any existing callbacks
>       +-			// so it wouldn't show up as an issue at this time.
>       +-			if (repo_parse_commit(r, commit))
>        -				die("Invalid commit %s in submodule path %s%s",
>       ++				    base->buf);
>       ++
>       ++			if (repo_parse_commit(subrepo_p, commit))
>        +				die("Invalid commit %s in submodule path %s",
>         				    oid_to_hex(&entry.oid),
>        -				    base->buf, entry.path);
>        +				    base->buf);
>         
>       - 			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
>       - 			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
>       +-			oidcpy(&oid, get_commit_tree_oid(commit));
>       +-		}
>       +-		else
>       +-			continue;
>       ++			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
>       ++			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
>         
>       --			len = tree_entry_len(&entry);
>       --			strbuf_add(base, entry.path, len);
>       - 			strbuf_addch(base, '/');
>       +-		len = tree_entry_len(&entry);
>       +-		strbuf_add(base, entry.path, len);
>       +-		strbuf_addch(base, '/');
>       +-		retval = read_tree_at(r, lookup_tree(r, &oid),
>       +-				      base, pathspec,
>       +-				      fn, context);
>       +-		strbuf_setlen(base, oldlen);
>       +-		if (retval)
>       +-			return -1;
>       ++			strbuf_addch(base, '/');
>        +
>       - 			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
>       - 						base, pathspec,
>       - 						fn, context);
>       - 			if (retval) {
>       --			    die("failed to read tree for %s%s", base->buf, entry.path);
>       ++			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
>       ++						base, pathspec,
>       ++						fn, context);
>       ++			if (retval)
>        +			    die("failed to read tree for %s", base->buf);
>       - 			    return -1;
>       - 			}
>       - 			strbuf_setlen(base, oldlen);
>       ++			strbuf_setlen(base, oldlen);
>       ++			repo_clear(subrepo_p);
>       ++		}
>       ++		// else, this is a file (or a submodule, but no pathspec->recurse_submodules)
>       + 	}
>       + 	return 0;
>       + }
>    -:  ----------- >  5:  376345fdf66 tree: add repository parameter to read_tree_fn_t
>    1:  41664a59029 !  6:  1b9b049d64f archive: add --recurse-submodules to git-archive command
>       @@
>         ## Metadata ##
>       -Author: Alphadelta14 <alpha@alphaservcomputing.solutions>
>       +Author: Heather Lapointe <alpha@alphaservcomputing.solutions>
>        
>         ## Commit message ##
>       -    archive: add --recurse-submodules to git-archive command
>       +    archive: pass repo objects to write_archive handlers
>        
>       -    This makes it possible to include submodule contents in an archive command.
>       -
>       -    This required updating the general read_tree callbacks to support sub-repos
>       -    by not using the_repository global references where possible.
>       -
>       -    archive: update streaming to use target repo
>       -    archive: add test cases for git archive --recurse-submodules
>       +    Use contextual repos instead of the_repository or args->repo
>       +    to ensure that submodules will be handled correctly
>       +    since they use multiple repo instances.
>        
>            Signed-off-by: Heather Lapointe <alpha@alphaservcomputing.solutions>
>        
>       @@ archive-tar.c: static unsigned long offset;
>         static int tar_umask = 002;
>         
>         static int write_tar_filter_archive(const struct archiver *ar,
>       -+				    struct repository *repo,
>       ++					struct repository *repo,
>         				    struct archiver_args *args);
>         
>         /*
>       @@ archive-tar.c: static void write_extended_header(struct archiver_args *args,
>         }
>         
>        -static int write_tar_entry(struct archiver_args *args,
>       -+static int write_tar_entry(struct repository *repo,
>       ++static int write_tar_entry(
>       ++			   struct repository *repo,
>        +			   struct archiver_args *args,
>         			   const struct object_id *oid,
>         			   const char *path, size_t pathlen,
>       @@ archive-tar.c: static void tgz_write_block(const void *data)
>         static const char internal_gzip_command[] = "git archive gzip";
>         
>         static int write_tar_filter_archive(const struct archiver *ar,
>       -+				    struct repository *repo,
>       ++					struct repository *repo,
>         				    struct archiver_args *args)
>         {
>         #if ZLIB_VERNUM >= 0x1221
>       @@ archive-zip.c: static int entry_is_binary(struct index_state *istate, const char
>         #define STREAM_BUFFER_SIZE (1024 * 16)
>         
>        -static int write_zip_entry(struct archiver_args *args,
>       -+static int write_zip_entry(struct repository *repo,
>       ++static int write_zip_entry(
>       ++			   struct repository *repo,
>        +			   struct archiver_args *args,
>         			   const struct object_id *oid,
>         			   const char *path, size_t pathlen,
>       @@ archive-zip.c: static int archive_zip_config(const char *var, const char *value,
>         }
>         
>         static int write_zip_archive(const struct archiver *ar UNUSED,
>       -+			     struct repository *repo,
>       ++				 struct repository *repo,
>         			     struct archiver_args *args)
>         {
>         	int err;
>       @@ archive-zip.c: static int write_zip_archive(const struct archiver *ar UNUSED,
>         
>        
>         ## archive.c ##
>       -@@
>       - #include "unpack-trees.h"
>       - #include "dir.h"
>       - #include "quote.h"
>       -+#include "submodule.h"
>       -
>       - static char const * const archive_usage[] = {
>       - 	N_("git archive [<options>] <tree-ish> [<path>...]"),
>       -@@ archive.c: static void format_subst(const struct commit *commit,
>       - }
>       -
>       - static void *object_file_to_archive(const struct archiver_args *args,
>       -+				    struct repository *repo,
>       - 				    const char *path,
>       - 				    const struct object_id *oid,
>       - 				    unsigned int mode,
>       -@@ archive.c: static void *object_file_to_archive(const struct archiver_args *args,
>       - 			       (args->tree ? &args->tree->object.oid : NULL), oid);
>       -
>       - 	path += args->baselen;
>       --	buffer = read_object_file(oid, type, sizep);
>       -+	buffer = repo_read_object_file(repo, oid, type, sizep);
>       - 	if (buffer && S_ISREG(mode)) {
>       - 		struct strbuf buf = STRBUF_INIT;
>       - 		size_t size = 0;
>       -
>       - 		strbuf_attach(&buf, buffer, *sizep, *sizep + 1);
>       --		convert_to_working_tree(args->repo->index, path, buf.buf, buf.len, &buf, &meta);
>       -+		convert_to_working_tree(repo->index, path, buf.buf, buf.len, &buf, &meta);
>       - 		if (commit)
>       - 			format_subst(commit, buf.buf, buf.len, &buf, args->pretty_ctx);
>       - 		buffer = strbuf_detach(&buf, &size);
>        @@ archive.c: static int check_attr_export_subst(const struct attr_check *check)
>         	return check && ATTR_TRUE(check->items[1].value);
>         }
>         
>        -static int write_archive_entry(const struct object_id *oid, const char *base,
>       -+static int write_archive_entry(struct repository *repo, const struct object_id *oid, const char *base,
>       ++static int write_archive_entry(
>       ++		struct repository *repo,
>       ++		const struct object_id *oid, const char *base,
>         		int baselen, const char *filename, unsigned mode,
>         		void *context)
>         {
>       @@ archive.c: static int write_archive_entry(const struct object_id *oid, const cha
>        +		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
>         		if (err)
>         			return err;
>       --		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
>       -+		return READ_TREE_RECURSIVE;
>       - 	}
>       -
>       - 	if (args->verbose)
>       + 		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
>        @@ archive.c: static int write_archive_entry(const struct object_id *oid, const char *base,
>         
>         	/* Stream it? */
>         	if (S_ISREG(mode) && !args->convert &&
>        -	    oid_object_info(args->repo, oid, &size) == OBJ_BLOB &&
>       --	    size > big_file_threshold)
>       --		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
>        +	    oid_object_info(repo, oid, &size) == OBJ_BLOB &&
>       -+	    size > big_file_threshold) {
>       -+			err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
>       -+			if (err) {
>       -+				die("Failed to write file %.*s", (int)path.len, path.buf);
>       -+			}
>       -+			return err;
>       -+		}
>       + 	    size > big_file_threshold)
>       +-		return write_entry(args, oid, path.buf, path.len, mode, NULL, size);
>       ++		return write_entry(repo, args, oid, path.buf, path.len, mode, NULL, size);
>         
>       --	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
>       -+	buffer = object_file_to_archive(args, repo, path.buf, oid, mode, &type, &size);
>       + 	buffer = object_file_to_archive(args, path.buf, oid, mode, &type, &size);
>         	if (!buffer)
>         		return error(_("cannot read '%s'"), oid_to_hex(oid));
>        -	err = write_entry(args, oid, path.buf, path.len, mode, buffer, size);
>       @@ archive.c: static void queue_directory(const struct object_id *oid,
>         }
>         
>        -static int write_directory(struct archiver_context *c)
>       -+static void queue_submodule(struct repository *superproject,
>       -+		const struct object_id *oid,
>       -+		struct strbuf *base, const char *filename,
>       -+		unsigned mode, struct archiver_context *c)
>       -+{
>       -+	struct repository subrepo;
>       -+
>       -+	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
>       -+		return;
>       -+
>       -+	if (repo_read_index(&subrepo) < 0)
>       -+		die("index file corrupt");
>       -+
>       -+    queue_directory(oid, base, filename, mode, c);
>       -+
>       -+	repo_clear(&subrepo);
>       -+}
>       -+
>       -+static int write_directory(struct repository *repo, struct archiver_context *c)
>       ++static int write_directory(
>       ++		struct repository *repo,
>       ++		struct archiver_context *c)
>         {
>         	struct directory *d = c->bottom;
>         	int ret;
>       @@ archive.c: static int write_directory(struct archiver_context *c)
>        +		write_directory(repo, c) ||
>        +		write_archive_entry(repo, &d->oid, d->path, d->baselen,
>         				    d->path + d->baselen, d->mode,
>       --				    c) != READ_TREE_RECURSIVE;
>       -+				    c);
>       + 				    c) != READ_TREE_RECURSIVE;
>         	free(d);
>       --	return ret ? -1 : 0;
>       -+	if (ret == READ_TREE_RECURSIVE)
>       -+		return 0;
>       -+	return ret;
>       - }
>       -
>       --static int queue_or_write_archive_entry(const struct object_id *oid,
>       -+static int queue_or_write_archive_entry(
>       -+		struct repository *repo, const struct object_id *oid,
>       - 		struct strbuf *base, const char *filename,
>       - 		unsigned mode, void *context)
>       - {
>       -@@ archive.c: static int queue_or_write_archive_entry(const struct object_id *oid,
>       - 		/* Borrow base, but restore its original value when done. */
>       - 		strbuf_addstr(base, filename);
>       - 		strbuf_addch(base, '/');
>       --		check = get_archive_attrs(c->args->repo->index, base->buf);
>       -+		check = get_archive_attrs(repo->index, base->buf);
>       - 		strbuf_setlen(base, baselen);
>       -
>       - 		if (check_attr_export_ignore(check))
>       - 			return 0;
>       - 		queue_directory(oid, base, filename, mode, c);
>       +@@ archive.c: static int queue_or_write_archive_entry(
>         		return READ_TREE_RECURSIVE;
>       -+	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
>       -+		if (is_submodule_active(repo, filename)) {
>       -+			queue_submodule(repo, oid, base, filename, mode, c);
>       -+			return READ_TREE_RECURSIVE;
>       -+		}
>         	}
>         
>        -	if (write_directory(c))
>       -+	if (write_directory(repo, c))
>       ++	if (write_directory(r, c))
>         		return -1;
>        -	return write_archive_entry(oid, base->buf, base->len, filename, mode,
>       -+	return write_archive_entry(repo, oid, base->buf, base->len, filename, mode,
>       ++	return write_archive_entry(r, oid, base->buf, base->len, filename, mode,
>         				   context);
>         }
>         
>       @@ archive.c: struct extra_file_info {
>         };
>         
>        -int write_archive_entries(struct archiver_args *args,
>       -+int write_archive_entries(struct repository *repo,
>       ++int write_archive_entries(
>       ++		struct repository *repo,
>        +		struct archiver_args *args,
>         		write_archive_entry_fn_t write_entry)
>         {
>       @@ archive.c: int write_archive_entries(struct archiver_args *args,
>         				  len, 040777, NULL, 0);
>         		if (err)
>         			return err;
>       -@@ archive.c: int write_archive_entries(struct archiver_args *args,
>       - 		memset(&opts, 0, sizeof(opts));
>       - 		opts.index_only = 1;
>       - 		opts.head_idx = -1;
>       --		opts.src_index = args->repo->index;
>       --		opts.dst_index = args->repo->index;
>       -+		opts.src_index = repo->index;
>       -+		opts.dst_index = repo->index;
>       - 		opts.fn = oneway_merge;
>       - 		init_tree_desc(&t, args->tree->buffer, args->tree->size);
>       - 		if (unpack_trees(1, &t, &opts))
>       -@@ archive.c: int write_archive_entries(struct archiver_args *args,
>       - 		git_attr_set_direction(GIT_ATTR_INDEX);
>       - 	}
>       -
>       --	err = read_tree(args->repo, args->tree,
>       -+	err = read_tree(repo, args->tree,
>       - 			&args->pathspec,
>       - 			queue_or_write_archive_entry,
>       - 			&context);
>        @@ archive.c: int write_archive_entries(struct archiver_args *args,
>         			if (strbuf_read_file(&content, path, info->stat.st_size) < 0)
>         				err = error_errno(_("cannot read '%s'"), path);
>       @@ archive.c: int write_archive_entries(struct archiver_args *args,
>         					  path, strlen(path),
>         					  canon_mode(info->stat.st_mode),
>         					  info->content, info->stat.st_size);
>       -@@ archive.c: struct path_exists_context {
>       - 	struct archiver_args *args;
>       - };
>       -
>       --static int reject_entry(const struct object_id *oid UNUSED,
>       -+static int reject_entry(struct repository *repo, const struct object_id *oid UNUSED,
>       - 			struct strbuf *base,
>       - 			const char *filename, unsigned mode,
>       - 			void *context)
>       -@@ archive.c: static int reject_entry(const struct object_id *oid UNUSED,
>       - 		struct strbuf sb = STRBUF_INIT;
>       - 		strbuf_addbuf(&sb, base);
>       - 		strbuf_addstr(&sb, filename);
>       --		if (!match_pathspec(ctx->args->repo->index,
>       -+		if (!match_pathspec(repo->index,
>       - 				    &ctx->pathspec,
>       - 				    sb.buf, sb.len, 0, NULL, 1))
>       - 			ret = READ_TREE_RECURSIVE;
>       -@@ archive.c: static void parse_pathspec_arg(const char **pathspec,
>       - 		       PATHSPEC_PREFER_FULL,
>       - 		       "", pathspec);
>       - 	ar_args->pathspec.recursive = 1;
>       -+	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
>       - 	if (pathspec) {
>       - 		while (*pathspec) {
>       - 			if (**pathspec && !path_exists(ar_args, *pathspec))
>       -@@ archive.c: static int parse_archive_args(int argc, const char **argv,
>       - 	int verbose = 0;
>       - 	int i;
>       - 	int list = 0;
>       -+	int recurse_submodules = 0;
>       - 	int worktree_attributes = 0;
>       - 	struct option opts[] = {
>       - 		OPT_GROUP(""),
>       -@@ archive.c: static int parse_archive_args(int argc, const char **argv,
>       - 		  add_file_cb, (intptr_t)&base },
>       - 		OPT_STRING('o', "output", &output, N_("file"),
>       - 			N_("write the archive to this file")),
>       -+		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
>       -+			N_("include submodules in archive")),
>       - 		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
>       - 			N_("read .gitattributes in working directory")),
>       - 		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
>       -@@ archive.c: static int parse_archive_args(int argc, const char **argv,
>       - 	args->base = base;
>       - 	args->baselen = strlen(base);
>       - 	args->worktree_attributes = worktree_attributes;
>       -+	args->recurse_submodules = recurse_submodules;
>       -
>       - 	return argc;
>       - }
>        @@ archive.c: int write_archive(int argc, const char **argv, const char *prefix,
>         	parse_treeish_arg(argv, &args, prefix, remote);
>         	parse_pathspec_arg(argv + 1, &args);
>       @@ archive.c: int write_archive(int argc, const char **argv, const char *prefix,
>         	free(args.refname);
>        
>         ## archive.h ##
>       -@@ archive.h: struct archiver_args {
>       - 	timestamp_t time;
>       - 	struct pathspec pathspec;
>       - 	unsigned int verbose : 1;
>       -+	unsigned int recurse_submodules : 1;
>       - 	unsigned int worktree_attributes : 1;
>       - 	unsigned int convert : 1;
>       - 	int compression_level;
>        @@ archive.h: const char *archive_format_from_filename(const char *filename);
>         #define ARCHIVER_HIGH_COMPRESSION_LEVELS 4
>         struct archiver {
>         	const char *name;
>        -	int (*write_archive)(const struct archiver *, struct archiver_args *);
>       -+	int (*write_archive)(const struct archiver *, struct repository *repo, struct archiver_args *);
>       ++	int (*write_archive)(
>       ++		const struct archiver *,
>       ++		struct repository *,
>       ++		struct archiver_args *);
>         	unsigned flags;
>         	char *filter_command;
>         };
>       @@ archive.h: void init_tar_archiver(void);
>         void init_archivers(void);
>         
>        -typedef int (*write_archive_entry_fn_t)(struct archiver_args *args,
>       -+typedef int (*write_archive_entry_fn_t)(struct repository *repo,
>       ++typedef int (*write_archive_entry_fn_t)(
>       ++					struct repository *repo,
>        +					struct archiver_args *args,
>         					const struct object_id *oid,
>         					const char *path, size_t pathlen,
>       @@ archive.h: void init_tar_archiver(void);
>         					void *buffer, unsigned long size);
>         
>        -int write_archive_entries(struct archiver_args *args, write_archive_entry_fn_t write_entry);
>       -+int write_archive_entries(struct repository *repo, struct archiver_args *args, write_archive_entry_fn_t write_entry);
>       ++int write_archive_entries(
>       ++	struct repository *repo,
>       ++	struct archiver_args *args,
>       ++	write_archive_entry_fn_t write_entry);
>         
>         #endif	/* ARCHIVE_H */
>       -
>       - ## builtin/checkout.c ##
>       -@@ builtin/checkout.c: static int post_checkout_hook(struct commit *old_commit, struct commit *new_comm
>       -
>       - }
>       -
>       --static int update_some(const struct object_id *oid, struct strbuf *base,
>       -+static int update_some(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
>       - 		       const char *pathname, unsigned mode, void *context UNUSED)
>       - {
>       - 	int len;
>       -
>       - ## builtin/log.c ##
>       -@@ builtin/log.c: static int show_tag_object(const struct object_id *oid, struct rev_info *rev)
>       - 	return 0;
>       - }
>       -
>       --static int show_tree_object(const struct object_id *oid UNUSED,
>       -+static int show_tree_object(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
>       - 			    struct strbuf *base UNUSED,
>       - 			    const char *pathname, unsigned mode,
>       - 			    void *context)
>       -
>       - ## builtin/ls-files.c ##
>       -@@ builtin/ls-files.c: static int get_common_prefix_len(const char *common_prefix)
>       - 	return common_prefix_len;
>       - }
>       -
>       --static int read_one_entry_opt(struct index_state *istate,
>       -+static int read_one_entry_opt(struct repository *repo UNUSED, struct index_state *istate,
>       - 			      const struct object_id *oid,
>       - 			      struct strbuf *base,
>       - 			      const char *pathname,
>       -@@ builtin/ls-files.c: static int read_one_entry_opt(struct index_state *istate,
>       - 	return add_index_entry(istate, ce, opt);
>       - }
>       -
>       --static int read_one_entry(const struct object_id *oid, struct strbuf *base,
>       -+static int read_one_entry(struct repository *repo, const struct object_id *oid, struct strbuf *base,
>       - 			  const char *pathname, unsigned mode,
>       - 			  void *context)
>       - {
>       - 	struct index_state *istate = context;
>       --	return read_one_entry_opt(istate, oid, base, pathname,
>       -+	return read_one_entry_opt(repo, istate, oid, base, pathname,
>       - 				  mode,
>       - 				  ADD_CACHE_OK_TO_ADD|ADD_CACHE_SKIP_DFCHECK);
>       - }
>       -@@ builtin/ls-files.c: static int read_one_entry(const struct object_id *oid, struct strbuf *base,
>       -  * This is used when the caller knows there is no existing entries at
>       -  * the stage that will conflict with the entry being added.
>       -  */
>       --static int read_one_entry_quick(const struct object_id *oid, struct strbuf *base,
>       -+static int read_one_entry_quick(struct repository *repo, const struct object_id *oid, struct strbuf *base,
>       - 				const char *pathname, unsigned mode,
>       - 				void *context)
>       - {
>       - 	struct index_state *istate = context;
>       --	return read_one_entry_opt(istate, oid, base, pathname,
>       -+	return read_one_entry_opt(repo, istate, oid, base, pathname,
>       - 				  mode, ADD_CACHE_JUST_APPEND);
>       - }
>       -
>       -
>       - ## builtin/ls-tree.c ##
>       -@@ builtin/ls-tree.c: static int show_recursive(const char *base, size_t baselen, const char *pathname
>       - 	return 0;
>       - }
>       -
>       --static int show_tree_fmt(const struct object_id *oid, struct strbuf *base,
>       -+static int show_tree_fmt(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
>       - 			 const char *pathname, unsigned mode, void *context UNUSED)
>       - {
>       - 	size_t baselen;
>       -@@ builtin/ls-tree.c: static void show_tree_common_default_long(struct strbuf *base,
>       - 	strbuf_setlen(base, baselen);
>       - }
>       -
>       --static int show_tree_default(const struct object_id *oid, struct strbuf *base,
>       -+static int show_tree_default(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
>       - 			     const char *pathname, unsigned mode,
>       - 			     void *context UNUSED)
>       - {
>       -@@ builtin/ls-tree.c: static int show_tree_default(const struct object_id *oid, struct strbuf *base,
>       - 	return recurse;
>       - }
>       -
>       --static int show_tree_long(const struct object_id *oid, struct strbuf *base,
>       -+static int show_tree_long(struct repository *repo, const struct object_id *oid, struct strbuf *base,
>       - 			  const char *pathname, unsigned mode,
>       - 			  void *context UNUSED)
>       - {
>       -@@ builtin/ls-tree.c: static int show_tree_long(const struct object_id *oid, struct strbuf *base,
>       -
>       - 	if (data.type == OBJ_BLOB) {
>       - 		unsigned long size;
>       --		if (oid_object_info(the_repository, data.oid, &size) == OBJ_BAD)
>       -+		if (oid_object_info(repo, data.oid, &size) == OBJ_BAD)
>       - 			xsnprintf(size_text, sizeof(size_text), "BAD");
>       - 		else
>       - 			xsnprintf(size_text, sizeof(size_text),
>       -@@ builtin/ls-tree.c: static int show_tree_long(const struct object_id *oid, struct strbuf *base,
>       - 	}
>       -
>       - 	printf("%06o %s %s %7s\t", data.mode, type_name(data.type),
>       --	       find_unique_abbrev(data.oid, abbrev), size_text);
>       -+	       repo_find_unique_abbrev(repo, data.oid, abbrev), size_text);
>       - 	show_tree_common_default_long(base, pathname, data.base->len);
>       - 	return recurse;
>       - }
>       -
>       --static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
>       -+static int show_tree_name_only(struct repository *repo UNUSED, const struct object_id *oid, struct strbuf *base,
>       - 			       const char *pathname, unsigned mode,
>       - 			       void *context UNUSED)
>       - {
>       -@@ builtin/ls-tree.c: static int show_tree_name_only(const struct object_id *oid, struct strbuf *base,
>       - 	return recurse;
>       - }
>       -
>       --static int show_tree_object(const struct object_id *oid, struct strbuf *base,
>       -+static int show_tree_object(struct repository *repo, const struct object_id *oid, struct strbuf *base,
>       - 			    const char *pathname, unsigned mode,
>       - 			    void *context UNUSED)
>       - {
>       -@@ builtin/ls-tree.c: static int show_tree_object(const struct object_id *oid, struct strbuf *base,
>       - 	if (early >= 0)
>       - 		return early;
>       -
>       --	printf("%s%c", find_unique_abbrev(oid, abbrev), line_termination);
>       -+	printf("%s%c", repo_find_unique_abbrev(repo, oid, abbrev), line_termination);
>       - 	return recurse;
>       - }
>       -
>       -
>       - ## list-objects.c ##
>       -@@ list-objects.c: static void process_tree(struct traversal_context *ctx,
>       - 	    !revs->include_check_obj(&tree->object, revs->include_check_data))
>       - 		return;
>       -
>       --	failed_parse = parse_tree_gently(tree, 1);
>       -+	failed_parse = parse_tree_gently(revs->repo, tree, 1);
>       - 	if (failed_parse) {
>       - 		if (revs->ignore_missing_links)
>       - 			return;
>       -
>       - ## merge-recursive.c ##
>       -@@ merge-recursive.c: static void unpack_trees_finish(struct merge_options *opt)
>       - 	clear_unpack_trees_porcelain(&opt->priv->unpack_opts);
>       - }
>       -
>       --static int save_files_dirs(const struct object_id *oid UNUSED,
>       -+static int save_files_dirs(struct repository *repo UNUSED, const struct object_id *oid UNUSED,
>       - 			   struct strbuf *base, const char *path,
>       - 			   unsigned int mode, void *context)
>       - {
>       -
>       - ## revision.c ##
>       -@@ revision.c: static void mark_tree_contents_uninteresting(struct repository *r,
>       - 	struct tree_desc desc;
>       - 	struct name_entry entry;
>       -
>       --	if (parse_tree_gently(tree, 1) < 0)
>       -+	if (parse_tree_gently(r, tree, 1) < 0)
>       - 		return;
>       -
>       - 	init_tree_desc(&desc, tree->buffer, tree->size);
>       -@@ revision.c: static void add_children_by_path(struct repository *r,
>       - 	if (!tree)
>       - 		return;
>       -
>       --	if (parse_tree_gently(tree, 1) < 0)
>       -+	if (parse_tree_gently(r, tree, 1) < 0)
>       - 		return;
>       -
>       - 	init_tree_desc(&desc, tree->buffer, tree->size);
>       -
>       - ## sparse-index.c ##
>       -@@ sparse-index.c: static void set_index_entry(struct index_state *istate, int nr, struct cache_ent
>       - 	add_name_hash(istate, ce);
>       - }
>       -
>       --static int add_path_to_index(const struct object_id *oid,
>       -+static int add_path_to_index(struct repository *repo UNUSED, const struct object_id *oid,
>       - 			     struct strbuf *base, const char *path,
>       - 			     unsigned int mode, void *context)
>       - {
>       -
>       - ## t/t5005-archive-submodules.sh (new) ##
>       -@@
>       -+#!/bin/sh
>       -+
>       -+test_description='git archive --recurse-submodules test'
>       -+
>       -+. ./test-lib.sh
>       -+. "$TEST_DIRECTORY"/lib-submodule-update.sh
>       -+
>       -+test_expect_success 'setup' '
>       -+	create_lib_submodule_repo &&
>       -+	git -C submodule_update_repo checkout valid_sub1 &&
>       -+	git -C submodule_update_repo submodule update
>       -+'
>       -+
>       -+check_tar() {
>       -+	tarfile=$1.tar
>       -+	listfile=$1.lst
>       -+	dir=$1
>       -+	dir_with_prefix=$dir/$2
>       -+
>       -+	test_expect_success ' extract tar archive' '
>       -+		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
>       -+	'
>       -+}
>       -+
>       -+check_added() {
>       -+	dir=$1
>       -+	path_in_fs=$2
>       -+	path_in_archive=$3
>       -+
>       -+	test_expect_success " validate extra file $path_in_archive" '
>       -+		test -f $dir/$path_in_archive &&
>       -+		diff -r $path_in_fs $dir/$path_in_archive
>       -+	'
>       -+}
>       -+
>       -+check_not_added() {
>       -+	dir=$1
>       -+	path_in_archive=$2
>       -+
>       -+	test_expect_success " validate unpresent file $path_in_archive" '
>       -+		! test -f $dir/$path_in_archive &&
>       -+		! test -d $dir/$path_in_archive
>       -+	'
>       -+}
>       -+
>       -+test_expect_success 'archive without recurse, non-init' '
>       -+	reset_work_tree_to valid_sub1 &&
>       -+	git -C submodule_update archive HEAD >b.tar
>       -+'
>       -+
>       -+check_tar b
>       -+check_added b submodule_update/file1 file1
>       -+check_not_added b sub1/file1
>       -+
>       -+test_expect_success 'archive with recurse, non-init' '
>       -+	reset_work_tree_to valid_sub1 &&
>       -+	! git -C submodule_update archive --recurse-submodules HEAD >b2-err.tar
>       -+'
>       -+
>       -+test_expect_success 'archive with recurse, init' '
>       -+	reset_work_tree_to valid_sub1 &&
>       -+	git -C submodule_update submodule update --init &&
>       -+	git -C submodule_update ls-files --recurse-submodules &&
>       -+	git -C submodule_update ls-tree HEAD &&
>       -+	git -C submodule_update archive --recurse-submodules HEAD >b2.tar
>       -+'
>       -+
>       -+check_tar b2
>       -+check_added b2 submodule_update/sub1/file1 sub1/file1
>       -+
>       -+test_expect_success 'archive with recurse with big files' '
>       -+	reset_work_tree_to valid_sub1 &&
>       -+	test_config core.bigfilethreshold 1 &&
>       -+	git -C submodule_update submodule update --init &&
>       -+	git -C submodule_update ls-files --recurse-submodules &&
>       -+	git -C submodule_update ls-tree HEAD &&
>       -+	git -C submodule_update archive --recurse-submodules HEAD >b3.tar
>       -+'
>       -+
>       -+check_tar b3
>       -+check_added b3 submodule_update/sub1/file1 sub1/file1
>       -+
>       -+
>       -+test_done
>       -
>       - ## tree.c ##
>       -@@
>       - #include "alloc.h"
>       - #include "tree-walk.h"
>       - #include "repository.h"
>       -+#include "pathspec.h"
>       -
>       - const char *tree_type = "tree";
>       -
>       -@@ tree.c: int read_tree_at(struct repository *r,
>       - 	int len, oldlen = base->len;
>       - 	enum interesting retval = entry_not_interesting;
>       -
>       --	if (parse_tree(tree))
>       --		return -1;
>       -+	if (repo_parse_tree(r, tree))
>       -+		die("Failed to parse tree");
>       -
>       - 	init_tree_desc(&desc, tree->buffer, tree->size);
>       -
>       -@@ tree.c: int read_tree_at(struct repository *r,
>       - 				continue;
>       - 		}
>       -
>       --		switch (fn(&entry.oid, base,
>       -+		switch (fn(r, &entry.oid, base,
>       - 			   entry.path, entry.mode, context)) {
>       - 		case 0:
>       - 			continue;
>       -@@ tree.c: int read_tree_at(struct repository *r,
>       - 			return -1;
>       - 		}
>       -
>       --		if (S_ISDIR(entry.mode))
>       -+		if (S_ISDIR(entry.mode)) {
>       - 			oidcpy(&oid, &entry.oid);
>       --		else if (S_ISGITLINK(entry.mode)) {
>       -+			len = tree_entry_len(&entry);
>       -+			strbuf_add(base, entry.path, len);
>       -+			strbuf_addch(base, '/');
>       -+			retval = read_tree_at(r, lookup_tree(r, &oid),
>       -+						base, pathspec,
>       -+						fn, context);
>       -+			strbuf_setlen(base, oldlen);
>       -+			if (retval)
>       -+				return -1;
>       -+		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
>       - 			struct commit *commit;
>       -+			struct repository subrepo;
>       -+			struct repository* subrepo_p = &subrepo;
>       -+			struct tree* submodule_tree;
>       -
>       --			commit = lookup_commit(r, &entry.oid);
>       -+			if (repo_submodule_init(subrepo_p, r, entry.path, null_oid()))
>       -+				die("couldn't init submodule %s%s", base->buf, entry.path);
>       -+
>       -+			if (repo_read_index(subrepo_p) < 0)
>       -+				die("index file corrupt");
>       -+
>       -+			commit = lookup_commit(subrepo_p, &entry.oid);
>       - 			if (!commit)
>       - 				die("Commit %s in submodule path %s%s not found",
>       - 				    oid_to_hex(&entry.oid),
>       - 				    base->buf, entry.path);
>       -
>       --			if (parse_commit(commit))
>       -+			if (repo_parse_commit(subrepo_p, commit))
>       - 				die("Invalid commit %s in submodule path %s%s",
>       - 				    oid_to_hex(&entry.oid),
>       - 				    base->buf, entry.path);
>       -
>       --			oidcpy(&oid, get_commit_tree_oid(commit));
>       -+			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
>       -+			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
>       -+
>       -+			len = tree_entry_len(&entry);
>       -+			strbuf_add(base, entry.path, len);
>       -+			strbuf_addch(base, '/');
>       -+			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
>       -+						base, pathspec,
>       -+						fn, context);
>       -+			if (retval) {
>       -+			    die("failed to read tree for %s%s", base->buf, entry.path);
>       -+			    return -1;
>       -+			}
>       -+			strbuf_setlen(base, oldlen);
>       -+			repo_clear(subrepo_p);
>       - 		}
>       --		else
>       --			continue;
>       -
>       --		len = tree_entry_len(&entry);
>       --		strbuf_add(base, entry.path, len);
>       --		strbuf_addch(base, '/');
>       --		retval = read_tree_at(r, lookup_tree(r, &oid),
>       --				      base, pathspec,
>       --				      fn, context);
>       --		strbuf_setlen(base, oldlen);
>       --		if (retval)
>       --			return -1;
>       - 	}
>       - 	return 0;
>       - }
>       -@@ tree.c: int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size)
>       - 	return 0;
>       - }
>       -
>       --int parse_tree_gently(struct tree *item, int quiet_on_missing)
>       -+int parse_tree_gently(struct repository *r, struct tree *item, int quiet_on_missing)
>       - {
>       - 	 enum object_type type;
>       - 	 void *buffer;
>       -@@ tree.c: int parse_tree_gently(struct tree *item, int quiet_on_missing)
>       -
>       - 	if (item->object.parsed)
>       - 		return 0;
>       --	buffer = read_object_file(&item->object.oid, &type, &size);
>       -+	buffer = repo_read_object_file(r, &item->object.oid, &type, &size);
>       - 	if (!buffer)
>       - 		return quiet_on_missing ? -1 :
>       - 			error("Could not read %s",
>       -
>       - ## tree.h ##
>       -@@ tree.h: struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
>       -
>       - int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
>       -
>       --int parse_tree_gently(struct tree *tree, int quiet_on_missing);
>       --static inline int parse_tree(struct tree *tree)
>       -+int parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
>       -+static inline int repo_parse_tree(struct repository *r, struct tree *tree)
>       - {
>       --	return parse_tree_gently(tree, 0);
>       -+	return parse_tree_gently(r, tree, 0);
>       - }
>       -+#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
>       -+#define parse_tree(tree) repo_parse_tree(the_repository, tree)
>       -+#endif
>       - void free_tree_buffer(struct tree *tree);
>       -
>       - /* Parses and returns the tree in the given ent, chasing tags and commits. */
>       -@@ tree.h: struct tree *parse_tree_indirect(const struct object_id *oid);
>       - int cmp_cache_name_compare(const void *a_, const void *b_);
>       -
>       - #define READ_TREE_RECURSIVE 1
>       --typedef int (*read_tree_fn_t)(const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
>       -+typedef int (*read_tree_fn_t)(struct repository *, const struct object_id *, struct strbuf *, const char *, unsigned int, void *);
>       -
>       - int read_tree_at(struct repository *r,
>       - 		 struct tree *tree, struct strbuf *base,
>       -
>       - ## wt-status.c ##
>       -@@ wt-status.c: static void wt_status_collect_changes_index(struct wt_status *s)
>       - 	release_revisions(&rev);
>       - }
>       -
>       --static int add_file_to_list(const struct object_id *oid,
>       -+static int add_file_to_list(struct repository *repo UNUSED, const struct object_id *oid,
>       - 			    struct strbuf *base, const char *path,
>       - 			    unsigned int mode, void *context)
>       - {
>    -:  ----------- >  7:  2443c9b1b6e archive: remove global repository from archive_args
>    -:  ----------- >  8:  4672e3d9586 archive: add --recurse-submodules to git-archive command
>    -:  ----------- >  9:  f88ebbaf17c archive: add tests for git archive --recurse-submodules
> 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (9 preceding siblings ...)
  2022-10-17 13:57     ` [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command Phillip Wood
@ 2022-10-18 18:34     ` Junio C Hamano
  2022-10-18 18:48       ` Heather Lapointe
  2022-10-26 22:14     ` Glen Choo
  11 siblings, 1 reply; 48+ messages in thread
From: Junio C Hamano @ 2022-10-18 18:34 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: git, René Scharfe, Heather Lapointe, Taylor Blau

Today I was scheduled to be offline, so I won't dig further on the
issues this topic has now, but the new tests this series introduces,
namely t1023 and t5005, both rely on being able to clone a nested
submodule via file:// transport, which no longer is allowed.

The patches need to be updated to adjust to the new world order, of
course, but we probably should take it as an example of what the
most recent update may be breaking for real world users.

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-18 18:34     ` Junio C Hamano
@ 2022-10-18 18:48       ` Heather Lapointe
  2022-10-19 16:16         ` Junio C Hamano
  0 siblings, 1 reply; 48+ messages in thread
From: Heather Lapointe @ 2022-10-18 18:48 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Heather Lapointe via GitGitGadget, git, René Scharfe,
	Taylor Blau

Thanks for taking a look.

On Tue, Oct 18, 2022 at 2:34 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Today I was scheduled to be offline, so I won't dig further on the
> issues this topic has now, but the new tests this series introduces,
> namely t1023 and t5005, both relies on being able to clone a nested
> submodule via file:// transport, which no longer is allowed.

I was following the patterns of t/lib-submodule-update.sh. Are there
better examples that I can follow?

> The patches need to be updated to adjust to the new world order, of
> course, but we probably should take it as an example of what the
> most recent update may be breaking for real world users.
>
> Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-18 18:48       ` Heather Lapointe
@ 2022-10-19 16:16         ` Junio C Hamano
  2022-10-19 20:44           ` Junio C Hamano
  0 siblings, 1 reply; 48+ messages in thread
From: Junio C Hamano @ 2022-10-19 16:16 UTC (permalink / raw)
  To: Heather Lapointe
  Cc: Heather Lapointe via GitGitGadget, git, René Scharfe,
	Taylor Blau

Heather Lapointe <alpha@alphaservcomputing.solutions> writes:

> Thanks for taking a look.
>
> On Tue, Oct 18, 2022 at 2:34 PM Junio C Hamano <gitster@pobox.com> wrote:
>>
>> Today I was scheduled to be offline, so I won't dig further on the
>> issues this topic has now, but the new tests this series introduces,
>> namely t1023 and t5005, both relies on being able to clone a nested
>> submodule via file:// transport, which no longer is allowed.
>
> I was following the patterns of t/lib-submodule-update.sh. Are there
> better examples
> that I can follow?

Mimic what Taylor did to adjust to the new world order that was
introduced in the 2.38.1 update.

Look at 9c32cfb4 (Sync with v2.38.1, 2022-10-17), which merges
2.38.1 and updates the tests to adjust to the new world order, by
comparing the t/ directory of its first parent and the result of the
merge.  It shows what Taylor did to adjust the tests.

    $ git diff 9c32cfb4^ 9c32cfb4 t/

I personally doubt it is generally a good idea, as it sets a bad
pattern that tempts unsuspecting users to blindly copy and paste it
to their $HOME/.gitconfig without realizing what its ramifications
are, but the easiest workaround may be to mimic what was done in
t/lib-submodule-update.sh that sets protocol.file.allow
configuration knob globally.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-19 16:16         ` Junio C Hamano
@ 2022-10-19 20:44           ` Junio C Hamano
  2022-10-20  1:21             ` Junio C Hamano
  0 siblings, 1 reply; 48+ messages in thread
From: Junio C Hamano @ 2022-10-19 20:44 UTC (permalink / raw)
  To: Heather Lapointe
  Cc: Heather Lapointe via GitGitGadget, git, René Scharfe,
	Taylor Blau

Junio C Hamano <gitster@pobox.com> writes:

> Mimic what Taylor did to adjust to the new world order that was
> introduced in the 2.38.1 update.
>
> Look at 9c32cfb4 (Sync with v2.38.1, 2022-10-17), which merges
> 2.38.1 and updates the tests to adjust to the new world order, by
> comparing the t/ directory of its first parent and the result of the
> merge.  It shows what Taylor did to adjust the tests to adjust.
>
>     $ git diff 9c32cfb4^ 9c32cfb4 t/
>
> I personally doubt it is generally a good idea, as it sets a bad
> pattern that tempts unsuspecting users to blindly copy and paste it
> to their $HOME/.gitconfig without realizing what its ramifications
> are, but the easiest workaround may be to mimic what was done in
> t/lib-submodule-update.sh that sets protocol.file.allow
> configuration knob globally.

I'll queue this at the tip of your topic when I rebuild 'seen' for
today's integration run.

 t/t1023-tree-read-tree-at.sh  | 4 +++-
 t/t5005-archive-submodules.sh | 7 ++++---
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/t/t1023-tree-read-tree-at.sh b/t/t1023-tree-read-tree-at.sh
index 9e5ce3abb4..cfe6c867e3 100755
--- a/t/t1023-tree-read-tree-at.sh
+++ b/t/t1023-tree-read-tree-at.sh
@@ -32,7 +32,8 @@ test_expect_success 'read_tree basic' '
 '
 
 test_expect_success 'read_tree submodules' '
-	rm -rf walk_tree_submodules &&
+	git config --global protocol.file.allow always &&
+	rm -rf submodule1 &&
 	git init submodule1 &&
 	(
 		cd submodule1 &&
@@ -42,6 +43,7 @@ test_expect_success 'read_tree submodules' '
 		git add file1.txt dir1/dirA/file1.txt &&
 		git commit -m "initial commit"
 	) &&
+	rm -rf walk_tree_submodules &&
 	git init walk_tree_submodules &&
 	(
 		cd walk_tree_submodules &&
diff --git a/t/t5005-archive-submodules.sh b/t/t5005-archive-submodules.sh
index aad6cfd108..e1413e08a2 100755
--- a/t/t5005-archive-submodules.sh
+++ b/t/t5005-archive-submodules.sh
@@ -4,7 +4,7 @@ test_description='git archive --recurse-submodules test'
 
 . ./test-lib.sh
 
-check_tar() {
+check_tar () {
 	tarfile=$1.tar
 	listfile=$1.lst
 	dir=$1
@@ -15,7 +15,7 @@ check_tar() {
 	'
 }
 
-check_added() {
+check_added () {
 	dir=$1
 	path_in_fs=$2
 	path_in_archive=$3
@@ -26,7 +26,7 @@ check_added() {
 	'
 }
 
-check_not_added() {
+check_not_added () {
 	dir=$1
 	path_in_archive=$2
 
@@ -37,6 +37,7 @@ check_not_added() {
 }
 
 test_expect_success 'setup' '
+	git config --global protocol.file.allow always &&
 	rm -rf repo_with_submodules submodule1 uninited_repo_with_submodules &&
 	git init repo_with_submodules &&
 	git init submodule1 &&
-- 
2.38.1-236-gf47955814b


^ permalink raw reply related	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-19 20:44           ` Junio C Hamano
@ 2022-10-20  1:21             ` Junio C Hamano
  2022-10-21  1:43               ` Junio C Hamano
  0 siblings, 1 reply; 48+ messages in thread
From: Junio C Hamano @ 2022-10-20  1:21 UTC (permalink / raw)
  To: Heather Lapointe
  Cc: Heather Lapointe via GitGitGadget, git, René Scharfe,
	Taylor Blau

Junio C Hamano <gitster@pobox.com> writes:

>> I personally doubt it is generally a good idea, as it sets a bad
>> pattern that tempts unsuspecting users to blindly copy and paste it
>> to their $HOME/.gitconfig without realizing what its ramifications
>> are, but the easiest workaround may be to mimic what was done in
>> t/lib-submodule-update.sh that sets protocol.file.allow
>> configuration knob globally.
>
> I'll queue this at the tip of your topic when I rebuild 'seen' for
> today's integration run.
>
>  t/t1023-tree-read-tree-at.sh  | 4 +++-
>  t/t5005-archive-submodules.sh | 7 ++++---
>  2 files changed, 7 insertions(+), 4 deletions(-)

It seems to have cleared the "submodule tests no longer can use
submodules with file:// without tweaking the config" issue I saw
earlier.  It seems to give us a segfault in win+VS test, though.

https://github.com/git/git/actions/runs/3285647856/jobs/5413033844#step:5:245

Thanks.



^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-20  1:21             ` Junio C Hamano
@ 2022-10-21  1:43               ` Junio C Hamano
  0 siblings, 0 replies; 48+ messages in thread
From: Junio C Hamano @ 2022-10-21  1:43 UTC (permalink / raw)
  To: Heather Lapointe
  Cc: Heather Lapointe via GitGitGadget, git, René Scharfe,
	Taylor Blau

Junio C Hamano <gitster@pobox.com> writes:

> It seems to have cleared the "submodule tests no longer can use
> submodules with file:// without tweaking the config" issue I saw
> earlier.  It seems to give us a segfault in win+VS test, though.
>
> https://github.com/git/git/actions/runs/3285647856/jobs/5413033844#step:5:245

Here is a pair of CI runs that attribute the breakage to this topic:

  https://github.com/git/git/actions/runs/3293333066

is one CI run on 'seen' that has this topic and everything else in
flight.  This topic is at the tip of 'seen' when this CI run was
done, and win+VS test (8) seems to be failing the same way as the
previous one I reported earlier above.

Dropping the merge of this topic (i.e. "git reset --hard HEAD^") out
of 'seen' and running CI again: 

  https://github.com/git/git/actions/runs/3293553109

we can see that all tests pass there, which unfortunately is a rare
event these days (well, the segfaulting code is something this topic
adds, so it is not surprising that the rest of the topics in flight
would not segfault the same way).

Do you need help from somebody equipped with Windows knowledge and
build/test environment?  As I do not do Windows or macOS, I cannot
offer to be one myself, but the development community is full of
capable folks and help is often a request away.

Thanks.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
                       ` (10 preceding siblings ...)
  2022-10-18 18:34     ` Junio C Hamano
@ 2022-10-26 22:14     ` Glen Choo
  2022-10-28 18:18       ` Heather Lapointe
  11 siblings, 1 reply; 48+ messages in thread
From: Glen Choo @ 2022-10-26 22:14 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

Hi Heather!

We covered this series in Review Club [1]. We will leave our review
comments on this thread, though you may find the notes [2] useful.

[1] https://lore.kernel.org/git/kl6l35bbsubq.fsf@chooglen-macbookpro.roam.corp.google.com
[2] https://docs.google.com/document/d/14L8BAumGTpsXpjDY8VzZ4rRtpAjuGrFSRqn3stCuS_w/edit?pli=1#

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:

> This makes it possible to include submodule contents in an archive command.
>
> The inspiration for this change comes from this Github thread,
> https://github.com/dear-github/dear-github/issues/214, with at least 160
> 👍🏻 's at the time of writing. (I stumbled upon it because I wanted it as
> well).
>
> I figured the underlying implementation wouldn't be too difficult with most
> of the plumbing already in place, so I decided to add the relevant logic to
> the client git-archive command.
>
> One of the trickier parts of this implementation involved teaching read_tree
> about submodules. Some of the troublesome areas were still using the
> the_repository references to look up commit or tree or oid information. I
> ended up deciding that read_tree_fn_t would probably be best off having a
> concrete repo reference since it allows changing the context to a subrepo
> where needed (even though some of the usages did not need it specifically).
>
> I am open to feedback since this is all quite new to me :)

The Review Club participants generally agreed that this is a really
well-structured and easy-to-follow series :) As far as new contributions
go, this is really good.

I think this series broadly makes sense, i.e.:

- the implementation of plumbing "struct repository" through read_tree()
  (this might also be really helpful for future work)
- the interface (using "--recurse-submodules")
- the expected behavior

So I can see this going through with a bit of polish. The others have
covered style issues quite thoroughly, so I won't comment on those.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods.
  2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
  2022-10-17 13:26       ` Junio C Hamano
@ 2022-10-26 22:33       ` Glen Choo
  2022-10-27 18:09       ` Jonathan Tan
  2 siblings, 0 replies; 48+ messages in thread
From: Glen Choo @ 2022-10-26 22:33 UTC (permalink / raw)
  To: Alphadelta14 via GitGitGadget, git; +Cc: René Scharfe, Heather Lapointe

"Alphadelta14 via GitGitGadget" <gitgitgadget@gmail.com> writes:

> diff --git a/tree.h b/tree.h
> index 6efff003e21..cc6402e4738 100644
> --- a/tree.h
> +++ b/tree.h
> @@ -18,15 +18,21 @@ struct tree *lookup_tree(struct repository *r, const struct object_id *oid);
>  
>  int parse_tree_buffer(struct tree *item, void *buffer, unsigned long size);
>  
> -int parse_tree_gently(struct tree *tree, int quiet_on_missing);
> -static inline int parse_tree(struct tree *tree)
> +int repo_parse_tree_gently(struct repository *r, struct tree *tree, int quiet_on_missing);
> +static inline int repo_parse_tree(struct repository *r, struct tree *tree)
>  {
> -	return parse_tree_gently(tree, 0);
> +	return repo_parse_tree_gently(r, tree, 0);
>  }
> +
> +#ifndef NO_THE_REPOSITORY_COMPATIBILITY_MACROS
> +#define parse_tree(tree) repo_parse_tree(the_repository, tree)
> +#define parse_tree_gently(tree, quiet_on_missing) repo_parse_tree_gently(the_repository, tree, quiet_on_missing)
> +#define parse_tree_indirect(oid) repo_parse_tree_indirect(the_repository, oid)
> +#endif

Typically, when we have repo_* and non-repo_* variants, we use a "static
inline" function, e.g. from refs.h:

  int repo_dwim_ref(struct repository *r, const char *str, int len,
        struct object_id *oid, char **ref, int nonfatal_dangling_mark);

  static inline int dwim_ref(const char *str, int len, struct object_id *oid,
          char **ref, int nonfatal_dangling_mark)
  {
    return repo_dwim_ref(the_repository, str, len, oid, ref,
            nonfatal_dangling_mark);
  }

I think we should do the same here, instead of using "#ifndef
NO_THE_REPOSITORY_COMPATIBILITY_MACROS".

From what I can gather from "git log -S
NO_THE_REPOSITORY_COMPATIBILITY_MACROS", that macro was introduced
in e675765235 (diff.c: remove implicit dependency on the_index,
2018-09-21) and all instances of that macro were introduced around that
time. At the time, there was an effort to get rid of the_repository and
the_index almost everywhere (except builtins), and the macro would
ensure that we did this successfully.

We did such a good job with the_index that we flipped the default from
NO_THE_INDEX_COMPATIBILITY_MACROS to USE_THE_INDEX_COMPATIBILITY_MACROS
(f8adbec9fe (cache.h: flip NO_THE_INDEX_COMPATIBILITY_MACROS switch,
2019-01-24)) but it looks like we never got there with the_repository.
I couldn't find any instances of "#define
NO_THE_REPOSITORY_COMPATIBILITY_MACROS", so I think we should just use
"static inline" instead.

Alternatively, one could get rid of the non-repo_* variant and adjust
all existing callers to use "struct repository", but that's a lot of
churn and may conflict with other in-flight series, so that's probably
left for another time.

>  void free_tree_buffer(struct tree *tree);
>  
>  /* Parses and returns the tree in the given ent, chasing tags and commits. */
> -struct tree *parse_tree_indirect(const struct object_id *oid);
> +struct tree *repo_parse_tree_indirect(struct repository *r, const struct object_id *oid);
>  
>  int cmp_cache_name_compare(const void *a_, const void *b_);
>  
> -- 
> gitgitgadget

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly
  2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
  2022-10-17 13:48       ` Phillip Wood
  2022-10-17 13:56       ` Junio C Hamano
@ 2022-10-26 22:48       ` Glen Choo
  2022-10-27 18:43       ` Jonathan Tan
  3 siblings, 0 replies; 48+ messages in thread
From: Glen Choo @ 2022-10-26 22:48 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:

> @@ -47,40 +48,73 @@ int read_tree_at(struct repository *r,
>  			return -1;
>  		}
>  
> -		if (S_ISDIR(entry.mode))
> +		if (S_ISDIR(entry.mode)) {
>  			oidcpy(&oid, &entry.oid);
> -		else if (S_ISGITLINK(entry.mode)) {
> -			struct commit *commit;
>  
> -			commit = lookup_commit(r, &entry.oid);
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			strbuf_addch(base, '/');
> +			retval = read_tree_at(r, lookup_tree(r, &oid),
> +						base, pathspec,
> +						fn, context);
> +			strbuf_setlen(base, oldlen);
> +			if (retval)
> +				return -1;
> +		} else if (pathspec->recurse_submodules && S_ISGITLINK(entry.mode)) {
> +			struct commit *commit;
> +			struct repository subrepo;
> +			struct repository* subrepo_p = &subrepo;
> +			struct tree* submodule_tree;
> +			char *submodule_rel_path;
> +			int name_base_len = 0;
> +
> +			len = tree_entry_len(&entry);
> +			strbuf_add(base, entry.path, len);
> +			submodule_rel_path = base->buf;
> +			// repo_submodule_init expects a path relative to submodule_prefix
> +			if (r->submodule_prefix) {
> +				name_base_len = strlen(r->submodule_prefix);
> +				// we should always expect to start with submodule_prefix
> +				assert(!strncmp(submodule_rel_path, r->submodule_prefix, name_base_len));
> +				// strip the prefix
> +				submodule_rel_path += name_base_len;
> +				// if submodule_prefix doesn't end with a /, we want to get rid of that too
> +				if (is_dir_sep(submodule_rel_path[0])) {
> +					submodule_rel_path++;
> +				}
> +			}
> +
> +			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))
> +				die("couldn't init submodule %s", base->buf);
> +
> +			if (repo_read_index(subrepo_p) < 0)
> +				die("index file corrupt");
> +
> +			commit = lookup_commit(subrepo_p, &entry.oid);
>  			if (!commit)
> -				die("Commit %s in submodule path %s%s not found",
> +				die("Commit %s in submodule path %s not found",
>  				    oid_to_hex(&entry.oid),
> -				    base->buf, entry.path);
> -
> -			// FIXME: This is the wrong repo instance (it refers to the superproject)
> -			// it will always fail as is (will fix in later patch)
> -			// This current codepath isn't executed by any existing callbacks
> -			// so it wouldn't show up as an issue at this time.
> -			if (repo_parse_commit(r, commit))
> -				die("Invalid commit %s in submodule path %s%s",
> +				    base->buf);
> +
> +			if (repo_parse_commit(subrepo_p, commit))
> +				die("Invalid commit %s in submodule path %s",
>  				    oid_to_hex(&entry.oid),
> -				    base->buf, entry.path);
> +				    base->buf);
>  
> -			oidcpy(&oid, get_commit_tree_oid(commit));
> -		}
> -		else
> -			continue;
> +			submodule_tree = repo_get_commit_tree(subrepo_p, commit);
> +			oidcpy(&oid, submodule_tree ? &submodule_tree->object.oid : NULL);
>  
> -		len = tree_entry_len(&entry);
> -		strbuf_add(base, entry.path, len);
> -		strbuf_addch(base, '/');
> -		retval = read_tree_at(r, lookup_tree(r, &oid),
> -				      base, pathspec,
> -				      fn, context);
> -		strbuf_setlen(base, oldlen);
> -		if (retval)
> -			return -1;
> +			strbuf_addch(base, '/');
> +
> +			retval = read_tree_at(subrepo_p, lookup_tree(subrepo_p, &oid),
> +						base, pathspec,
> +						fn, context);
> +			if (retval)
> +			    die("failed to read tree for %s", base->buf);
> +			strbuf_setlen(base, oldlen);
> +			repo_clear(subrepo_p);
> +		}
> +		// else, this is a file (or a submodule, but no pathspec->recurse_submodules)

In this patch, we say that we can ignore a submodule when
pathspec->recurse_submodules is 0, but unless I'm missing something, I
don't think that's the case. The preimage is:

		else if (S_ISGITLINK(entry.mode)) {
			struct commit *commit;

			commit = lookup_commit(r, &entry.oid);
			if (!commit)
				die("Commit %s in submodule path %s%s not found",
				    oid_to_hex(&entry.oid),
				    base->buf, entry.path);

      /* ... */
			if (repo_parse_commit(r, commit))
				die("Invalid commit %s in submodule path %s%s",
				    oid_to_hex(&entry.oid),
				    base->buf, entry.path);

			oidcpy(&oid, get_commit_tree_oid(commit));
		}
		else
			continue;

		len = tree_entry_len(&entry);
		strbuf_add(base, entry.path, len);
		strbuf_addch(base, '/');
		retval = read_tree_at(r, lookup_tree(r, &oid),
				      base, pathspec,
				      fn, context);

which isn't a no-op since we actually do recurse into the gitlink. I
don't know whether the subsequent call actually succeeds though (e.g.
maybe it always failed and it was just a de facto no-op?), but that's
much harder to prove. Since this function has callers outside of "git
archive", it would be better to be conservative and keep the original
behavior in the S_ISGITLINK(entry.mode) && !pathspec->recurse_submodules
case.

>  	}
>  	return 0;
>  }
> -- 
> gitgitgadget

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
  2022-10-17  2:23     ` [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
@ 2022-10-26 23:34       ` Glen Choo
  2022-10-27  7:09         ` René Scharfe
  0 siblings, 1 reply; 48+ messages in thread
From: Glen Choo @ 2022-10-26 23:34 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget, git
  Cc: René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:

> index 34549d849f1..f81ef741487 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
>  	oidcpy(&d->oid, oid);
>  }
>  
> +static void queue_submodule(
> +		struct repository *superproject,
> +		const struct object_id *oid,
> +		struct strbuf *base, const char *filename,
> +		unsigned mode, struct archiver_context *c)
> +{
> +	struct repository subrepo;
> +
> +	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
> +		return;
> +
> +	if (repo_read_index(&subrepo) < 0)
> +		die("index file corrupt");
> +
> +    queue_directory(oid, base, filename, mode, c);
> +
> +	repo_clear(&subrepo);
> +}
> +

This bit is puzzling to me because we init the submodule, read its
index, and then don't read objects from it at all. How does this work
when we aren't reading objects from the submodule we init here? My guess
is that read_tree() is already doing the heavy lifting of recursing into
submodules, so we don't need to worry any more about init-ing submodules
in archive.c, which is great.

So in effect, this is just checking whether we can read the submodule
and its index. We can drop this check since we already do that check in
read_tree().

What's much more surprising is that you can delete the entire function
body (even queue_directory()!) and the tests still pass! The tests are
definitely testing what they say they are (I've also checked the
tarballs), so I'm not sure what's going on.

I commented out queue_directory() in the S_ISDIR case, and the only test
failures I saw were:

- t5000.68, which uses a glob in its pathspec. I tried using a glob
  in the archive submodule tests, but I couldn't reproduce the failure.
- t5004.11, which is a really big test case that I didn't bother looking
  deeply into.

So I'm at a loss as to what queue_directory() actually does. My best
guess at a reproduction would be to make a subdirectory in t5000.68 a
submodule. If we do find such a reproducing case, we should add it to
the test suite.

>  static int write_directory(
>  		struct repository *repo,
>  		struct archiver_context *c)
> @@ -228,9 +248,11 @@ static int write_directory(
>  		write_directory(repo, c) ||
>  		write_archive_entry(repo, &d->oid, d->path, d->baselen,
>  				    d->path + d->baselen, d->mode,
> -				    c) != READ_TREE_RECURSIVE;
> +				    c);
>  	free(d);
> -	return ret ? -1 : 0;
> +	if (ret == READ_TREE_RECURSIVE)
> +		return 0;
> +	return ret;
>  }
>  
>  static int queue_or_write_archive_entry(
> @@ -263,6 +285,11 @@ static int queue_or_write_archive_entry(
>  			return 0;
>  		queue_directory(oid, base, filename, mode, c);
>  		return READ_TREE_RECURSIVE;
> +	} else if (c->args->recurse_submodules && S_ISGITLINK(mode)) {
> +		if (is_submodule_active(r, filename)) {
> +			queue_submodule(r, oid, base, filename, mode, c);
> +			return READ_TREE_RECURSIVE;
> +		}

If we are omitting inactive submodules from the archive, we should test
this behavior.

>  	}
>  
>  	if (write_directory(r, c))
> @@ -446,6 +473,7 @@ static void parse_pathspec_arg(
>  		       PATHSPEC_PREFER_FULL,
>  		       "", pathspec);
>  	ar_args->pathspec.recursive = 1;
> +	ar_args->pathspec.recurse_submodules = ar_args->recurse_submodules;
>  	if (pathspec) {
>  		while (*pathspec) {
>  			if (**pathspec && !path_exists(repo, ar_args, *pathspec))
> @@ -609,6 +637,7 @@ static int parse_archive_args(int argc, const char **argv,
>  	int verbose = 0;
>  	int i;
>  	int list = 0;
> +	int recurse_submodules = 0;
>  	int worktree_attributes = 0;
>  	struct option opts[] = {
>  		OPT_GROUP(""),
> @@ -623,6 +652,8 @@ static int parse_archive_args(int argc, const char **argv,
>  		  add_file_cb, (intptr_t)&base },
>  		OPT_STRING('o', "output", &output, N_("file"),
>  			N_("write the archive to this file")),
> +		OPT_BOOL(0, "recurse-submodules", &recurse_submodules,
> +			N_("include submodules in archive")),
>  		OPT_BOOL(0, "worktree-attributes", &worktree_attributes,
>  			N_("read .gitattributes in working directory")),
>  		OPT__VERBOSE(&verbose, N_("report archived files on stderr")),
> @@ -686,6 +717,7 @@ static int parse_archive_args(int argc, const char **argv,
>  	args->verbose = verbose;
>  	args->base = base;
>  	args->baselen = strlen(base);
> +	args->recurse_submodules = recurse_submodules;
>  	args->worktree_attributes = worktree_attributes;
>  
>  	return argc;
> diff --git a/archive.h b/archive.h
> index 540a3b12130..1b21484dda6 100644
> --- a/archive.h
> +++ b/archive.h
> @@ -18,6 +18,7 @@ struct archiver_args {
>  	timestamp_t time;
>  	struct pathspec pathspec;
>  	unsigned int verbose : 1;
> +	unsigned int recurse_submodules : 1;
>  	unsigned int worktree_attributes : 1;
>  	unsigned int convert : 1;
>  	int compression_level;
> -- 
> gitgitgadget

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
  2022-10-26 23:34       ` Glen Choo
@ 2022-10-27  7:09         ` René Scharfe
  2022-10-27 17:29           ` Glen Choo
                             ` (2 more replies)
  0 siblings, 3 replies; 48+ messages in thread
From: René Scharfe @ 2022-10-27  7:09 UTC (permalink / raw)
  To: Glen Choo, Heather Lapointe via GitGitGadget, git; +Cc: Heather Lapointe

Am 27.10.22 um 01:34 schrieb Glen Choo:
> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
>
>> index 34549d849f1..f81ef741487 100644
>> --- a/archive.c
>> +++ b/archive.c
>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
>>  	oidcpy(&d->oid, oid);
>>  }
>>
>> +static void queue_submodule(
>> +		struct repository *superproject,
>> +		const struct object_id *oid,
>> +		struct strbuf *base, const char *filename,
>> +		unsigned mode, struct archiver_context *c)
>> +{
>> +	struct repository subrepo;
>> +
>> +	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
>> +		return;
>> +
>> +	if (repo_read_index(&subrepo) < 0)
>> +		die("index file corrupt");
>> +
>> +    queue_directory(oid, base, filename, mode, c);
>> +
>> +	repo_clear(&subrepo);
>> +}
>> +

> What's much more surprising is that you can delete the entire function
> body (even queue_directory()!) and the tests still pass! The tests are
> definitely testing what they say they are (I've also checked the
> tarballs), so I'm not sure what's going on.
>
> I commented out queue_directory() in the S_ISDIR case, and the only test
> failures I saw were:
>
> - t5000.68, which uses a glob in its pathspec. I tried using a glob for
>   in the archive submodule tests, but I couldn't reproduce the failure.
> - t5004.11, which is a really big test case that I didn't bother looking
>   deeply into.
>
> So I'm at a loss as to what queue_directory() actually does.
An archive doesn't strictly need directory entries.  If it contains a
file with a deeply nested path then extractors will create the parent
directory hierarchy regardless.  diff(1) won't notice any difference.
Directory entries are mainly included to specify the permission bits.

t5000.68 checks for the directory entries in the output given by the
option --verbose of git archive.  t5004.11 checks the number of archive
entries (including directories) using "zipinfo -h".

René

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
  2022-10-27  7:09         ` René Scharfe
@ 2022-10-27 17:29           ` Glen Choo
  2022-10-27 17:30           ` Glen Choo
  2022-10-27 17:33           ` Glen Choo
  2 siblings, 0 replies; 48+ messages in thread
From: Glen Choo @ 2022-10-27 17:29 UTC (permalink / raw)
  To: René Scharfe, Heather Lapointe via GitGitGadget, git
  Cc: Heather Lapointe

René Scharfe <l.s.r@web.de> writes:

> Am 27.10.22 um 01:34 schrieb Glen Choo:
>> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>
>>> index 34549d849f1..f81ef741487 100644
>>> --- a/archive.c
>>> +++ b/archive.c
>>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
>>>  	oidcpy(&d->oid, oid);
>>>  }
>>>
>>> +static void queue_submodule(
>>> +		struct repository *superproject,
>>> +		const struct object_id *oid,
>>> +		struct strbuf *base, const char *filename,
>>> +		unsigned mode, struct archiver_context *c)
>>> +{
>>> +	struct repository subrepo;
>>> +
>>> +	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
>>> +		return;
>>> +
>>> +	if (repo_read_index(&subrepo) < 0)
>>> +		die("index file corrupt");
>>> +
>>> +    queue_directory(oid, base, filename, mode, c);
>>> +
>>> +	repo_clear(&subrepo);
>>> +}
>>> +
>
>> What's much more surprising is that you can delete the entire function
>> body (even queue_directory()!) and the tests still pass! The tests are
>> definitely testing what they say they are (I've also checked the
>> tarballs), so I'm not sure what's going on.
>>
>> I commented out queue_directory() in the S_ISDIR case, and the only test
>> failures I saw were:
>>
>> - t5000.68, which uses a glob in its pathspec. I tried using a glob for
>>   in the archive submodule tests, but I couldn't reproduce the failure.
>> - t5004.11, which is a really big test case that I didn't bother looking
>>   deeply into.
>>
>> So I'm at a loss as to what queue_directory() actually does.
> An archive doesn't strictly need directory entries.  If it contains a
> file with a deeply nested path then extractors will create the parent
> directory hierarchy regardless.  diff(1) won't notice any difference.
> Directory entries are mainly included to specify the permission bits.

Thanks. In that case, we should probably also test the case where there
are empty directories (e.g. when a file is excluded by a pathspec), and
we should also check the permission bits.

>
> t5000.68 checks for the directory entries in the output given by the
> option --verbose of git archive.  t5004.11 checks the number of archive
> entries (including directories) using "zipinfo -h".
>
> René

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
  2022-10-27  7:09         ` René Scharfe
  2022-10-27 17:29           ` Glen Choo
@ 2022-10-27 17:30           ` Glen Choo
  2022-10-27 17:33           ` Glen Choo
  2 siblings, 0 replies; 48+ messages in thread
From: Glen Choo @ 2022-10-27 17:30 UTC (permalink / raw)
  To: René Scharfe, Heather Lapointe via GitGitGadget, git
  Cc: Heather Lapointe

René Scharfe <l.s.r@web.de> writes:

> Am 27.10.22 um 01:34 schrieb Glen Choo:
>> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>
>>> index 34549d849f1..f81ef741487 100644
>>> --- a/archive.c
>>> +++ b/archive.c
>>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
>>>  	oidcpy(&d->oid, oid);
>>>  }
>>>
>>> +static void queue_submodule(
>>> +		struct repository *superproject,
>>> +		const struct object_id *oid,
>>> +		struct strbuf *base, const char *filename,
>>> +		unsigned mode, struct archiver_context *c)
>>> +{
>>> +	struct repository subrepo;
>>> +
>>> +	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
>>> +		return;
>>> +
>>> +	if (repo_read_index(&subrepo) < 0)
>>> +		die("index file corrupt");
>>> +
>>> +    queue_directory(oid, base, filename, mode, c);
>>> +
>>> +	repo_clear(&subrepo);
>>> +}
>>> +
>
>> What's much more surprising is that you can delete the entire function
>> body (even queue_directory()!) and the tests still pass! The tests are
>> definitely testing what they say they are (I've also checked the
>> tarballs), so I'm not sure what's going on.
>>
>> I commented out queue_directory() in the S_ISDIR case, and the only test
>> failures I saw were:
>>
>> - t5000.68, which uses a glob in its pathspec. I tried using a glob
>>   in the archive submodule tests, but I couldn't reproduce the failure.
>> - t5004.11, which is a really big test case that I didn't bother looking
>>   deeply into.
>>
>> So I'm at a loss as to what queue_directory() actually does.
> An archive doesn't strictly need directory entries.  If it contains a
> file with a deeply nested path then extractors will create the parent
> directory hierarchy regardless.  diff(1) won't notice any difference.
> Directory entries are mainly included to specify the permission bits.

Thanks. In that case, we should probably also test the case where there
are empty directories (e.g. when a file is excluded by a pathspec), and
we should also check the permission bits.

>
> t5000.68 checks for the directory entries in the output given by the
> option --verbose of git archive.  t5004.11 checks the number of archive
> entries (including directories) using "zipinfo -h".
>
> René

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command
  2022-10-27  7:09         ` René Scharfe
  2022-10-27 17:29           ` Glen Choo
  2022-10-27 17:30           ` Glen Choo
@ 2022-10-27 17:33           ` Glen Choo
  2 siblings, 0 replies; 48+ messages in thread
From: Glen Choo @ 2022-10-27 17:33 UTC (permalink / raw)
  To: René Scharfe, Heather Lapointe via GitGitGadget, git
  Cc: Heather Lapointe

René Scharfe <l.s.r@web.de> writes:

> Am 27.10.22 um 01:34 schrieb Glen Choo:
>> "Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
>>
>>> index 34549d849f1..f81ef741487 100644
>>> --- a/archive.c
>>> +++ b/archive.c
>>> @@ -213,6 +214,25 @@ static void queue_directory(const struct object_id *oid,
>>>  	oidcpy(&d->oid, oid);
>>>  }
>>>
>>> +static void queue_submodule(
>>> +		struct repository *superproject,
>>> +		const struct object_id *oid,
>>> +		struct strbuf *base, const char *filename,
>>> +		unsigned mode, struct archiver_context *c)
>>> +{
>>> +	struct repository subrepo;
>>> +
>>> +	if (repo_submodule_init(&subrepo, superproject, filename, null_oid()))
>>> +		return;
>>> +
>>> +	if (repo_read_index(&subrepo) < 0)
>>> +		die("index file corrupt");
>>> +
>>> +    queue_directory(oid, base, filename, mode, c);
>>> +
>>> +	repo_clear(&subrepo);
>>> +}
>>> +
>
>> What's much more surprising is that you can delete the entire function
>> body (even queue_directory()!) and the tests still pass! The tests are
>> definitely testing what they say they are (I've also checked the
>> tarballs), so I'm not sure what's going on.
>>
>> I commented out queue_directory() in the S_ISDIR case, and the only test
>> failures I saw were:
>>
>> - t5000.68, which uses a glob in its pathspec. I tried using a glob
>>   in the archive submodule tests, but I couldn't reproduce the failure.
>> - t5004.11, which is a really big test case that I didn't bother looking
>>   deeply into.
>>
>> So I'm at a loss as to what queue_directory() actually does.
> An archive doesn't strictly need directory entries.  If it contains a
> file with a deeply nested path then extractors will create the parent
> directory hierarchy regardless.  diff(1) won't notice any difference.
> Directory entries are mainly included to specify the permission bits.

Thanks. In that case, we should probably also test the case where there
are empty directories (e.g. when a file is excluded by a pathspec), and
we should also check the permission bits.

>
> t5000.68 checks for the directory entries in the output given by the
> option --verbose of git archive.  t5004.11 checks the number of archive
> entries (including directories) using "zipinfo -h".
>
> René

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods.
  2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
  2022-10-17 13:26       ` Junio C Hamano
  2022-10-26 22:33       ` Glen Choo
@ 2022-10-27 18:09       ` Jonathan Tan
  2022-10-27 18:50         ` Junio C Hamano
  2 siblings, 1 reply; 48+ messages in thread
From: Jonathan Tan @ 2022-10-27 18:09 UTC (permalink / raw)
  To: Alphadelta14 via GitGitGadget
  Cc: Jonathan Tan, git, René Scharfe, Heather Lapointe

First of all, let me echo what Glen said [1], that this series is  
overall well laid out and makes sense. 

Other reviewers have commented on style issues, but I'll hold off on 
making my comments on those and also possible improvements on commit 
messages until I can say "besides style and commit messages, I think 
that this series is good to merge in". 

[1] https://lore.kernel.org/git/kl6lr0yuqlk0.fsf@chooglen-macbookpro.roam.corp.google.com/

"Alphadelta14 via GitGitGadget" <gitgitgadget@gmail.com> writes:
> +			// This current codepath isn't executed by any existing callbacks
> +			// so it wouldn't show up as an issue at this time.

I was a bit confused by this comment, so I looked at the surrounding  
code. I think it could be better rephrased as: 

  All existing callbacks at the time of writing cause this part of the  
  code to be skipped when S_ISGITLINK(entry.mode) is true, so this 
  wrong behavior does not cause any issues.
 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 3/9] tree: increase test coverage for tree.c
  2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
  2022-10-17 13:34       ` Phillip Wood
  2022-10-17 13:36       ` Junio C Hamano
@ 2022-10-27 18:28       ` Jonathan Tan
  2 siblings, 0 replies; 48+ messages in thread
From: Jonathan Tan @ 2022-10-27 18:28 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: Jonathan Tan, git, René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
> +	setup_git_directory();
> +	repo = the_repository;
> +	assert(repo);
> +
> +	parse_pathspec(&pathspec, 0,
> +		       PATHSPEC_PREFER_FULL,
> +		       "", argv);

Here, the repository is hardcoded to be "the_repository", and the C 
code allows the pathspec to be varied but the shell test code always 
specifies "." as the pathspec. Given that one of the main points of 
this series is the repo varying, could the repo be taken in as a CLI 
argument? The pathspec can be left variable, but if it's not going to 
change, you might as well hardcode it in the C code. 

The existing test cases of a basic one and a recursing one are good, but
it would be good also to have one where the repo being passed into the 
function is not the repo whose directory we're currently executing in 
(that is, different to the_repository). That way we can test that the 
function works for arbitrary repositories.

 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly
  2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
                         ` (2 preceding siblings ...)
  2022-10-26 22:48       ` Glen Choo
@ 2022-10-27 18:43       ` Jonathan Tan
  3 siblings, 0 replies; 48+ messages in thread
From: Jonathan Tan @ 2022-10-27 18:43 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: Jonathan Tan, git, René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
> From: Heather Lapointe <alpha@alphaservcomputing.solutions>
> 
> This supports traversal into an actual submodule for read_tree_at.
> The logic is blocked on pathspec->recurse_submodules now,

What do you mean by "blocked"? Do you mean only that 
pathspec->recurse_submodules needs to be specified, or do you also mean 
that no caller specifies pathspec->recurse_submodules now, so this code 
path is never executed? 

> but previously hadn't been executed due to all fn() cases
> returning early for submodules.

What do you mean by "previously hadn't been executed due to..."? At a 
glance, I would think that this new logic is introduced in this patch, 
so of course it would never have been previously executed. 

> +			if (repo_submodule_init(subrepo_p, r, submodule_rel_path, null_oid()))

I don't think this can be null_oid() here - it has to match the tree 
from which you constructed submodule_rel_path. (That tree may not be 
the tree at HEAD.) 
 

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods.
  2022-10-27 18:09       ` Jonathan Tan
@ 2022-10-27 18:50         ` Junio C Hamano
  0 siblings, 0 replies; 48+ messages in thread
From: Junio C Hamano @ 2022-10-27 18:50 UTC (permalink / raw)
  To: Jonathan Tan
  Cc: Alphadelta14 via GitGitGadget, git, René Scharfe,
	Heather Lapointe

Jonathan Tan <jonathantanmy@google.com> writes:

> First of all, let me echo what Glen said [1], that this series is  
> overall well laid out and makes sense. 
>
> Other reviewers have commented on style issues, but I'll hold off on 
> making my comments on those and also possible improvements on commit 
> messages until I can say "besides style and commit messages, I think 
> that this series is good to merge in". 
>
> [1] https://lore.kernel.org/git/kl6lr0yuqlk0.fsf@chooglen-macbookpro.roam.corp.google.com/
>
> "Alphadelta14 via GitGitGadget" <gitgitgadget@gmail.com> writes:
>> +			// This current codepath isn't executed by any existing callbacks
>> +			// so it wouldn't show up as an issue at this time.
>
> I was a bit confused by this comment, so I looked at the surrounding  
> code. I think it could be better rephrased as: 
>
>   All existing callbacks at the time of writing cause this part of the  
>   code to be skipped when S_ISGITLINK(entry.mode) is true, so this 
>   wrong behavior does not cause any issues.
>  

As I already said, I do not think this is "wrong behaviour" to begin
with.  The current code requires that you'd use add_submodule_odb()
to make the objects in them accessible and if your program fails to
do so, as a very natural consequence, you'd not see objects pointed
by the gitlink.

Changing that assumption is OK as long as existing callers that
depend on the current semantics are not broken by such a change, but
I do not think "wrong behaviour does not cause any issues" is a
correct analysis of the problem.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules
  2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
@ 2022-10-27 18:54       ` Jonathan Tan
  2022-10-27 23:30         ` Glen Choo
  2022-10-28  0:17       ` Ævar Arnfjörð Bjarmason
  1 sibling, 1 reply; 48+ messages in thread
From: Jonathan Tan @ 2022-10-27 18:54 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: Jonathan Tan, git, René Scharfe, Heather Lapointe

"Heather Lapointe via GitGitGadget" <gitgitgadget@gmail.com> writes:
> diff --git a/archive.c b/archive.c
> index f81ef741487..b0a3181f7f5 100644
> --- a/archive.c
> +++ b/archive.c
> @@ -179,7 +179,7 @@ static int write_archive_entry(
>  		err = write_entry(repo, args, oid, path.buf, path.len, mode, NULL, 0);
>  		if (err)
>  			return err;
> -		return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
> +		return READ_TREE_RECURSIVE;

Should this change be in the previous commit, if this commit is about 
tests? 

> +check_tar() {
> +	tarfile=$1.tar
> +	listfile=$1.lst
> +	dir=$1
> +	dir_with_prefix=$dir/$2
> +
> +	test_expect_success ' extract tar archive' '
> +		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile
> +	'
> +}

In the Git test codebase, there is a mix of styles in that some people 
want each test_expect_success block to be individually runnable (I am 
one of them), and to some, that's not as important. But this is 
extremely far in the other direction. It would be better if each
test_expect_success block tests one thing, but inspecting the resulting 
archive should all go into the same test_expect_success block that 
originally created the archive; we should not split each step of 
inspection into its own block. 

Also, I don't think we need to extract the tar to check it; using "tf" 
and inspecting the resulting list with "grep" and "! grep" should do. 


^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules
  2022-10-27 18:54       ` Jonathan Tan
@ 2022-10-27 23:30         ` Glen Choo
  0 siblings, 0 replies; 48+ messages in thread
From: Glen Choo @ 2022-10-27 23:30 UTC (permalink / raw)
  To: Jonathan Tan, Heather Lapointe via GitGitGadget
  Cc: Jonathan Tan, git, René Scharfe, Heather Lapointe

Jonathan Tan <jonathantanmy@google.com> writes:

> Also, I don't think we need to extract the tar to check it; using "tf" 
> and inspecting the resulting list with "grep" and "! grep" should do. 

It might be even easier to inspect the output with test_cmp, instead of
reasoning about which files should and should not be there.

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules
  2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
  2022-10-27 18:54       ` Jonathan Tan
@ 2022-10-28  0:17       ` Ævar Arnfjörð Bjarmason
  1 sibling, 0 replies; 48+ messages in thread
From: Ævar Arnfjörð Bjarmason @ 2022-10-28  0:17 UTC (permalink / raw)
  To: Heather Lapointe via GitGitGadget
  Cc: git, René Scharfe, Heather Lapointe


On Mon, Oct 17 2022, Heather Lapointe via GitGitGadget wrote:

> From: Heather Lapointe <alpha@alphaservcomputing.solutions>

[In addition to what others mentioned]

> +test_description='git archive --recurse-submodules test'
> +
> +. ./test-lib.sh
> +
> +check_tar() {
> +	tarfile=$1.tar
> +	listfile=$1.lst

This "listfile" is used nowhere?

> +	dir=$1
> +	dir_with_prefix=$dir/$2

Nor dir_with_prefix?

> +
> +	test_expect_success ' extract tar archive' '
> +		(mkdir $dir && cd $dir && "$TAR" xf -) <$tarfile

Aside from what Jonathan mentioned, maybe we can just use one variable
here then?

	mkdir $foo ... <$foo.tar

> +	test_expect_success " validate extra file $path_in_archive" '
> +		test -f $dir/$path_in_archive &&

Instead use "test_path_is_file", and in general for "test <whatever>"
check out if we have a wrapper in test-lib-functions.sh.

> +check_not_added() {
> +	dir=$1
> +	path_in_archive=$2
> +
> +	test_expect_success " validate unpresent file $path_in_archive" '
> +		! test -f $dir/$path_in_archive &&
> +		! test -d $dir/$path_in_archive

Don't test for what a thing isn't, but what it is. Can't we do that
here?

> +test_expect_success 'setup' '
> +	rm -rf repo_with_submodules submodule1 uninited_repo_with_submodules &&

Don't have a test rm -rf stuff from a previous block, but have
"test_when_finished" clean up after that previous test instead.

> +	git init repo_with_submodules &&
> +	git init submodule1 &&
> +	(
> +		cd submodule1 &&

This:
> +		echo "dir1/sub1/file1.txt" > "file1.txt" &&
> +		git add file1.txt &&
> +		git commit -m "initialize with file1.txt"

Looks like you can use test_commit instead.

And note you can use -C, so you won't need the sub-shell either, I think.
> +	) &&
> +	(
> +	    cd repo_with_submodules &&
> +	    echo "file2" > file2.txt &&
> +	    git add file2.txt &&
> +	    git commit -m "initialize with file2.txt" &&

Ditto.

> +	    mkdir -p dir1 &&

Let's drop "-p" here, to check for errors.

> +test_expect_success 'archive with recurse, non-init' '
> +	! git -C uninited_repo_with_submodules archive --recurse-submodules -v HEAD >b2-err.tar

For git, don't use !, use test_must_fail, ! hides segfaults.

Does this test pass when you build with SANITIZE=leak? Then you can do this at the top:

	TEST_PASSES_SANITIZE_LEAK=true
	. ./test-lib.sh

If you can't test that locally pushing to GitHub CI will tell you...

^ permalink raw reply	[flat|nested] 48+ messages in thread

* Re: [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command
  2022-10-26 22:14     ` Glen Choo
@ 2022-10-28 18:18       ` Heather Lapointe
  0 siblings, 0 replies; 48+ messages in thread
From: Heather Lapointe @ 2022-10-28 18:18 UTC (permalink / raw)
  To: Glen Choo; +Cc: Heather Lapointe via GitGitGadget, git, René Scharfe

On Wed, Oct 26, 2022 at 6:15 PM Glen Choo <chooglen@google.com> wrote:

> The Review Club participants generally agreed that this is a really
> well-structured and easy-to-follow series :) As far as new contributions
> go, this is really good.
>
> I think this series broadly makes sense, i.e.:
>
> - the implementation of plumbing "struct repository" through read_tree()
>   (this might also be really helpful for future work)
> - the interface (using "--recurse-submodules")
> - the expected behavior
>
> So I can see this going through with a bit of polish. The others have
> covered style issues quite thoroughly, so I won't comment on those.

Thank you! I've started looking through a lot of these!
I have been a bit swamped with my own work or I would have contributed
another patch series by now.

^ permalink raw reply	[flat|nested] 48+ messages in thread

end of thread, other threads:[~2022-10-28 18:19 UTC | newest]

Thread overview: 48+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-10-12 17:52 [PATCH] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
2022-10-13 11:35 ` [PATCH v2 0/2] archive: Add " Heather Lapointe via GitGitGadget
2022-10-13 11:35   ` [PATCH v2 1/2] archive: add " Alphadelta14 via GitGitGadget
2022-10-13 17:53     ` René Scharfe
2022-10-13 21:37       ` Heather Lapointe
2022-10-13 11:36   ` [PATCH v2 2/2] archive: fix a case of submodule in submodule traversal Alphadelta14 via GitGitGadget
2022-10-13 17:53   ` [PATCH v2 0/2] archive: Add --recurse-submodules to git-archive command René Scharfe
2022-10-13 21:23     ` Heather Lapointe
2022-10-14  9:47       ` René Scharfe
2022-10-17  2:23   ` [PATCH v3 0/9] " Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 1/9] tree: do not use the_repository for tree traversal methods Alphadelta14 via GitGitGadget
2022-10-17 13:26       ` Junio C Hamano
2022-10-26 22:33       ` Glen Choo
2022-10-27 18:09       ` Jonathan Tan
2022-10-27 18:50         ` Junio C Hamano
2022-10-17  2:23     ` [PATCH v3 2/9] tree: update cases to use repo_ tree methods Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 3/9] tree: increase test coverage for tree.c Heather Lapointe via GitGitGadget
2022-10-17 13:34       ` Phillip Wood
2022-10-17 13:36       ` Junio C Hamano
2022-10-27 18:28       ` Jonathan Tan
2022-10-17  2:23     ` [PATCH v3 4/9] tree: handle submodule case for read_tree_at properly Heather Lapointe via GitGitGadget
2022-10-17 13:48       ` Phillip Wood
2022-10-17 13:56       ` Junio C Hamano
2022-10-26 22:48       ` Glen Choo
2022-10-27 18:43       ` Jonathan Tan
2022-10-17  2:23     ` [PATCH v3 5/9] tree: add repository parameter to read_tree_fn_t Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 6/9] archive: pass repo objects to write_archive handlers Heather Lapointe via GitGitGadget
2022-10-17 13:50       ` Phillip Wood
2022-10-17  2:23     ` [PATCH v3 7/9] archive: remove global repository from archive_args Heather Lapointe via GitGitGadget
2022-10-17  2:23     ` [PATCH v3 8/9] archive: add --recurse-submodules to git-archive command Heather Lapointe via GitGitGadget
2022-10-26 23:34       ` Glen Choo
2022-10-27  7:09         ` René Scharfe
2022-10-27 17:29           ` Glen Choo
2022-10-27 17:30           ` Glen Choo
2022-10-27 17:33           ` Glen Choo
2022-10-17  2:23     ` [PATCH v3 9/9] archive: add tests for git archive --recurse-submodules Heather Lapointe via GitGitGadget
2022-10-27 18:54       ` Jonathan Tan
2022-10-27 23:30         ` Glen Choo
2022-10-28  0:17       ` Ævar Arnfjörð Bjarmason
2022-10-17 13:57     ` [PATCH v3 0/9] archive: Add --recurse-submodules to git-archive command Phillip Wood
2022-10-18 18:34     ` Junio C Hamano
2022-10-18 18:48       ` Heather Lapointe
2022-10-19 16:16         ` Junio C Hamano
2022-10-19 20:44           ` Junio C Hamano
2022-10-20  1:21             ` Junio C Hamano
2022-10-21  1:43               ` Junio C Hamano
2022-10-26 22:14     ` Glen Choo
2022-10-28 18:18       ` Heather Lapointe

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).