git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: "Derrick Stolee via GitGitGadget" <gitgitgadget@gmail.com>
To: git@vger.kernel.org
Cc: gitster@pobox.com, me@ttaylorr.com, newren@gmail.com,
	"Ævar Arnfjörð Bjarmason" <avarab@gmail.com>,
	"Teng Long" <dyroneteng@gmail.com>,
	"Johannes Schindelin" <Johannes.Schindelin@gmx.de>,
	"Derrick Stolee" <derrickstolee@github.com>,
	"Derrick Stolee" <derrickstolee@github.com>
Subject: [PATCH 15/24] bundle-uri: fetch a list of bundles
Date: Fri, 20 May 2022 18:40:33 +0000	[thread overview]
Message-ID: <ceb253ec445b7275943e1c04ac51f12869271e91.1653072042.git.gitgitgadget@gmail.com> (raw)
In-Reply-To: <pull.1234.git.1653072042.gitgitgadget@gmail.com>

From: Derrick Stolee <derrickstolee@github.com>

When the content at a given bundle URI is not understood as a bundle
(based on inspecting the initial content), then Git currently gives up
and ignores that content. Independent bundle providers may want to split
up the bundle content into multiple bundles, but still make them
available from a single URI.

Teach Git to attempt parsing the bundle URI content as a Git config file
providing the key=value pairs for a bundle list. Git then looks at the
mode of the list to see if ANY single bundle is sufficient or if ALL
bundles are required. The content at the selected URIs are downloaded
and the content is inspected again, creating a recursive process.

To guard the recursion against malformed or malicious content, limit the
recursion depth to a reasonable four for now. This can be converted to a
configured value in the future if necessary. The value of four is twice
as high as expected to be useful (a bundle list is unlikely to point to
more bundle lists).

RFC TODO: add tests for this feature right now.

Signed-off-by: Derrick Stolee <derrickstolee@github.com>
---
 bundle-uri.c | 213 ++++++++++++++++++++++++++++++++++++++++++++++-----
 bundle-uri.h |   6 ++
 2 files changed, 200 insertions(+), 19 deletions(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index ab4b6385fc0..601e6e87822 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -35,9 +35,10 @@ void init_bundle_list(struct bundle_list *list)
 static int clear_remote_bundle_info(struct remote_bundle_info *bundle,
 				    void *data)
 {
-	free(bundle->id);
-	free(bundle->uri);
+	FREE_AND_NULL(bundle->id);
+	FREE_AND_NULL(bundle->uri);
 	strbuf_release(&bundle->file);
+	bundle->unbundled = 0;
 	return 0;
 }
 
@@ -336,18 +337,102 @@ static int unbundle_from_file(struct repository *r, const char *file)
 	return result;
 }
 
+struct bundle_list_context {
+	struct repository *r;
+	struct bundle_list *list;
+	enum bundle_list_mode mode;
+	int count;
+	int depth;
+};
+
+/*
+ * This early definition is necessary because we use indirect recursion:
+ *
+ * While iterating through a bundle list that was downloaded as part
+ * of fetch_bundle_uri_internal(), iterator methods eventually call it
+ * again, but with depth + 1.
+ */
+static int fetch_bundle_uri_internal(struct repository *r,
+				     struct remote_bundle_info *bundle,
+				     int depth,
+				     struct bundle_list *list);
+
+static int download_bundle_to_file(struct remote_bundle_info *bundle, void *data)
+{
+	struct bundle_list_context *ctx = data;
+
+	if (ctx->mode == BUNDLE_MODE_ANY && ctx->count)
+		return 0;
+
+	ctx->count++;
+	return fetch_bundle_uri_internal(ctx->r, bundle, ctx->depth + 1, ctx->list);
+}
+
+static int download_bundle_list(struct repository *r,
+				struct bundle_list *local_list,
+				struct bundle_list *global_list,
+				int depth)
+{
+	struct bundle_list_context ctx = {
+		.r = r,
+		.list = global_list,
+		.depth = depth + 1,
+		.mode = local_list->mode,
+	};
+
+	return for_all_bundles_in_list(local_list, download_bundle_to_file, &ctx);
+}
+
+static int fetch_bundle_list_in_config_format(struct repository *r,
+					      struct bundle_list *global_list,
+					      struct remote_bundle_info *bundle,
+					      int depth)
+{
+	int result;
+	struct bundle_list list_from_bundle;
+
+	init_bundle_list(&list_from_bundle);
+
+	if ((result = parse_bundle_list_in_config_format(bundle->uri,
+							 bundle->file.buf,
+							 &list_from_bundle)))
+		goto cleanup;
+
+	if (list_from_bundle.mode == BUNDLE_MODE_NONE) {
+		warning(_("unrecognized bundle mode from URI '%s'"),
+			bundle->uri);
+		result = -1;
+		goto cleanup;
+	}
+
+	if ((result = download_bundle_list(r, &list_from_bundle,
+					   global_list, depth)))
+		goto cleanup;
+
+cleanup:
+	clear_bundle_list(&list_from_bundle);
+	return result;
+}
+
 /**
  * This limits the recursion on fetch_bundle_uri_internal() when following
  * bundle lists.
  */
 static int max_bundle_uri_depth = 4;
 
+/**
+ * Recursively download all bundles advertised at the given URI
+ * to files. If the file is a bundle, then add it to the given
+ * 'list'. Otherwise, expect a bundle list and recurse on the
+ * URIs in that list according to the list mode (ANY or ALL).
+ */
 static int fetch_bundle_uri_internal(struct repository *r,
-				     const char *uri,
-				     int depth)
+				     struct remote_bundle_info *bundle,
+				     int depth,
+				     struct bundle_list *list)
 {
 	int result = 0;
-	struct strbuf filename = STRBUF_INIT;
+	struct remote_bundle_info *bcopy;
 
 	if (depth >= max_bundle_uri_depth) {
 		warning(_("exceeded bundle URI recursion limit (%d)"),
@@ -355,31 +440,121 @@ static int fetch_bundle_uri_internal(struct repository *r,
 		return -1;
 	}
 
-	find_temp_filename(&filename);
-	if ((result = copy_uri_to_file(uri, filename.buf)))
+	if (!bundle->file.len)
+		find_temp_filename(&bundle->file);
+	if ((result = copy_uri_to_file(bundle->uri, bundle->file.buf)))
 		goto cleanup;
 
-	if ((result = !is_bundle(filename.buf, 0)))
-		goto cleanup;
-
-	if ((result = unbundle_from_file(r, filename.buf)))
+	if ((result = !is_bundle(bundle->file.buf, 1))) {
+		result = fetch_bundle_list_in_config_format(
+				r, list, bundle, depth);
 		goto cleanup;
+	}
 
-	git_config_set_multivar_gently("log.excludedecoration",
-					"refs/bundle/",
-					"refs/bundle/",
-					CONFIG_FLAGS_FIXED_VALUE |
-					CONFIG_FLAGS_MULTI_REPLACE);
+	/* Copy the bundle and insert it into the global list. */
+	CALLOC_ARRAY(bcopy, 1);
+	bcopy->id = xstrdup(bundle->id);
+	strbuf_init(&bcopy->file, 0);
+	strbuf_add(&bcopy->file, bundle->file.buf, bundle->file.len);
+	strbuf_detach(&bundle->file, NULL);
+	hashmap_entry_init(&bcopy->ent, strhash(bcopy->id));
+	hashmap_add(&list->bundles, &bcopy->ent);
 
 cleanup:
-	unlink(filename.buf);
-	strbuf_release(&filename);
+	if (bundle->file.len)
+		unlink(bundle->file.buf);
 	return result;
 }
 
+struct attempt_unbundle_context {
+	struct repository *r;
+	int success_count;
+	int failure_count;
+};
+
+static int attempt_unbundle(struct remote_bundle_info *info, void *data)
+{
+	struct attempt_unbundle_context *ctx = data;
+
+	if (info->unbundled || !unbundle_from_file(ctx->r, info->file.buf)) {
+		ctx->success_count++;
+		info->unbundled = 1;
+	} else {
+		ctx->failure_count++;
+	}
+
+	return 0;
+}
+
+static int unbundle_all_bundles(struct repository *r,
+				struct bundle_list *list)
+{
+	int last_success_count = -1;
+	struct attempt_unbundle_context ctx = {
+		.r = r,
+	};
+
+	/*
+	 * Iterate through all bundles looking for ones that can
+	 * successfully unbundle. If any succeed, then perhaps another
+	 * will succeed in the next attempt.
+	 */
+	while (last_success_count < ctx.success_count) {
+		last_success_count = ctx.success_count;
+
+		ctx.success_count = 0;
+		ctx.failure_count = 0;
+		for_all_bundles_in_list(list, attempt_unbundle, &ctx);
+	}
+
+	if (ctx.success_count)
+		git_config_set_multivar_gently("log.excludedecoration",
+						"refs/bundle/",
+						"refs/bundle/",
+						CONFIG_FLAGS_FIXED_VALUE |
+						CONFIG_FLAGS_MULTI_REPLACE);
+
+	if (ctx.failure_count) {
+		warning(_("failed to unbundle %d bundles"),
+			ctx.failure_count);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int unlink_bundle(struct remote_bundle_info *info, void *data)
+{
+	if (info->file.buf)
+		unlink_or_warn(info->file.buf);
+	return 0;
+}
+
 int fetch_bundle_uri(struct repository *r, const char *uri)
 {
-	return fetch_bundle_uri_internal(r, uri, 0);
+	int result;
+	struct bundle_list list;
+	struct remote_bundle_info bundle = {
+		.uri = xstrdup(uri),
+		.id = xstrdup("<root>"),
+		.file = STRBUF_INIT,
+	};
+
+	init_bundle_list(&list);
+
+	/* If a bundle is added to this global list, then it is required. */
+	list.mode = BUNDLE_MODE_ALL;
+
+	if ((result = fetch_bundle_uri_internal(r, &bundle, 0, &list)))
+		goto cleanup;
+
+	result = unbundle_all_bundles(r, &list);
+
+cleanup:
+	for_all_bundles_in_list(&list, unlink_bundle, NULL);
+	clear_bundle_list(&list);
+	clear_remote_bundle_info(&bundle, NULL);
+	return result;
 }
 
 /**
diff --git a/bundle-uri.h b/bundle-uri.h
index b0183870336..adce5dc07e2 100644
--- a/bundle-uri.h
+++ b/bundle-uri.h
@@ -36,6 +36,12 @@ struct remote_bundle_info {
 	 * an empty string.
 	 */
 	struct strbuf file;
+
+	/**
+	 * If the bundle has been unbundled successfully, then
+	 * this boolean is true.
+	 */
+	unsigned unbundled:1;
 };
 
 #define REMOTE_BUNDLE_INFO_INIT { \
-- 
gitgitgadget


  parent reply	other threads:[~2022-05-20 18:42 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-20 18:40 [PATCH 00/24] [RFC] Bundle URIs Combined RFC Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 01/24] docs: document bundle URI standard Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 02/24] remote-curl: add 'get' capability Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 03/24] bundle-uri: create basic file-copy logic Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 04/24] bundle-uri: add support for http(s):// and file:// Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 05/24] fetch: add --bundle-uri option Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 06/24] fetch: add 'refs/bundle/' to log.excludeDecoration Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 07/24] clone: add --bundle-uri option Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 08/24] clone: --bundle-uri cannot be combined with --depth Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 09/24] bundle-uri: create bundle_list struct and helpers Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 10/24] bundle-uri: create base key-value pair parsing Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 11/24] bundle-uri: create "key=value" line parsing Ævar Arnfjörð Bjarmason via GitGitGadget
2022-05-20 18:40 ` [PATCH 12/24] bundle-uri: unit test "key=value" parsing Ævar Arnfjörð Bjarmason via GitGitGadget
2022-05-20 18:40 ` [PATCH 13/24] bundle-uri: limit recursion depth for bundle lists Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 14/24] bundle-uri: parse bundle list in config format Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` Derrick Stolee via GitGitGadget [this message]
2022-05-20 18:40 ` [PATCH 16/24] protocol v2: add server-side "bundle-uri" skeleton Ævar Arnfjörð Bjarmason via GitGitGadget
2022-05-20 18:40 ` [PATCH 17/24] bundle-uri client: add minimal NOOP client Ævar Arnfjörð Bjarmason via GitGitGadget
2022-05-20 18:40 ` [PATCH 18/24] bundle-uri client: add "git ls-remote-bundle-uri" Ævar Arnfjörð Bjarmason via GitGitGadget
2022-05-20 18:40 ` [PATCH 19/24] bundle-uri: serve URI advertisement from bundle.* config Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 20/24] bundle-uri client: add boolean transfer.bundleURI setting Ævar Arnfjörð Bjarmason via GitGitGadget
2022-05-20 18:40 ` [PATCH 21/24] bundle-uri: allow relative URLs in bundle lists Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 22/24] bundle-uri: download bundles from an advertised list Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 23/24] clone: unbundle the advertised bundles Derrick Stolee via GitGitGadget
2022-05-20 18:40 ` [PATCH 24/24] t5601: basic bundle URI tests Derrick Stolee via GitGitGadget

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ceb253ec445b7275943e1c04ac51f12869271e91.1653072042.git.gitgitgadget@gmail.com \
    --to=gitgitgadget@gmail.com \
    --cc=Johannes.Schindelin@gmx.de \
    --cc=avarab@gmail.com \
    --cc=derrickstolee@github.com \
    --cc=dyroneteng@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=me@ttaylorr.com \
    --cc=newren@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).