git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>, gitster@pobox.com
Subject: [PATCH v2 4/9] http-fetch: support fetching packfiles by URL
Date: Wed, 10 Jun 2020 13:57:18 -0700	[thread overview]
Message-ID: <a4a7ed512b251fb16f0b2400fac8c0c5cc14aa75.1591821067.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1591821067.git.jonathantanmy@google.com>

Teach http-fetch the ability to download packfiles directly, given a
URL, and to verify them.

The http_pack_request suite has been augmented with a function that
takes a URL directly. With this function, the hash is only used to
determine the name of the temporary file.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/git-http-fetch.txt |  9 ++++-
 http-fetch.c                     | 63 +++++++++++++++++++++++++++-----
 http.c                           | 28 ++++++++++----
 http.h                           | 11 ++++++
 t/t5550-http-fetch-dumb.sh       | 30 +++++++++++++++
 5 files changed, 123 insertions(+), 18 deletions(-)

diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt
index 666b042679..4deb4893f5 100644
--- a/Documentation/git-http-fetch.txt
+++ b/Documentation/git-http-fetch.txt
@@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP
 SYNOPSIS
 --------
 [verse]
-'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] <commit> <url>
+'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile=<hash> | <commit>] <url>
 
 DESCRIPTION
 -----------
@@ -40,6 +40,13 @@ commit-id::
 
 		<commit-id>['\t'<filename-as-in--w>]
 
+--packfile=<hash>::
+	Instead of a commit id on the command line (which is not expected in
+	this case), 'git http-fetch' fetches the packfile directly at the given
+	URL and uses index-pack to generate corresponding .idx and .keep files.
+	The hash is used to determine the name of the temporary file and is
+	arbitrary. The output of index-pack is printed to stdout.
+
 --recover::
 	Verify that everything reachable from target is fetched.  Used after
 	an earlier fetch is interrupted.
diff --git a/http-fetch.c b/http-fetch.c
index e538174bde..1df376e745 100644
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -5,7 +5,7 @@
 #include "walker.h"
 
 static const char http_fetch_usage[] = "git http-fetch "
-"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url";
+"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile=hash | commit-id] url";
 
 static int fetch_using_walker(const char *raw_url, int get_verbosely,
 			      int get_recover, int commits, char **commit_id,
@@ -43,6 +43,37 @@ static int fetch_using_walker(const char *raw_url, int get_verbosely,
 	return rc;
 }
 
+static void fetch_single_packfile(struct object_id *packfile_hash,
+				  const char *url) {
+	struct http_pack_request *preq;
+	struct slot_results results;
+	int ret;
+
+	http_init(NULL, url, 0);
+
+	preq = new_direct_http_pack_request(packfile_hash->hash, xstrdup(url));
+	if (preq == NULL)
+		die("couldn't create http pack request");
+	preq->slot->results = &results;
+	preq->generate_keep = 1;
+
+	if (start_active_slot(preq->slot)) {
+		run_active_slot(preq->slot);
+		if (results.curl_result != CURLE_OK) {
+			die("Unable to get pack file %s\n%s", preq->url,
+			    curl_errorstr);
+		}
+	} else {
+		die("Unable to start request");
+	}
+
+	if ((ret = finish_http_pack_request(preq)))
+		die("finish_http_pack_request gave result %d", ret);
+
+	release_http_pack_request(preq);
+	http_cleanup();
+}
+
 int cmd_main(int argc, const char **argv)
 {
 	int commits_on_stdin = 0;
@@ -52,8 +83,12 @@ int cmd_main(int argc, const char **argv)
 	int arg = 1;
 	int get_verbosely = 0;
 	int get_recover = 0;
+	int packfile = 0;
+	struct object_id packfile_hash;
 
 	while (arg < argc && argv[arg][0] == '-') {
+		const char *p;
+
 		if (argv[arg][1] == 't') {
 		} else if (argv[arg][1] == 'c') {
 		} else if (argv[arg][1] == 'a') {
@@ -68,25 +103,33 @@ int cmd_main(int argc, const char **argv)
 			get_recover = 1;
 		} else if (!strcmp(argv[arg], "--stdin")) {
 			commits_on_stdin = 1;
+		} else if (skip_prefix(argv[arg], "--packfile=", &p)) {
+			const char *end;
+
+			packfile = 1;
+			if (parse_oid_hex(p, &packfile_hash, &end) || *end)
+				die(_("argument to --packfile must be a valid hash (got '%s')"), p);
 		}
 		arg++;
 	}
-	if (argc != arg + 2 - commits_on_stdin)
+	if (argc != arg + 2 - (commits_on_stdin || packfile))
 		usage(http_fetch_usage);
-	if (commits_on_stdin) {
-		commits = walker_targets_stdin(&commit_id, &write_ref);
-	} else {
-		commit_id = (char **) &argv[arg++];
-		commits = 1;
-	}
 
 	setup_git_directory();
 
 	git_config(git_default_config, NULL);
 
-	if (!argv[arg])
-		BUG("must have one arg remaining");
+	if (packfile) {
+		fetch_single_packfile(&packfile_hash, argv[arg]);
+		return 0;
+	}
 
+	if (commits_on_stdin) {
+		commits = walker_targets_stdin(&commit_id, &write_ref);
+	} else {
+		commit_id = (char **) &argv[arg++];
+		commits = 1;
+	}
 	return fetch_using_walker(argv[arg], get_verbosely, get_recover,
 				  commits, commit_id, write_ref,
 				  commits_on_stdin);
diff --git a/http.c b/http.c
index 4f6e1fb018..3aa0fa9fe6 100644
--- a/http.c
+++ b/http.c
@@ -2281,7 +2281,13 @@ int finish_http_pack_request(struct http_pack_request *preq)
 	argv_array_push(&ip.args, "--stdin");
 	ip.git_cmd = 1;
 	ip.in = tmpfile_fd;
-	ip.no_stdout = 1;
+	if (preq->generate_keep) {
+		argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX,
+				 (uintmax_t)getpid());
+		ip.out = 0;
+	} else {
+		ip.no_stdout = 1;
+	}
 
 	if (run_command(&ip)) {
 		ret = -1;
@@ -2307,19 +2313,27 @@ void http_install_packfile(struct packed_git *p,
 }
 
 struct http_pack_request *new_http_pack_request(
-	const unsigned char *packed_git_hash, const char *base_url)
+	const unsigned char *packed_git_hash, const char *base_url) {
+
+	struct strbuf buf = STRBUF_INIT;
+
+	end_url_with_slash(&buf, base_url);
+	strbuf_addf(&buf, "objects/pack/pack-%s.pack",
+		hash_to_hex(packed_git_hash));
+	return new_direct_http_pack_request(packed_git_hash,
+					    strbuf_detach(&buf, NULL));
+}
+
+struct http_pack_request *new_direct_http_pack_request(
+	const unsigned char *packed_git_hash, char *url)
 {
 	off_t prev_posn = 0;
-	struct strbuf buf = STRBUF_INIT;
 	struct http_pack_request *preq;
 
 	preq = xcalloc(1, sizeof(*preq));
 	strbuf_init(&preq->tmpfile, 0);
 
-	end_url_with_slash(&buf, base_url);
-	strbuf_addf(&buf, "objects/pack/pack-%s.pack",
-		hash_to_hex(packed_git_hash));
-	preq->url = strbuf_detach(&buf, NULL);
+	preq->url = url;
 
 	strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(packed_git_hash));
 	preq->packfile = fopen(preq->tmpfile.buf, "a");
diff --git a/http.h b/http.h
index bbc6b070f1..dc49c60165 100644
--- a/http.h
+++ b/http.h
@@ -216,6 +216,15 @@ int http_get_info_packs(const char *base_url,
 
 struct http_pack_request {
 	char *url;
+
+	/*
+	 * If this is true, finish_http_pack_request() will pass "--keep" to
+	 * index-pack, resulting in the creation of a keep file, and will not
+	 * suppress its stdout (that is, the "keep\t<hash>\n" line will be
+	 * printed to stdout).
+	 */
+	unsigned generate_keep : 1;
+
 	FILE *packfile;
 	struct strbuf tmpfile;
 	struct active_request_slot *slot;
@@ -223,6 +232,8 @@ struct http_pack_request {
 
 struct http_pack_request *new_http_pack_request(
 	const unsigned char *packed_git_hash, const char *base_url);
+struct http_pack_request *new_direct_http_pack_request(
+	const unsigned char *packed_git_hash, char *url);
 int finish_http_pack_request(struct http_pack_request *preq);
 void release_http_pack_request(struct http_pack_request *preq);
 
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index 50485300eb..ca2e8af022 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -199,6 +199,28 @@ test_expect_success 'fetch packed objects' '
 	git clone $HTTPD_URL/dumb/repo_pack.git
 '
 
+test_expect_success 'http-fetch --packfile' '
+	# Arbitrary hash. Use rev-parse so that we get one of the correct
+	# length.
+	ARBITRARY=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+
+	git init packfileclient &&
+	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
+	git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out &&
+
+	grep "^keep.[0-9a-f]\{16,\}$" out &&
+	cut -c6- out >packhash &&
+
+	# Ensure that the expected files are generated
+	test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).pack" &&
+	test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).idx" &&
+	test -e "packfileclient/.git/objects/pack/pack-$(cat packhash).keep" &&
+
+	# Ensure that it has the HEAD of repo_pack, at least
+	HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+	git -C packfileclient cat-file -e "$HASH"
+'
+
 test_expect_success 'fetch notices corrupt pack' '
 	cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
@@ -214,6 +236,14 @@ test_expect_success 'fetch notices corrupt pack' '
 	)
 '
 
+test_expect_success 'http-fetch --packfile with corrupt pack' '
+	rm -rf packfileclient &&
+	git init packfileclient &&
+	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) &&
+	test_must_fail git -C packfileclient http-fetch --packfile \
+		"$HTTPD_URL"/dumb/repo_bad1.git/$p
+'
+
 test_expect_success 'fetch notices corrupt idx' '
 	cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
-- 
2.27.0.278.ge193c7cf3a9-goog


  parent reply	other threads:[~2020-06-10 20:57 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-29 22:30 [PATCH 0/8] CDN offloading update Jonathan Tan
2020-05-29 22:30 ` [PATCH 1/8] http: use --stdin when getting dumb HTTP pack Jonathan Tan
2020-05-29 23:00   ` Junio C Hamano
2020-06-01 20:37     ` Jonathan Tan
2020-05-29 22:30 ` [PATCH 2/8] http: improve documentation of http_pack_request Jonathan Tan
2020-05-29 22:30 ` [PATCH 3/8] http-fetch: support fetching packfiles by URL Jonathan Tan
2020-05-29 23:25   ` Junio C Hamano
2020-06-01 20:54     ` Jonathan Tan
2020-05-29 22:30 ` [PATCH 4/8] Documentation: order protocol v2 sections Jonathan Tan
2020-05-29 23:32   ` Junio C Hamano
2020-06-01 20:57     ` Jonathan Tan
2020-05-29 22:30 ` [PATCH 5/8] Documentation: add Packfile URIs design doc Jonathan Tan
2020-05-30  0:15   ` Junio C Hamano
2020-05-30  0:22     ` Junio C Hamano
2020-06-01 23:10       ` Jonathan Tan
2020-06-01 23:07     ` Jonathan Tan
2020-06-10  1:14     ` Jonathan Tan
2020-06-10 17:16       ` Junio C Hamano
2020-06-10 18:04         ` Jonathan Tan
2020-05-29 22:30 ` [PATCH 6/8] upload-pack: refactor reading of pack-objects out Jonathan Tan
2020-05-29 22:30 ` [PATCH 7/8] fetch-pack: support more than one pack lockfile Jonathan Tan
2020-05-29 22:30 ` [PATCH 8/8] upload-pack: send part of packfile response as uri Jonathan Tan
2020-05-31 16:59   ` Junio C Hamano
2020-05-31 17:35   ` Junio C Hamano
2020-06-01 23:20     ` Jonathan Tan
2020-06-01 20:00   ` Jonathan Nieder
2020-06-10 20:57 ` [PATCH v2 0/9] CDN offloading update Jonathan Tan
2020-06-10 20:57   ` [PATCH v2 1/9] http: use --stdin when indexing dumb HTTP pack Jonathan Tan
2020-06-11  1:10     ` Junio C Hamano
2020-06-10 20:57   ` [PATCH v2 2/9] http: refactor finish_http_pack_request() Jonathan Tan
2020-06-10 20:57   ` [PATCH v2 3/9] http-fetch: refactor into function Jonathan Tan
2020-06-10 20:57   ` Jonathan Tan [this message]
2020-06-11  1:30     ` [PATCH v2 4/9] http-fetch: support fetching packfiles by URL Junio C Hamano
2020-06-10 20:57   ` [PATCH v2 5/9] Documentation: order protocol v2 sections Jonathan Tan
2020-06-10 20:57   ` [PATCH v2 6/9] Documentation: add Packfile URIs design doc Jonathan Tan
2020-06-11  1:55     ` Junio C Hamano
2020-11-25  9:15     ` Ævar Arnfjörð Bjarmason
2020-11-25 19:09       ` Jonathan Tan
2020-12-01 12:48         ` Ævar Arnfjörð Bjarmason
2020-06-10 20:57   ` [PATCH v2 7/9] upload-pack: refactor reading of pack-objects out Jonathan Tan
2020-06-10 20:57   ` [PATCH v2 8/9] fetch-pack: support more than one pack lockfile Jonathan Tan
2020-06-11  1:41     ` Junio C Hamano
2020-06-10 20:57   ` [PATCH v2 9/9] upload-pack: send part of packfile response as uri Jonathan Tan
2020-06-10 23:16   ` [PATCH v2 0/9] CDN offloading update Junio C Hamano

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a4a7ed512b251fb16f0b2400fac8c0c5cc14aa75.1591821067.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).