git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Jonathan Tan <jonathantanmy@google.com>
To: git@vger.kernel.org
Cc: Jonathan Tan <jonathantanmy@google.com>,
	gitster@pobox.com, peff@peff.net, christian.couder@gmail.com,
	avarab@gmail.com
Subject: [WIP 3/7] http-fetch: support fetching packfiles by URL
Date: Sat, 23 Feb 2019 15:38:57 -0800	[thread overview]
Message-ID: <0d4b4678963fa08b7d73572fa4ae50d08f9e90ce.1550963965.git.jonathantanmy@google.com> (raw)
In-Reply-To: <cover.1550963965.git.jonathantanmy@google.com>

Teach http-fetch the ability to download packfiles directly, given a
URL, and to verify them.

The http_pack_request suite of functions have been modified to support a
NULL target. When target is NULL, the given URL is downloaded directly
instead of being treated as the root of a repository.

Signed-off-by: Jonathan Tan <jonathantanmy@google.com>
---
 Documentation/git-http-fetch.txt |  7 +++-
 http-fetch.c                     | 65 +++++++++++++++++++++++++-------
 http.c                           | 41 ++++++++++++++------
 http.h                           | 11 ++++--
 t/t5550-http-fetch-dumb.sh       | 18 +++++++++
 5 files changed, 113 insertions(+), 29 deletions(-)

diff --git a/Documentation/git-http-fetch.txt b/Documentation/git-http-fetch.txt
index 666b042679..e667544bb1 100644
--- a/Documentation/git-http-fetch.txt
+++ b/Documentation/git-http-fetch.txt
@@ -9,7 +9,7 @@ git-http-fetch - Download from a remote Git repository via HTTP
 SYNOPSIS
 --------
 [verse]
-'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin] <commit> <url>
+'git http-fetch' [-c] [-t] [-a] [-d] [-v] [-w filename] [--recover] [--stdin | --packfile | <commit>] <url>
 
 DESCRIPTION
 -----------
@@ -40,6 +40,11 @@ commit-id::
 
 		<commit-id>['\t'<filename-as-in--w>]
 
+--packfile::
+	Instead of a commit id on the command line (which is not expected in
+	this case), 'git http-fetch' fetches the packfile directly at the given
+	URL and generates the corresponding .idx file.
+
 --recover::
 	Verify that everything reachable from target is fetched.  Used after
 	an earlier fetch is interrupted.
diff --git a/http-fetch.c b/http-fetch.c
index a32ac118d9..d283ce83a5 100644
--- a/http-fetch.c
+++ b/http-fetch.c
@@ -5,7 +5,7 @@
 #include "walker.h"
 
 static const char http_fetch_usage[] = "git http-fetch "
-"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin] commit-id url";
+"[-c] [-t] [-a] [-v] [--recover] [-w ref] [--stdin | --packfile | commit-id] url";
 
 int cmd_main(int argc, const char **argv)
 {
@@ -19,6 +19,7 @@ int cmd_main(int argc, const char **argv)
 	int rc = 0;
 	int get_verbosely = 0;
 	int get_recover = 0;
+	int packfile = 0;
 
 	while (arg < argc && argv[arg][0] == '-') {
 		if (argv[arg][1] == 't') {
@@ -35,43 +36,81 @@ int cmd_main(int argc, const char **argv)
 			get_recover = 1;
 		} else if (!strcmp(argv[arg], "--stdin")) {
 			commits_on_stdin = 1;
+		} else if (!strcmp(argv[arg], "--packfile")) {
+			packfile = 1;
 		}
 		arg++;
 	}
-	if (argc != arg + 2 - commits_on_stdin)
+	if (argc != arg + 2 - (commits_on_stdin || packfile))
 		usage(http_fetch_usage);
 	if (commits_on_stdin) {
 		commits = walker_targets_stdin(&commit_id, &write_ref);
+	} else if (packfile) {
+		/* URL will be set later */
 	} else {
 		commit_id = (char **) &argv[arg++];
 		commits = 1;
 	}
 
-	if (argv[arg])
-		str_end_url_with_slash(argv[arg], &url);
+	if (packfile) {
+		url = xstrdup(argv[arg]);
+	} else {
+		if (argv[arg])
+			str_end_url_with_slash(argv[arg], &url);
+	}
 
 	setup_git_directory();
 
 	git_config(git_default_config, NULL);
 
 	http_init(NULL, url, 0);
-	walker = get_http_walker(url);
-	walker->get_verbosely = get_verbosely;
-	walker->get_recover = get_recover;
 
-	rc = walker_fetch(walker, commits, commit_id, write_ref, url);
+	if (packfile) {
+		struct http_pack_request *preq;
+		struct slot_results results;
+		int ret;
+		char *lockfile;
+
+		preq = new_http_pack_request(NULL, url);
+		if (preq == NULL)
+			die("couldn't create http pack request");
+		preq->slot->results = &results;
+
+		if (start_active_slot(preq->slot)) {
+			run_active_slot(preq->slot);
+			if (results.curl_result != CURLE_OK) {
+				die("Unable to get pack file %s\n%s", preq->url,
+				    curl_errorstr);
+			}
+		} else {
+			die("Unable to start request");
+		}
+
+		if ((ret = finish_http_pack_request(preq, &lockfile)))
+			die("finish_http_pack_request gave result %d", ret);
+		unlink(lockfile);
+		release_http_pack_request(preq);
+		rc = 0;
+	} else {
+		walker = get_http_walker(url);
+		walker->get_verbosely = get_verbosely;
+		walker->get_recover = get_recover;
+
+		rc = walker_fetch(walker, commits, commit_id, write_ref, url);
 
-	if (commits_on_stdin)
-		walker_targets_free(commits, commit_id, write_ref);
+		if (commits_on_stdin)
+			walker_targets_free(commits, commit_id, write_ref);
 
-	if (walker->corrupt_object_found) {
-		fprintf(stderr,
+		if (walker->corrupt_object_found) {
+			fprintf(stderr,
 "Some loose object were found to be corrupt, but they might be just\n"
 "a false '404 Not Found' error message sent with incorrect HTTP\n"
 "status code.  Suggest running 'git fsck'.\n");
+		}
+
+		walker_free(walker);
 	}
 
-	walker_free(walker);
 	http_cleanup();
 
 	free(url);
diff --git a/http.c b/http.c
index 5f8e602cd2..73c3e6295b 100644
--- a/http.c
+++ b/http.c
@@ -2208,15 +2208,18 @@ int finish_http_pack_request(struct http_pack_request *preq, char **lockfile)
 	int tmpfile_fd;
 	int ret = 0;
 
-	close_pack_index(p);
+	if (p)
+		close_pack_index(p);
 
 	fclose(preq->packfile);
 	preq->packfile = NULL;
 
-	lst = preq->lst;
-	while (*lst != p)
-		lst = &((*lst)->next);
-	*lst = (*lst)->next;
+	if (p) {
+		lst = preq->lst;
+		while (*lst != p)
+			lst = &((*lst)->next);
+		*lst = (*lst)->next;
+	}
 
 	tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY);
 
@@ -2240,7 +2243,8 @@ int finish_http_pack_request(struct http_pack_request *preq, char **lockfile)
 		goto cleanup;
 	}
 
-	install_packed_git(the_repository, p);
+	if (p)
+		install_packed_git(the_repository, p);
 cleanup:
 	close(tmpfile_fd);
 	unlink(preq->tmpfile.buf);
@@ -2258,12 +2262,24 @@ struct http_pack_request *new_http_pack_request(
 	strbuf_init(&preq->tmpfile, 0);
 	preq->target = target;
 
-	end_url_with_slash(&buf, base_url);
-	strbuf_addf(&buf, "objects/pack/pack-%s.pack",
-		sha1_to_hex(target->sha1));
-	preq->url = strbuf_detach(&buf, NULL);
+	if (target) {
+		end_url_with_slash(&buf, base_url);
+		strbuf_addf(&buf, "objects/pack/pack-%s.pack",
+			sha1_to_hex(target->sha1));
+		preq->url = strbuf_detach(&buf, NULL);
+	} else {
+		preq->url = xstrdup(base_url);
+	}
+
+	if (target) {
+		strbuf_addf(&preq->tmpfile, "%s.temp",
+			    sha1_pack_name(target->sha1));
+	} else {
+		strbuf_addf(&preq->tmpfile, "%s/pack/pack-", get_object_directory());
+		strbuf_addstr_urlencode(&preq->tmpfile, base_url, 1);
+		strbuf_addstr(&preq->tmpfile, ".temp");
+	}
 
-	strbuf_addf(&preq->tmpfile, "%s.temp", sha1_pack_name(target->sha1));
 	preq->packfile = fopen(preq->tmpfile.buf, "a");
 	if (!preq->packfile) {
 		error("Unable to open local file %s for pack",
@@ -2287,7 +2303,8 @@ struct http_pack_request *new_http_pack_request(
 		if (http_is_verbose)
 			fprintf(stderr,
 				"Resuming fetch of pack %s at byte %"PRIuMAX"\n",
-				sha1_to_hex(target->sha1), (uintmax_t)prev_posn);
+				target ? sha1_to_hex(target->sha1) : base_url,
+				(uintmax_t)prev_posn);
 		http_opt_request_remainder(preq->slot->curl, prev_posn);
 	}
 
diff --git a/http.h b/http.h
index 1aa556257e..a33f2aa4b9 100644
--- a/http.h
+++ b/http.h
@@ -210,7 +210,8 @@ struct http_pack_request {
 	struct active_request_slot *slot;
 
 	/*
-	 * After calling new_http_pack_request(), point lst to the head of the
+	 * After calling new_http_pack_request(), if fetching a pack that
+	 * http_get_info_packs() told us about, point lst to the head of the
 	 * pack list that target is in. finish_http_pack_request() will remove
 	 * target from lst and call install_packed_git() on target.
 	 */
@@ -224,8 +225,12 @@ struct http_pack_request {
 };
 
 /*
- * target must be an element in a pack list obtained from
- * http_get_info_packs().
+ * If fetching a pack that http_get_info_packs() told us about, set target to
+ * an element in a pack list obtained from http_get_info_packs(). The actual
+ * URL fetched will be base_url followed by a suffix with the hash of the pack.
+ *
+ * Otherwise, set target to NULL. The actual URL fetched will be base_url
+ * itself.
  */
 extern struct http_pack_request *new_http_pack_request(
 	struct packed_git *target, const char *base_url);
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index 6d7d88ccc9..be81d33d68 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -199,6 +199,16 @@ test_expect_success 'fetch packed objects' '
 	git clone $HTTPD_URL/dumb/repo_pack.git
 '
 
+test_expect_success 'http-fetch --packfile' '
+	git init packfileclient &&
+	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) &&
+	git -C packfileclient http-fetch --packfile "$HTTPD_URL"/dumb/repo_pack.git/$p &&
+
+	# Ensure that it has the HEAD of repo_pack, at least
+	HASH=$(git -C "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git rev-parse HEAD) &&
+	git -C packfileclient cat-file -e "$HASH"
+'
+
 test_expect_success 'fetch notices corrupt pack' '
 	cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git &&
@@ -214,6 +224,14 @@ test_expect_success 'fetch notices corrupt pack' '
 	)
 '
 
+test_expect_success 'http-fetch --packfile with corrupt pack' '
+	rm -rf packfileclient &&
+	git init packfileclient &&
+	p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad1.git && ls objects/pack/pack-*.pack) &&
+	test_must_fail git -C packfileclient http-fetch --packfile \
+		"$HTTPD_URL"/dumb/repo_bad1.git/$p
+'
+
 test_expect_success 'fetch notices corrupt idx' '
 	cp -R "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_bad2.git &&
-- 
2.19.0.271.gfe8321ec05.dirty


  parent reply	other threads:[~2019-02-23 23:39 UTC|newest]

Thread overview: 45+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-02-23 23:38 [WIP 0/7] CDN offloading of fetch response Jonathan Tan
2019-02-23 23:38 ` [WIP 1/7] http: use --stdin and --keep when downloading pack Jonathan Tan
2019-02-23 23:38 ` [WIP 2/7] http: improve documentation of http_pack_request Jonathan Tan
2019-02-23 23:38 ` Jonathan Tan [this message]
2019-02-23 23:38 ` [WIP 4/7] Documentation: order protocol v2 sections Jonathan Tan
2019-02-23 23:38 ` [WIP 5/7] Documentation: add Packfile URIs design doc Jonathan Tan
2019-02-23 23:39 ` [WIP 6/7] upload-pack: refactor reading of pack-objects out Jonathan Tan
2019-02-23 23:39 ` [WIP 7/7] upload-pack: send part of packfile response as uri Jonathan Tan
2019-02-24 15:54   ` Junio C Hamano
2019-02-25 21:04   ` Christian Couder
2019-02-26  1:53     ` Jonathan Nieder
2019-02-26  7:08       ` Christian Couder
2019-03-01  0:09   ` Josh Steadmon
2019-03-01  0:17     ` Jonathan Tan
2019-02-25 21:30 ` [WIP 0/7] CDN offloading of fetch response Christian Couder
2019-02-25 23:45   ` Jonathan Nieder
2019-02-26  8:30     ` Christian Couder
2019-02-26  9:12       ` Ævar Arnfjörð Bjarmason
2019-03-04  8:24         ` Christian Couder
2019-02-28 23:21       ` Jonathan Nieder
2019-03-04  8:54         ` Christian Couder
2019-03-08 21:55 ` [PATCH v2 0/8] " Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 1/8] http: use --stdin when getting dumb HTTP pack Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 2/8] http: improve documentation of http_pack_request Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 3/8] http-fetch: support fetching packfiles by URL Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 4/8] Documentation: order protocol v2 sections Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 5/8] Documentation: add Packfile URIs design doc Jonathan Tan
2019-04-23  5:31     ` Jeff King
2019-04-23 20:38       ` Jonathan Tan
2019-04-23 22:18         ` Ævar Arnfjörð Bjarmason
2019-04-23 22:22           ` Jonathan Nieder
2019-04-23 22:30             ` Ævar Arnfjörð Bjarmason
2019-04-23 22:51               ` Jonathan Nieder
2019-04-23 22:11       ` Jonathan Nieder
2019-04-23 22:25         ` Ævar Arnfjörð Bjarmason
2019-04-23 22:48           ` Jonathan Nieder
2019-04-24  7:48             ` Ævar Arnfjörð Bjarmason
2019-04-24  3:01       ` Junio C Hamano
2019-03-08 21:55   ` [PATCH v2 6/8] upload-pack: refactor reading of pack-objects out Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 7/8] fetch-pack: support more than one pack lockfile Jonathan Tan
2019-03-08 21:55   ` [PATCH v2 8/8] upload-pack: send part of packfile response as uri Jonathan Tan
2019-03-19 20:48   ` [PATCH v2 0/8] CDN offloading of fetch response Josh Steadmon
2019-04-23  5:21   ` Jeff King
2019-04-23 19:23     ` Jonathan Tan
2019-04-24  9:09   ` Ævar Arnfjörð Bjarmason

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0d4b4678963fa08b7d73572fa4ae50d08f9e90ce.1550963965.git.jonathantanmy@google.com \
    --to=jonathantanmy@google.com \
    --cc=avarab@gmail.com \
    --cc=christian.couder@gmail.com \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=peff@peff.net \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).