git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [PATCH 2/3] replace CURLOPT_FILE With CURLOPT_WRITEDATA
       [not found] <20201012184806.166251-1-smcallis@google.com>
@ 2020-10-12 18:48 ` Sean McAllister
  2020-10-12 19:26   ` Johannes Schindelin
  2020-10-12 18:48 ` [PATCH 3/3] http: automatically retry some requests Sean McAllister
  2020-10-12 20:19 ` [PATCH] remote-curl: add testing for intelligent retry for HTTP Sean McAllister
  2 siblings, 1 reply; 13+ messages in thread
From: Sean McAllister @ 2020-10-12 18:48 UTC (permalink / raw)
  To: git, gitster, peff, masayasuzuki; +Cc: Sean McAllister

CURLOPT_FILE has been deprecated since 2003.

Signed-off-by: Sean McAllister <smcallis@google.com>
---
 http-push.c   | 6 +++---
 http-walker.c | 2 +-
 http.c        | 6 +++---
 remote-curl.c | 4 ++--
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/http-push.c b/http-push.c
index 6a4a43e07f..2e6fee3305 100644
--- a/http-push.c
+++ b/http-push.c
@@ -894,7 +894,7 @@ static struct remote_lock *lock_remote(const char *path, long timeout)
 	slot->results = &results;
 	curl_setup_http(slot->curl, url, DAV_LOCK, &out_buffer, fwrite_buffer);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
+	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &in_buffer);
 
 	lock = xcalloc(1, sizeof(*lock));
 	lock->timeout = -1;
@@ -1151,7 +1151,7 @@ static void remote_ls(const char *path, int flags,
 	curl_setup_http(slot->curl, url, DAV_PROPFIND,
 			&out_buffer, fwrite_buffer);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
+	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &in_buffer);
 
 	if (start_active_slot(slot)) {
 		run_active_slot(slot);
@@ -1225,7 +1225,7 @@ static int locking_available(void)
 	curl_setup_http(slot->curl, repo->url, DAV_PROPFIND,
 			&out_buffer, fwrite_buffer);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
+	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &in_buffer);
 
 	if (start_active_slot(slot)) {
 		run_active_slot(slot);
diff --git a/http-walker.c b/http-walker.c
index 4fb1235cd4..6c630711d1 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -384,7 +384,7 @@ static void fetch_alternates(struct walker *walker, const char *base)
 	alt_req.walker = walker;
 	slot->callback_data = &alt_req;
 
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
+	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &buffer);
 	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
 	curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
 
diff --git a/http.c b/http.c
index 8b23a546af..b3c1669388 100644
--- a/http.c
+++ b/http.c
@@ -1921,7 +1921,7 @@ static int http_request(const char *url,
 		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
 	} else {
 		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
-		curl_easy_setopt(slot->curl, CURLOPT_FILE, result);
+		curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, result);
 
 		if (target == HTTP_REQUEST_FILE) {
 			off_t posn = ftello(result);
@@ -2337,7 +2337,7 @@ struct http_pack_request *new_direct_http_pack_request(
 	}
 
 	preq->slot = get_active_slot();
-	curl_easy_setopt(preq->slot->curl, CURLOPT_FILE, preq->packfile);
+	curl_easy_setopt(preq->slot->curl, CURLOPT_WRITEDATA, preq->packfile);
 	curl_easy_setopt(preq->slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
 	curl_easy_setopt(preq->slot->curl, CURLOPT_URL, preq->url);
 	curl_easy_setopt(preq->slot->curl, CURLOPT_HTTPHEADER,
@@ -2508,7 +2508,7 @@ struct http_object_request *new_http_object_request(const char *base_url,
 
 	freq->slot = get_active_slot();
 
-	curl_easy_setopt(freq->slot->curl, CURLOPT_FILE, freq);
+	curl_easy_setopt(freq->slot->curl, CURLOPT_WRITEDATA, freq);
 	curl_easy_setopt(freq->slot->curl, CURLOPT_FAILONERROR, 0);
 	curl_easy_setopt(freq->slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
 	curl_easy_setopt(freq->slot->curl, CURLOPT_ERRORBUFFER, freq->errorstr);
diff --git a/remote-curl.c b/remote-curl.c
index 32cc4a0c55..7f44fa30fe 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -847,7 +847,7 @@ static int probe_rpc(struct rpc_state *rpc, struct slot_results *results)
 	curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, 4);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
 	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buf);
+	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &buf);
 
 	err = run_slot(slot, results);
 
@@ -1012,7 +1012,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece
 	rpc_in_data.slot = slot;
 	rpc_in_data.check_pktline = stateless_connect;
 	memset(&rpc_in_data.pktline_state, 0, sizeof(rpc_in_data.pktline_state));
-	curl_easy_setopt(slot->curl, CURLOPT_FILE, &rpc_in_data);
+	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &rpc_in_data);
 	curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
 
 
-- 
2.28.0.1011.ga647a8990f-goog


^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH 3/3] http: automatically retry some requests
       [not found] <20201012184806.166251-1-smcallis@google.com>
  2020-10-12 18:48 ` [PATCH 2/3] replace CURLOPT_FILE With CURLOPT_WRITEDATA Sean McAllister
@ 2020-10-12 18:48 ` Sean McAllister
  2020-10-12 20:15   ` Johannes Schindelin
  2020-10-12 20:19 ` [PATCH] remote-curl: add testing for intelligent retry for HTTP Sean McAllister
  2 siblings, 1 reply; 13+ messages in thread
From: Sean McAllister @ 2020-10-12 18:48 UTC (permalink / raw)
  To: git, gitster, peff, masayasuzuki; +Cc: Sean McAllister

Some HTTP response codes indicate a server state that can support
retrying the request rather than immediately erroring out.  The server
can also provide information about how long to wait before retrying
via the Retry-After header.  So check the server response and retry
some reasonable number of times before erroring out to better accommodate
transient errors.

Exiting immediately becomes irksome when pulling large multi-repo code
bases such as Android or Chromium, as often the entire fetch operation
has to be restarted from the beginning due to an error in one repo. If
we can reduce how often that occurs, then it's a big win.

Signed-off-by: Sean McAllister <smcallis@google.com>
---
 Documentation/config/http.txt |   5 ++
 http.c                        | 121 +++++++++++++++++++++++++++++++++-
 http.h                        |  13 +++-
 remote-curl.c                 |   2 +-
 t/t5539-fetch-http-shallow.sh |  41 ++++++++----
 t/t5540-http-push-webdav.sh   |  26 +++++++-
 t/t5541-http-push-smart.sh    |  15 +++++
 t/t5550-http-fetch-dumb.sh    |  12 ++++
 t/t5551-http-fetch-smart.sh   |  14 ++++
 t/t5601-clone.sh              |   6 +-
 10 files changed, 237 insertions(+), 18 deletions(-)

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 3968fbb697..0beec3d370 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -260,6 +260,11 @@ http.followRedirects::
 	the base for the follow-up requests, this is generally
 	sufficient. The default is `initial`.
 
+http.retryLimit::
+	Some HTTP error codes (eg: 429,503) can reasonably be retried if
+	they're encountered.  This value configures the number of retry attempts
+	before giving up.  The default retry limit is 3.
+
 http.<url>.*::
 	Any of the http.* options above can be applied selectively to some URLs.
 	For a config key to match a URL, each element of the config key is
diff --git a/http.c b/http.c
index b3c1669388..e41d7c5575 100644
--- a/http.c
+++ b/http.c
@@ -92,6 +92,9 @@ static const char *http_proxy_ssl_key;
 static const char *http_proxy_ssl_ca_info;
 static struct credential proxy_cert_auth = CREDENTIAL_INIT;
 static int proxy_ssl_cert_password_required;
+static int http_retry_limit = 3;
+static int http_default_delay = 2;
+static int http_max_delay = 60;
 
 static struct {
 	const char *name;
@@ -219,6 +222,51 @@ size_t fwrite_null(char *ptr, size_t eltsize, size_t nmemb, void *strbuf)
 	return nmemb;
 }
 
+
+/* return 1 for a retryable HTTP code, 0 otherwise */
+static int retryable_code(int code)
+{
+	switch(code) {
+	case 429: /* fallthrough */
+	case 502: /* fallthrough */
+	case 503: /* fallthrough */
+	case 504: return 1;
+	default:  return 0;
+	}
+}
+
+size_t http_header_value(
+	const struct strbuf headers, const char *header, char **value)
+{
+	size_t len = 0;
+	struct strbuf **lines, **line;
+	char *colon = NULL;
+
+	lines = strbuf_split(&headers, '\n');
+	for (line = lines; *line; line++) {
+		strbuf_trim(*line);
+
+		/* find colon and null it out to 'split' string */
+		colon = strchr((*line)->buf, ':');
+		if (colon) {
+			*colon = '\0';
+
+			if (!strcasecmp(header, (*line)->buf)) {
+				/* move past colon and skip whitespace */
+				colon++;
+				while (*colon && isspace(*colon)) colon++;
+				*value = xstrdup(colon);
+				len = strlen(*value);
+				goto done;
+			}
+		}
+	}
+
+done:
+	strbuf_list_free(lines);
+	return len;
+}
+
 static void closedown_active_slot(struct active_request_slot *slot)
 {
 	active_requests--;
@@ -452,6 +500,11 @@ static int http_options(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp("http.retrylimit", var)) {
+		http_retry_limit = git_config_int(var, value);
+		return 0;
+	}
+
 	/* Fall back on the default ones */
 	return git_default_config(var, value, cb);
 }
@@ -1668,7 +1721,7 @@ static int handle_curl_result(struct slot_results *results)
 }
 
 int run_one_slot(struct active_request_slot *slot,
-		 struct slot_results *results)
+		 struct slot_results *results, int *http_code)
 {
 	slot->results = results;
 	if (!start_active_slot(slot)) {
@@ -1678,6 +1731,8 @@ int run_one_slot(struct active_request_slot *slot,
 	}
 
 	run_active_slot(slot);
+	if (http_code)
+		*http_code = results->http_code;
 	return handle_curl_result(results);
 }
 
@@ -1903,20 +1958,55 @@ static void http_opt_request_remainder(CURL *curl, off_t pos)
 #define HTTP_REQUEST_STRBUF	0
 #define HTTP_REQUEST_FILE	1
 
+/* check for a retry-after header in the given headers string, if found, then
+honor it, otherwise do an exponential backoff up to the max on the current delay
+*/
+static int http_retry_after(const struct strbuf headers, int cur_delay) {
+	int len, delay;
+	char *end;
+	char *value;
+
+	len = http_header_value(headers, "retry-after", &value);
+	if (len) {
+		delay = strtol(value, &end, 0);
+		if (*value && *end == 0 && delay >= 0) {
+			if (delay > http_max_delay) {
+				die(Q_(
+						"server requested retry after %d second, which is longer than max allowed\n",
+						"server requested retry after %d seconds, which is longer than max allowed\n", delay), delay);
+			}
+			free(value);
+			return delay;
+		}
+		free(value);
+	}
+
+	cur_delay *= 2;
+	return cur_delay >= http_max_delay ? http_max_delay : cur_delay;
+}
+
 static int http_request(const char *url,
 			void *result, int target,
 			const struct http_get_options *options)
 {
 	struct active_request_slot *slot;
 	struct slot_results results;
-	struct curl_slist *headers = http_copy_default_headers();
+	struct curl_slist *headers;
 	struct strbuf buf = STRBUF_INIT;
+	struct strbuf result_headers = STRBUF_INIT;
 	const char *accept_language;
 	int ret;
+	int retry_cnt = 0;
+	int retry_delay = http_default_delay;
+	int http_code;
 
+retry:
 	slot = get_active_slot();
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
 
+	curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, &result_headers);
+	curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, fwrite_buffer);
+
 	if (result == NULL) {
 		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
 	} else {
@@ -1936,6 +2026,7 @@ static int http_request(const char *url,
 
 	accept_language = get_accept_language();
 
+	headers = http_copy_default_headers();
 	if (accept_language)
 		headers = curl_slist_append(headers, accept_language);
 
@@ -1961,7 +2052,31 @@ static int http_request(const char *url,
 	curl_easy_setopt(slot->curl, CURLOPT_ENCODING, "");
 	curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
 
-	ret = run_one_slot(slot, &results);
+	http_code = 0;
+	ret = run_one_slot(slot, &results, &http_code);
+
+	if (ret != HTTP_OK) {
+		if (retryable_code(http_code) && (retry_cnt < http_retry_limit)) {
+			retry_cnt++;
+			retry_delay = http_retry_after(result_headers, retry_delay);
+			fprintf(stderr,
+			    Q_("got HTTP response %d, retrying after %d second (%d/%d)\n",
+				   "got HTTP response %d, retrying after %d seconds (%d/%d)\n",
+					retry_delay),
+				http_code, retry_delay, retry_cnt, http_retry_limit);
+			sleep(retry_delay);
+
+			// remove header data fields since not all slots will use them
+			curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, NULL);
+			curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, NULL);
+
+			goto retry;
+		}
+	}
+
+	// remove header data fields since not all slots will use them
+	curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, NULL);
+	curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, NULL);
 
 	if (options && options->content_type) {
 		struct strbuf raw = STRBUF_INIT;
diff --git a/http.h b/http.h
index 5de792ef3f..60ce03801f 100644
--- a/http.h
+++ b/http.h
@@ -86,6 +86,17 @@ size_t fwrite_null(char *ptr, size_t eltsize, size_t nmemb, void *strbuf);
 curlioerr ioctl_buffer(CURL *handle, int cmd, void *clientp);
 #endif
 
+/*
+ * Query the value of an HTTP header.
+ *
+ * If the header is found, then a newly allocate string is returned through
+ * the value parameter, and the length is returned.
+ *
+ * If not found, returns 0
+ */
+size_t http_header_value(
+	const struct strbuf headers, const char *header, char **value);
+
 /* Slot lifecycle functions */
 struct active_request_slot *get_active_slot(void);
 int start_active_slot(struct active_request_slot *slot);
@@ -99,7 +110,7 @@ void finish_all_active_slots(void);
  *
  */
 int run_one_slot(struct active_request_slot *slot,
-		 struct slot_results *results);
+		 struct slot_results *results, int *http_code);
 
 #ifdef USE_CURL_MULTI
 void fill_active_slots(void);
diff --git a/remote-curl.c b/remote-curl.c
index 7f44fa30fe..2657c95bcb 100644
--- a/remote-curl.c
+++ b/remote-curl.c
@@ -805,7 +805,7 @@ static int run_slot(struct active_request_slot *slot,
 	if (!results)
 		results = &results_buf;
 
-	err = run_one_slot(slot, results);
+	err = run_one_slot(slot, results, NULL);
 
 	if (err != HTTP_OK && err != HTTP_REAUTH) {
 		struct strbuf msg = STRBUF_INIT;
diff --git a/t/t5539-fetch-http-shallow.sh b/t/t5539-fetch-http-shallow.sh
index 82aa99ae87..e09083e2b3 100755
--- a/t/t5539-fetch-http-shallow.sh
+++ b/t/t5539-fetch-http-shallow.sh
@@ -30,20 +30,39 @@ test_expect_success 'clone http repository' '
 	git clone --bare --no-local shallow "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
 	git clone $HTTPD_URL/smart/repo.git clone &&
 	(
-	cd clone &&
-	git fsck &&
-	git log --format=%s origin/master >actual &&
-	cat <<EOF >expect &&
-7
-6
-5
-4
-3
-EOF
-	test_cmp expect actual
+		cd clone &&
+		git fsck &&
+		git log --format=%s origin/master >actual &&
+		cat <<-\EOF >expect &&
+		7
+		6
+		5
+		4
+		3
+		EOF
+		test_cmp expect actual
 	)
 '
 
+test_expect_success 'clone http repository with flaky http' '
+    rm -rf clone &&
+	git clone $HTTPD_URL/error_ntime/`gen_nonce`/3/429/1/smart/repo.git clone 2>err &&
+	(
+		cd clone &&
+		git fsck &&
+		git log --format=%s origin/master >actual &&
+		cat <<-\EOF >expect &&
+		7
+		6
+		5
+		4
+		3
+		EOF
+		test_cmp expect actual
+	) &&
+    test_i18ngrep "got HTTP response 429" err
+'
+
 # This test is tricky. We need large enough "have"s that fetch-pack
 # will put pkt-flush in between. Then we need a "have" the server
 # does not have, it'll send "ACK %s ready"
diff --git a/t/t5540-http-push-webdav.sh b/t/t5540-http-push-webdav.sh
index 450321fddb..d8234d555c 100755
--- a/t/t5540-http-push-webdav.sh
+++ b/t/t5540-http-push-webdav.sh
@@ -68,12 +68,36 @@ test_expect_success 'push already up-to-date' '
 	git push
 '
 
+test_expect_success 'push to remote repository with packed refs and flakey server' '
+	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
+	 rm -rf packed-refs &&
+	 git update-ref refs/heads/master $ORIG_HEAD &&
+	 git --bare update-server-info) &&
+    git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/dumb/test_repo.git &&
+	git push &&
+    git remote set-url origin $HTTPD_URL/dumb/test_repo.git &&
+	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
+	 test $HEAD = $(git rev-parse --verify HEAD))
+'
+
 test_expect_success 'push to remote repository with unpacked refs' '
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
-	 rm packed-refs &&
+	 rm -rf packed-refs &&
+	 git update-ref refs/heads/master $ORIG_HEAD &&
+	 git --bare update-server-info) &&
+	git push &&
+	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
+	 test $HEAD = $(git rev-parse --verify HEAD))
+'
+
+test_expect_success 'push to remote repository with unpacked refs and flakey server' '
+	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
+	 rm -rf packed-refs &&
 	 git update-ref refs/heads/master $ORIG_HEAD &&
 	 git --bare update-server-info) &&
+    git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/dumb/test_repo.git &&
 	git push &&
+    git remote set-url origin $HTTPD_URL/dumb/test_repo.git &&
 	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
 	 test $HEAD = $(git rev-parse --verify HEAD))
 '
diff --git a/t/t5541-http-push-smart.sh b/t/t5541-http-push-smart.sh
index 187454f5dd..9b2fc62e11 100755
--- a/t/t5541-http-push-smart.sh
+++ b/t/t5541-http-push-smart.sh
@@ -77,6 +77,21 @@ test_expect_success 'push to remote repository (standard)' '
 	 test $HEAD = $(git rev-parse --verify HEAD))
 '
 
+test_expect_success 'push to remote repository (flakey server)' '
+	cd "$ROOT_PATH"/test_repo_clone &&
+	: >path5 &&
+	git add path5 &&
+	test_tick &&
+	git commit -m path5 &&
+	HEAD=$(git rev-parse --verify HEAD) &&
+	git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/smart/test_repo.git &&
+	GIT_TRACE_CURL=true git push -v -v 2>err &&
+	git remote set-url origin $HTTPD_URL/smart/test_repo.git &&
+	grep "POST git-receive-pack ([0-9]* bytes)" err &&
+	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
+	test $HEAD = $(git rev-parse --verify HEAD))
+'
+
 test_expect_success 'push already up-to-date' '
 	git push
 '
diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
index 483578b2d7..350c47097b 100755
--- a/t/t5550-http-fetch-dumb.sh
+++ b/t/t5550-http-fetch-dumb.sh
@@ -176,6 +176,18 @@ test_expect_success 'fetch changes via http' '
 	test_cmp file clone/file
 '
 
+test_expect_success 'fetch changes via flakey http' '
+	echo content >>file &&
+	git commit -a -m three &&
+	git push public &&
+	(cd clone &&
+		git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/dumb/repo.git &&
+		git pull 2>../err &&
+		git remote set-url origin $HTTPD_URL/dumb/repo.git) &&
+    test_i18ngrep "got HTTP response 502" err &&
+	test_cmp file clone/file
+'
+
 test_expect_success 'fetch changes via manual http-fetch' '
 	cp -R clone-tmpl clone2 &&
 
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index e40e9ed52f..85d2a0e8b8 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -45,6 +45,7 @@ test_expect_success 'clone http repository' '
 	EOF
 	GIT_TRACE_CURL=true GIT_TEST_PROTOCOL_VERSION=0 \
 		git clone --quiet $HTTPD_URL/smart/repo.git clone 2>err &&
+    cd clone && git config pull.rebase false && cd .. &&
 	test_cmp file clone/file &&
 	tr '\''\015'\'' Q <err |
 	sed -e "
@@ -103,6 +104,19 @@ test_expect_success 'fetch changes via http' '
 	test_cmp file clone/file
 '
 
+test_expect_success 'fetch changes via flakey http' '
+	echo content >>file &&
+	git commit -a -m two &&
+	git push public &&
+	(cd clone &&
+		git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/smart/repo.git &&
+		git pull 2>../err &&
+		git remote set-url origin $HTTPD_URL/smart/repo.git) &&
+	test_cmp file clone/file &&
+    test_i18ngrep "got HTTP response 502" err
+'
+
+
 test_expect_success 'used upload-pack service' '
 	cat >exp <<-\EOF &&
 	GET  /smart/repo.git/info/refs?service=git-upload-pack HTTP/1.1 200
diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
index 71d4307001..9988e3ff14 100755
--- a/t/t5601-clone.sh
+++ b/t/t5601-clone.sh
@@ -757,13 +757,17 @@ test_expect_success 'partial clone using HTTP' '
 '
 
 test_expect_success 'partial clone using HTTP with redirect' '
-    _NONCE=`gen_nonce` && export _NONCE &&
+    _NONCE=`gen_nonce` &&
     curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
     curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
     curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
 	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server"
 '
 
+test_expect_success 'partial clone with retry' '
+	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/`gen_nonce`/3/429/1/smart/server" 2>err &&
+    test_i18ngrep "got HTTP response 429" err
+'
 
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.
-- 
2.28.0.1011.ga647a8990f-goog


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 2/3] replace CURLOPT_FILE With CURLOPT_WRITEDATA
  2020-10-12 18:48 ` [PATCH 2/3] replace CURLOPT_FILE With CURLOPT_WRITEDATA Sean McAllister
@ 2020-10-12 19:26   ` Johannes Schindelin
  0 siblings, 0 replies; 13+ messages in thread
From: Johannes Schindelin @ 2020-10-12 19:26 UTC (permalink / raw)
  To: Sean McAllister; +Cc: git, gitster, peff, masayasuzuki

Hi Sean,

On Mon, 12 Oct 2020, Sean McAllister wrote:

> CURLOPT_FILE has been deprecated since 2003.

To be precise, it has been aliased to `CURLOPT_WRITEDATA` in cURL v7.9.7:
https://github.com/curl/curl/commit/980a47b42b95d7b9ff3378dc7b0f2e1c453fb649

The `CURLOPT_WRITEDATA` symbol became the preferred one only in v7.37.1 in
2014, though:
https://github.com/curl/curl/commit/5fcef972b289bdc7f3dbd7a55a5ada0460b74b2d

Ciao,
Dscho

>
> Signed-off-by: Sean McAllister <smcallis@google.com>
> ---
>  http-push.c   | 6 +++---
>  http-walker.c | 2 +-
>  http.c        | 6 +++---
>  remote-curl.c | 4 ++--
>  4 files changed, 9 insertions(+), 9 deletions(-)
>
> diff --git a/http-push.c b/http-push.c
> index 6a4a43e07f..2e6fee3305 100644
> --- a/http-push.c
> +++ b/http-push.c
> @@ -894,7 +894,7 @@ static struct remote_lock *lock_remote(const char *path, long timeout)
>  	slot->results = &results;
>  	curl_setup_http(slot->curl, url, DAV_LOCK, &out_buffer, fwrite_buffer);
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
> +	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &in_buffer);
>
>  	lock = xcalloc(1, sizeof(*lock));
>  	lock->timeout = -1;
> @@ -1151,7 +1151,7 @@ static void remote_ls(const char *path, int flags,
>  	curl_setup_http(slot->curl, url, DAV_PROPFIND,
>  			&out_buffer, fwrite_buffer);
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
> +	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &in_buffer);
>
>  	if (start_active_slot(slot)) {
>  		run_active_slot(slot);
> @@ -1225,7 +1225,7 @@ static int locking_available(void)
>  	curl_setup_http(slot->curl, repo->url, DAV_PROPFIND,
>  			&out_buffer, fwrite_buffer);
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, dav_headers);
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, &in_buffer);
> +	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &in_buffer);
>
>  	if (start_active_slot(slot)) {
>  		run_active_slot(slot);
> diff --git a/http-walker.c b/http-walker.c
> index 4fb1235cd4..6c630711d1 100644
> --- a/http-walker.c
> +++ b/http-walker.c
> @@ -384,7 +384,7 @@ static void fetch_alternates(struct walker *walker, const char *base)
>  	alt_req.walker = walker;
>  	slot->callback_data = &alt_req;
>
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
> +	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &buffer);
>  	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
>  	curl_easy_setopt(slot->curl, CURLOPT_URL, url.buf);
>
> diff --git a/http.c b/http.c
> index 8b23a546af..b3c1669388 100644
> --- a/http.c
> +++ b/http.c
> @@ -1921,7 +1921,7 @@ static int http_request(const char *url,
>  		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
>  	} else {
>  		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 0);
> -		curl_easy_setopt(slot->curl, CURLOPT_FILE, result);
> +		curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, result);
>
>  		if (target == HTTP_REQUEST_FILE) {
>  			off_t posn = ftello(result);
> @@ -2337,7 +2337,7 @@ struct http_pack_request *new_direct_http_pack_request(
>  	}
>
>  	preq->slot = get_active_slot();
> -	curl_easy_setopt(preq->slot->curl, CURLOPT_FILE, preq->packfile);
> +	curl_easy_setopt(preq->slot->curl, CURLOPT_WRITEDATA, preq->packfile);
>  	curl_easy_setopt(preq->slot->curl, CURLOPT_WRITEFUNCTION, fwrite);
>  	curl_easy_setopt(preq->slot->curl, CURLOPT_URL, preq->url);
>  	curl_easy_setopt(preq->slot->curl, CURLOPT_HTTPHEADER,
> @@ -2508,7 +2508,7 @@ struct http_object_request *new_http_object_request(const char *base_url,
>
>  	freq->slot = get_active_slot();
>
> -	curl_easy_setopt(freq->slot->curl, CURLOPT_FILE, freq);
> +	curl_easy_setopt(freq->slot->curl, CURLOPT_WRITEDATA, freq);
>  	curl_easy_setopt(freq->slot->curl, CURLOPT_FAILONERROR, 0);
>  	curl_easy_setopt(freq->slot->curl, CURLOPT_WRITEFUNCTION, fwrite_sha1_file);
>  	curl_easy_setopt(freq->slot->curl, CURLOPT_ERRORBUFFER, freq->errorstr);
> diff --git a/remote-curl.c b/remote-curl.c
> index 32cc4a0c55..7f44fa30fe 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -847,7 +847,7 @@ static int probe_rpc(struct rpc_state *rpc, struct slot_results *results)
>  	curl_easy_setopt(slot->curl, CURLOPT_POSTFIELDSIZE, 4);
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, headers);
>  	curl_easy_setopt(slot->curl, CURLOPT_WRITEFUNCTION, fwrite_buffer);
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buf);
> +	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &buf);
>
>  	err = run_slot(slot, results);
>
> @@ -1012,7 +1012,7 @@ static int post_rpc(struct rpc_state *rpc, int stateless_connect, int flush_rece
>  	rpc_in_data.slot = slot;
>  	rpc_in_data.check_pktline = stateless_connect;
>  	memset(&rpc_in_data.pktline_state, 0, sizeof(rpc_in_data.pktline_state));
> -	curl_easy_setopt(slot->curl, CURLOPT_FILE, &rpc_in_data);
> +	curl_easy_setopt(slot->curl, CURLOPT_WRITEDATA, &rpc_in_data);
>  	curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
>
>
> --
> 2.28.0.1011.ga647a8990f-goog
>
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] http: automatically retry some requests
  2020-10-12 18:48 ` [PATCH 3/3] http: automatically retry some requests Sean McAllister
@ 2020-10-12 20:15   ` Johannes Schindelin
  2020-10-12 21:00     ` Junio C Hamano
  2020-10-13 15:03     ` Sean McAllister
  0 siblings, 2 replies; 13+ messages in thread
From: Johannes Schindelin @ 2020-10-12 20:15 UTC (permalink / raw)
  To: Sean McAllister; +Cc: git, gitster, peff, masayasuzuki

Hi Sean,

On Mon, 12 Oct 2020, Sean McAllister wrote:

> Some HTTP response codes indicate a server state that can support
> retrying the request rather than immediately erroring out.  The server
> can also provide information about how long to wait before retries
> via the Retry-After header.  So check the server response and retry
> some reasonable number of times before erroring out to better accommodate
> transient errors.
>
> Exiting immediately becomes irksome when pulling large multi-repo code
> bases such as Android or Chromium, as often the entire fetch operation
> has to be restarted from the beginning due to an error in one repo. If
> we can reduce how often that occurs, then it's a big win.

Makes a lot of sense to me.

>
> Signed-off-by: Sean McAllister <smcallis@google.com>
> ---
>  Documentation/config/http.txt |   5 ++
>  http.c                        | 121 +++++++++++++++++++++++++++++++++-
>  http.h                        |  13 +++-
>  remote-curl.c                 |   2 +-
>  t/t5539-fetch-http-shallow.sh |  41 ++++++++----
>  t/t5540-http-push-webdav.sh   |  26 +++++++-
>  t/t5541-http-push-smart.sh    |  15 +++++
>  t/t5550-http-fetch-dumb.sh    |  12 ++++
>  t/t5551-http-fetch-smart.sh   |  14 ++++
>  t/t5601-clone.sh              |   6 +-
>  10 files changed, 237 insertions(+), 18 deletions(-)
>
> diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
> index 3968fbb697..0beec3d370 100644
> --- a/Documentation/config/http.txt
> +++ b/Documentation/config/http.txt
> @@ -260,6 +260,11 @@ http.followRedirects::
>  	the base for the follow-up requests, this is generally
>  	sufficient. The default is `initial`.
>
> +http.retryLimit::
> +	Some HTTP error codes (eg: 429,503) can reasonably be retried if

Please have a space after the comma so that it is not being mistaken for a
6-digit number. Also, aren't they called "status codes"? Not all of them
indicate errors, after all.

> +	they're encountered.  This value configures the number of retry attempts
> +	before giving up.  The default retry limit is 3.
> +
>  http.<url>.*::
>  	Any of the http.* options above can be applied selectively to some URLs.
>  	For a config key to match a URL, each element of the config key is
> diff --git a/http.c b/http.c
> index b3c1669388..e41d7c5575 100644
> --- a/http.c
> +++ b/http.c
> @@ -92,6 +92,9 @@ static const char *http_proxy_ssl_key;
>  static const char *http_proxy_ssl_ca_info;
>  static struct credential proxy_cert_auth = CREDENTIAL_INIT;
>  static int proxy_ssl_cert_password_required;
> +static int http_retry_limit = 3;
> +static int http_default_delay = 2;

Should there be a config option for that? Also, it took me some time to
find the code using this variable in order to find out what unit to use:
it is seconds (not microseconds, as I had expected). Maybe this can be
documented in the variable name, or at least in a comment?

> +static int http_max_delay = 60;
>
>  static struct {
>  	const char *name;
> @@ -219,6 +222,51 @@ size_t fwrite_null(char *ptr, size_t eltsize, size_t nmemb, void *strbuf)
>  	return nmemb;
>  }
>
> +
> +/* return 1 for a retryable HTTP code, 0 otherwise */
> +static int retryable_code(int code)
> +{
> +	switch(code) {
> +	case 429: /* fallthrough */
> +	case 502: /* fallthrough */
> +	case 503: /* fallthrough */
> +	case 504: return 1;
> +	default:  return 0;
> +	}
> +}
> +
> +size_t http_header_value(

In the part of Git's code with which I am familiar, we avoid trying to
break the line after an opening parenthesis, instead preferring to break
after a comma.

Also, shouldn't we make the return type `ssize_t` to allow for a negative
value to indicate an error/missing header?

> +	const struct strbuf headers, const char *header, char **value)
> +{
> +	size_t len = 0;
> +	struct strbuf **lines, **line;
> +	char *colon = NULL;
> +
> +	lines = strbuf_split(&headers, '\n');
> +	for (line = lines; *line; line++) {
> +		strbuf_trim(*line);
> +
> +		/* find colon and null it out to 'split' string */
> +		colon = strchr((*line)->buf, ':');
> +		if (colon) {
> +			*colon = '\0';
> +
> +			if (!strcasecmp(header, (*line)->buf)) {

If all we want is to find the given header, splitting lines seems to be a
bit wasteful to me. We could instead search for the header directly:

	const char *p = strcasestr(headers.buf, header), *eol;
	size_t header_len = strlen(header);

	while (p) {
		if ((p != headers.buf && p[-1] != '\n') ||
		    p[header_len] != ':') {
			p = strcasestr(p + header_len, header);
			continue;
		}

		p += header_len + 1;
		while (isspace(*p) && *p != '\n')
			p++;
		eol = strchrnul(p, '\n');
		len =  eol - p;
		*value = xstrndup(p, len);
		return len;
	}

> +				/* move past colon and skip whitespace */
> +				colon++;
> +				while (*colon && isspace(*colon)) colon++;
> +				*value = xstrdup(colon);
> +				len = strlen(*value);
> +				goto done;
> +			}
> +		}
> +	}
> +
> +done:
> +	strbuf_list_free(lines);
> +	return len;
> +}
> +
>  static void closedown_active_slot(struct active_request_slot *slot)
>  {
>  	active_requests--;
> @@ -452,6 +500,11 @@ static int http_options(const char *var, const char *value, void *cb)
>  		return 0;
>  	}
>
> +	if (!strcmp("http.retrylimit", var)) {
> +		http_retry_limit = git_config_int(var, value);
> +		return 0;
> +	}
> +
>  	/* Fall back on the default ones */
>  	return git_default_config(var, value, cb);
>  }
> @@ -1668,7 +1721,7 @@ static int handle_curl_result(struct slot_results *results)
>  }
>
>  int run_one_slot(struct active_request_slot *slot,
> -		 struct slot_results *results)
> +		 struct slot_results *results, int *http_code)
>  {
>  	slot->results = results;
>  	if (!start_active_slot(slot)) {
> @@ -1678,6 +1731,8 @@ int run_one_slot(struct active_request_slot *slot,
>  	}
>
>  	run_active_slot(slot);
> +	if (http_code)
> +		*http_code = results->http_code;
>  	return handle_curl_result(results);
>  }
>
> @@ -1903,20 +1958,55 @@ static void http_opt_request_remainder(CURL *curl, off_t pos)
>  #define HTTP_REQUEST_STRBUF	0
>  #define HTTP_REQUEST_FILE	1
>
> +/* check for a retry-after header in the given headers string, if found, then
> +honor it, otherwise do an exponential backoff up to the max on the current delay
> +*/

Multi-line comments should be of this form:

	/*
	 * Check for a retry-after header in the given headers string, if
	 * found, then honor it, otherwise do an exponential backoff up to
	 * the maximum on the current delay.
	 */

> +static int http_retry_after(const struct strbuf headers, int cur_delay) {

For functions, the initial opening curly bracket goes on its own line.

> +	int len, delay;
> +	char *end;
> +	char *value;

Why not declare `char *end, *value;`, just like `len` and `delay` are
declared on the same line?

> +
> +	len = http_header_value(headers, "retry-after", &value);
> +	if (len) {
> +		delay = strtol(value, &end, 0);
> +		if (*value && *end == 0 && delay >= 0) {

Better: `*end == '\0'`

And why `*value` here? We already called `strtol()` on it.

> +			if (delay > http_max_delay) {
> +				die(Q_(

Let's not end the line in an opening parenthesis. Instead, use C's string
continuation like so:

				die(Q_("server requested retry after %d second,"
				       " which is longer than max allowed\n",
				       "server requested retry after %d "
				       "seconds, which is longer than max "
				       "allowed\n", delay), delay);

> +						"server requested retry after %d second, which is longer than max allowed\n",
> +						"server requested retry after %d seconds, which is longer than max allowed\n", delay), delay);
> +			}
> +			free(value);

`value` is not actually used after that `strtol()` call above, so let's
release it right then and there.

> +			return delay;
> +		}
> +		free(value);
> +	}

If the header was found, but for some reason had an empty value, we're
leaking `value` here.

> +
> +	cur_delay *= 2;
> +	return cur_delay >= http_max_delay ? http_max_delay : cur_delay;
> +}
> +
>  static int http_request(const char *url,
>  			void *result, int target,
>  			const struct http_get_options *options)
>  {
>  	struct active_request_slot *slot;
>  	struct slot_results results;
> -	struct curl_slist *headers = http_copy_default_headers();
> +	struct curl_slist *headers;
>  	struct strbuf buf = STRBUF_INIT;
> +	struct strbuf result_headers = STRBUF_INIT;
>  	const char *accept_language;
>  	int ret;
> +	int retry_cnt = 0;
> +	int retry_delay = http_default_delay;
> +	int http_code;
>
> +retry:
>  	slot = get_active_slot();
>  	curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
>
> +	curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, &result_headers);
> +	curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, fwrite_buffer);
> +
>  	if (result == NULL) {
>  		curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
>  	} else {
> @@ -1936,6 +2026,7 @@ static int http_request(const char *url,
>
>  	accept_language = get_accept_language();
>
> +	headers = http_copy_default_headers();
>  	if (accept_language)
>  		headers = curl_slist_append(headers, accept_language);
>
> @@ -1961,7 +2052,31 @@ static int http_request(const char *url,
>  	curl_easy_setopt(slot->curl, CURLOPT_ENCODING, "");
>  	curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
>
> -	ret = run_one_slot(slot, &results);
> +	http_code = 0;
> +	ret = run_one_slot(slot, &results, &http_code);
> +
> +	if (ret != HTTP_OK) {
> +		if (retryable_code(http_code) && (retry_cnt < http_retry_limit)) {

The parentheses around the second condition should be dropped.

> +			retry_cnt++;
> +			retry_delay = http_retry_after(result_headers, retry_delay);
> +			fprintf(stderr,

Should this be a `warning()` instead? I see 5 instances in `http.c` that
use `fprintf(stderr, ...)`, but 12 that use `warning()`, making me believe
that at least some of those 5 instances should call `warning()` instead,
too.

> +			    Q_("got HTTP response %d, retrying after %d second (%d/%d)\n",
> +				   "got HTTP response %d, retrying after %d seconds (%d/%d)\n",
> +					retry_delay),
> +				http_code, retry_delay, retry_cnt, http_retry_limit);
> +			sleep(retry_delay);
> +
> +			// remove header data fields since not all slots will use them

No C++-style comments, please: use /* ... */ instead.

> +			curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, NULL);
> +			curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, NULL);
> +
> +			goto retry;
> +		}
> +	}
> +
> +	// remove header data fields since not all slots will use them

No C++-style comments, please.

> +	curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, NULL);
> +	curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, NULL);

Shouldn't we just perform this assignment before the `if (ret != HTTP_OK)`
condition? I do not see anything inside that block that needs it,
therefore this could be DRY'd up.

>
>  	if (options && options->content_type) {
>  		struct strbuf raw = STRBUF_INIT;
> diff --git a/http.h b/http.h
> index 5de792ef3f..60ce03801f 100644
> --- a/http.h
> +++ b/http.h
> @@ -86,6 +86,17 @@ size_t fwrite_null(char *ptr, size_t eltsize, size_t nmemb, void *strbuf);
>  curlioerr ioctl_buffer(CURL *handle, int cmd, void *clientp);
>  #endif
>
> +/*
> + * Query the value of an HTTP header.
> + *
> + * If the header is found, then a newly allocate string is returned through
> + * the value parameter, and the length is returned.
> + *
> + * If not found, returns 0
> + */
> +size_t http_header_value(
> +	const struct strbuf headers, const char *header, char **value);

Do we really need to export this function? It could stay file-local, at
least for now (i.e. be defined `static` inside `http.c`), no?

> +
>  /* Slot lifecycle functions */
>  struct active_request_slot *get_active_slot(void);
>  int start_active_slot(struct active_request_slot *slot);
> @@ -99,7 +110,7 @@ void finish_all_active_slots(void);
>   *
>   */
>  int run_one_slot(struct active_request_slot *slot,
> -		 struct slot_results *results);
> +		 struct slot_results *results, int *http_code);
>
>  #ifdef USE_CURL_MULTI
>  void fill_active_slots(void);
> diff --git a/remote-curl.c b/remote-curl.c
> index 7f44fa30fe..2657c95bcb 100644
> --- a/remote-curl.c
> +++ b/remote-curl.c
> @@ -805,7 +805,7 @@ static int run_slot(struct active_request_slot *slot,
>  	if (!results)
>  		results = &results_buf;
>
> -	err = run_one_slot(slot, results);
> +	err = run_one_slot(slot, results, NULL);
>
>  	if (err != HTTP_OK && err != HTTP_REAUTH) {
>  		struct strbuf msg = STRBUF_INIT;
> diff --git a/t/t5539-fetch-http-shallow.sh b/t/t5539-fetch-http-shallow.sh
> index 82aa99ae87..e09083e2b3 100755
> --- a/t/t5539-fetch-http-shallow.sh
> +++ b/t/t5539-fetch-http-shallow.sh
> @@ -30,20 +30,39 @@ test_expect_success 'clone http repository' '
>  	git clone --bare --no-local shallow "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
>  	git clone $HTTPD_URL/smart/repo.git clone &&
>  	(
> -	cd clone &&
> -	git fsck &&
> -	git log --format=%s origin/master >actual &&
> -	cat <<EOF >expect &&
> -7
> -6
> -5
> -4
> -3
> -EOF
> -	test_cmp expect actual
> +		cd clone &&
> +		git fsck &&
> +		git log --format=%s origin/master >actual &&
> +		cat <<-\EOF >expect &&
> +		7
> +		6
> +		5
> +		4
> +		3
> +		EOF
> +		test_cmp expect actual

This just changes the indentation, right?

I _guess_ this is a good change, but it should live in its own patch.

>  	)
>  '
>
> +test_expect_success 'clone http repository with flaky http' '
> +    rm -rf clone &&

Let's consistently use horizontal tab characters for indentation. (There
are more instances of lines indented by spaces below.)

> +	git clone $HTTPD_URL/error_ntime/`gen_nonce`/3/429/1/smart/repo.git clone 2>err &&

Let's use `$(gen_nonce)`. Also: where is the `gen_nonce` defined? I do not
see the definition in this patch (but it could be 1/3, which for some
reason did not make it to the mailing list:
https://lore.kernel.org/git/20201012184806.166251-1-smcallis@google.com/).

Another suggestion: rather than deleting `clone/`, use a separate
directory to clone into, say, `flaky/`. That will make it easier to debug
when the entire "trash" directory is tar'ed up in a failed CI build, for
example.

> +	(
> +		cd clone &&
> +		git fsck &&
> +		git log --format=%s origin/master >actual &&
> +		cat <<-\EOF >expect &&
> +		7
> +		6
> +		5
> +		4
> +		3
> +		EOF
> +		test_cmp expect actual
> +	) &&
> +    test_i18ngrep "got HTTP response 429" err
> +'
> +
>  # This test is tricky. We need large enough "have"s that fetch-pack
>  # will put pkt-flush in between. Then we need a "have" the server
>  # does not have, it'll send "ACK %s ready"
> diff --git a/t/t5540-http-push-webdav.sh b/t/t5540-http-push-webdav.sh
> index 450321fddb..d8234d555c 100755
> --- a/t/t5540-http-push-webdav.sh
> +++ b/t/t5540-http-push-webdav.sh
> @@ -68,12 +68,36 @@ test_expect_success 'push already up-to-date' '
>  	git push
>  '
>
> +test_expect_success 'push to remote repository with packed refs and flakey server' '
> +	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
> +	 rm -rf packed-refs &&
> +	 git update-ref refs/heads/master $ORIG_HEAD &&
> +	 git --bare update-server-info) &&
> +    git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/dumb/test_repo.git &&
> +	git push &&
> +    git remote set-url origin $HTTPD_URL/dumb/test_repo.git &&
> +	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
> +	 test $HEAD = $(git rev-parse --verify HEAD))
> +'
> +
>  test_expect_success 'push to remote repository with unpacked refs' '
>  	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
> -	 rm packed-refs &&
> +	 rm -rf packed-refs &&
> +	 git update-ref refs/heads/master $ORIG_HEAD &&
> +	 git --bare update-server-info) &&
> +	git push &&
> +	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
> +	 test $HEAD = $(git rev-parse --verify HEAD))
> +'
> +
> +test_expect_success 'push to remote repository with unpacked refs and flakey server' '
> +	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
> +	 rm -rf packed-refs &&
>  	 git update-ref refs/heads/master $ORIG_HEAD &&
>  	 git --bare update-server-info) &&
> +    git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/dumb/test_repo.git &&
>  	git push &&
> +    git remote set-url origin $HTTPD_URL/dumb/test_repo.git &&
>  	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
>  	 test $HEAD = $(git rev-parse --verify HEAD))
>  '
> diff --git a/t/t5541-http-push-smart.sh b/t/t5541-http-push-smart.sh
> index 187454f5dd..9b2fc62e11 100755
> --- a/t/t5541-http-push-smart.sh
> +++ b/t/t5541-http-push-smart.sh
> @@ -77,6 +77,21 @@ test_expect_success 'push to remote repository (standard)' '
>  	 test $HEAD = $(git rev-parse --verify HEAD))
>  '
>
> +test_expect_success 'push to remote repository (flakey server)' '
> +	cd "$ROOT_PATH"/test_repo_clone &&
> +	: >path5 &&
> +	git add path5 &&
> +	test_tick &&
> +	git commit -m path5 &&
> +	HEAD=$(git rev-parse --verify HEAD) &&
> +	git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/smart/test_repo.git &&
> +	GIT_TRACE_CURL=true git push -v -v 2>err &&
> +	git remote set-url origin $HTTPD_URL/smart/test_repo.git &&
> +	grep "POST git-receive-pack ([0-9]* bytes)" err &&
> +	(cd "$HTTPD_DOCUMENT_ROOT_PATH"/test_repo.git &&
> +	test $HEAD = $(git rev-parse --verify HEAD))
> +'
> +
>  test_expect_success 'push already up-to-date' '
>  	git push
>  '
> diff --git a/t/t5550-http-fetch-dumb.sh b/t/t5550-http-fetch-dumb.sh
> index 483578b2d7..350c47097b 100755
> --- a/t/t5550-http-fetch-dumb.sh
> +++ b/t/t5550-http-fetch-dumb.sh
> @@ -176,6 +176,18 @@ test_expect_success 'fetch changes via http' '
>  	test_cmp file clone/file
>  '
>
> +test_expect_success 'fetch changes via flakey http' '
> +	echo content >>file &&
> +	git commit -a -m three &&
> +	git push public &&
> +	(cd clone &&
> +		git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/dumb/repo.git &&
> +		git pull 2>../err &&
> +		git remote set-url origin $HTTPD_URL/dumb/repo.git) &&
> +    test_i18ngrep "got HTTP response 502" err &&
> +	test_cmp file clone/file
> +'
> +
>  test_expect_success 'fetch changes via manual http-fetch' '
>  	cp -R clone-tmpl clone2 &&
>
> diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> index e40e9ed52f..85d2a0e8b8 100755
> --- a/t/t5551-http-fetch-smart.sh
> +++ b/t/t5551-http-fetch-smart.sh
> @@ -45,6 +45,7 @@ test_expect_success 'clone http repository' '
>  	EOF
>  	GIT_TRACE_CURL=true GIT_TEST_PROTOCOL_VERSION=0 \
>  		git clone --quiet $HTTPD_URL/smart/repo.git clone 2>err &&
> +    cd clone && git config pull.rebase false && cd .. &&

Better: test_config -C clone pull.rebase false

>  	test_cmp file clone/file &&
>  	tr '\''\015'\'' Q <err |
>  	sed -e "
> @@ -103,6 +104,19 @@ test_expect_success 'fetch changes via http' '
>  	test_cmp file clone/file
>  '
>
> +test_expect_success 'fetch changes via flakey http' '
> +	echo content >>file &&
> +	git commit -a -m two &&
> +	git push public &&
> +	(cd clone &&
> +		git remote set-url origin $HTTPD_URL/error_ntime/`gen_nonce`/3/502/1/smart/repo.git &&
> +		git pull 2>../err &&
> +		git remote set-url origin $HTTPD_URL/smart/repo.git) &&
> +	test_cmp file clone/file &&
> +    test_i18ngrep "got HTTP response 502" err
> +'
> +
> +
>  test_expect_success 'used upload-pack service' '
>  	cat >exp <<-\EOF &&
>  	GET  /smart/repo.git/info/refs?service=git-upload-pack HTTP/1.1 200
> diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
> index 71d4307001..9988e3ff14 100755
> --- a/t/t5601-clone.sh
> +++ b/t/t5601-clone.sh
> @@ -757,13 +757,17 @@ test_expect_success 'partial clone using HTTP' '
>  '
>
>  test_expect_success 'partial clone using HTTP with redirect' '
> -    _NONCE=`gen_nonce` && export _NONCE &&
> +    _NONCE=`gen_nonce` &&
>      curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
>      curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
>      curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
>  	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server"
>  '
>
> +test_expect_success 'partial clone with retry' '
> +	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/`gen_nonce`/3/429/1/smart/server" 2>err &&
> +    test_i18ngrep "got HTTP response 429" err
> +'

I wonder whether it is really necessary to add _that_ many test cases. The
test suite already takes so long to run that we had cases where
contributors simply did not run it before sending their contributions.

In this instance, I would think that it would be plenty sufficient to have
a single new test case that exercises the added code path (and verifies
that it can see the message).

Thanks,
Dscho

>
>  # DO NOT add non-httpd-specific tests here, because the last part of this
>  # test script is only executed when httpd is available and enabled.
> --
> 2.28.0.1011.ga647a8990f-goog
>
>

^ permalink raw reply	[flat|nested] 13+ messages in thread

* [PATCH] remote-curl: add testing for intelligent retry for HTTP
       [not found] <20201012184806.166251-1-smcallis@google.com>
  2020-10-12 18:48 ` [PATCH 2/3] replace CURLOPT_FILE With CURLOPT_WRITEDATA Sean McAllister
  2020-10-12 18:48 ` [PATCH 3/3] http: automatically retry some requests Sean McAllister
@ 2020-10-12 20:19 ` Sean McAllister
  2020-10-12 20:51   ` Junio C Hamano
  2 siblings, 1 reply; 13+ messages in thread
From: Sean McAllister @ 2020-10-12 20:19 UTC (permalink / raw)
  To: git, gitster, peff, masayasuzuki; +Cc: Sean McAllister

HTTP servers can sometimes throw errors, but that doesn't mean we should
give up.  If we have an error condition that we can reasonably retry on,
then we should.

This change is tricky because it requires a new CGI script to test as we
need to be able to instruct the server to throw an error(s) before
succeeding.  We do this by encoding an error code and optional value for
Retry-After into the URL, followed by the real endpoint:

  /error_ntime/dc724af1/<N>/429/10/smart/server

This is a bit hacky, but really the best we can do since HTTP is
fundamentally stateless.  The URL is uniquified by a nonce and we leave
a breadcrumb on disk so all accesses after the first <N> redirect to the
appropriate endpoint.

Signed-off-by: Sean McAllister <smcallis@google.com>
---
 t/lib-httpd.sh             |  6 +++
 t/lib-httpd/apache.conf    |  9 +++++
 t/lib-httpd/error-ntime.sh | 79 ++++++++++++++++++++++++++++++++++++++
 t/t5601-clone.sh           |  9 +++++
 4 files changed, 103 insertions(+)
 create mode 100755 t/lib-httpd/error-ntime.sh

diff --git a/t/lib-httpd.sh b/t/lib-httpd.sh
index d2edfa4c50..da1d4adfb4 100644
--- a/t/lib-httpd.sh
+++ b/t/lib-httpd.sh
@@ -132,6 +132,7 @@ prepare_httpd() {
 	install_script incomplete-length-upload-pack-v2-http.sh
 	install_script incomplete-body-upload-pack-v2-http.sh
 	install_script broken-smart-http.sh
+	install_script error-ntime.sh
 	install_script error-smart-http.sh
 	install_script error.sh
 	install_script apply-one-time-perl.sh
@@ -308,3 +309,8 @@ check_access_log() {
 		test_cmp "$1" access.log.stripped
 	fi
 }
+
+# generate a random 12 digit string
+gen_nonce() {
+    test_copy_bytes 12 < /dev/urandom | tr -dc A-Za-z0-9
+}
diff --git a/t/lib-httpd/apache.conf b/t/lib-httpd/apache.conf
index afa91e38b0..77c495e164 100644
--- a/t/lib-httpd/apache.conf
+++ b/t/lib-httpd/apache.conf
@@ -117,6 +117,12 @@ Alias /auth/dumb/ www/auth/dumb/
 	SetEnv GIT_EXEC_PATH ${GIT_EXEC_PATH}
 	SetEnv GIT_HTTP_EXPORT_ALL
 </LocationMatch>
+
+# This may be suffixed with any path for redirection, so it should come before
+# any of the other aliases, particularly the /smart_*[^/]*/(.*) alias as that can
+# match a lot of URLs
+ScriptAlias /error_ntime/ error-ntime.sh/
+
 ScriptAlias /smart/incomplete_length/git-upload-pack incomplete-length-upload-pack-v2-http.sh/
 ScriptAlias /smart/incomplete_body/git-upload-pack incomplete-body-upload-pack-v2-http.sh/
 ScriptAliasMatch /error_git_upload_pack/(.*)/git-upload-pack error.sh/
@@ -137,6 +143,9 @@ ScriptAliasMatch /one_time_perl/(.*) apply-one-time-perl.sh/$1
 <Files broken-smart-http.sh>
 	Options ExecCGI
 </Files>
+<Files error-ntime.sh>
+	Options ExecCGI
+</Files>
 <Files error-smart-http.sh>
 	Options ExecCGI
 </Files>
diff --git a/t/lib-httpd/error-ntime.sh b/t/lib-httpd/error-ntime.sh
new file mode 100755
index 0000000000..e4f91ab816
--- /dev/null
+++ b/t/lib-httpd/error-ntime.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+# Script to simulate a transient error code with Retry-After header set.
+#
+# PATH_INFO must be of the form /<nonce>/<times>/<retcode>/<retry-after>/<path>
+#   (eg: /dc724af1/3/429/10/some/url)
+#
+# The <nonce> value uniquely identifies the URL, since we're simulating
+# a stateful operation using a stateless protocol, we need a way to "namespace"
+# URLs so that they don't step on each other.
+#
+# The first <times> times this endpoint is called, it will return the given
+# <retcode>, and if the <retry-after> is non-negative, it will set the
+# Retry-After head to that value.
+#
+# Subsequent calls will return a 302 redirect to <path>.
+#
+# Supported error codes are 429,502,503, and 504
+
+print_status() {
+      if [ "$1" -eq "302" ]; then printf "Status: 302 Found\n"
+    elif [ "$1" -eq "429" ]; then printf "Status: 429 Too Many Requests\n"
+    elif [ "$1" -eq "502" ]; then printf "Status: 502 Bad Gateway\n"
+    elif [ "$1" -eq "503" ]; then printf "Status: 503 Service Unavailable\n"
+    elif [ "$1" -eq "504" ]; then printf "Status: 504 Gateway Timeout\n"
+    else
+        printf "Status: 500 Internal Server Error\n"
+    fi
+    printf "Content-Type: text/plain\n"
+}
+
+# read in path split into components
+IFS='/'
+tokens=($PATH_INFO)
+
+# break out code/retry parts of path
+nonce=${tokens[1]}
+times=${tokens[2]}
+code=${tokens[3]}
+retry=${tokens[4]}
+
+# get redirect path
+cnt=0
+path=""
+for ((ii=0; ii < ${#PATH_INFO}; ii++)); do
+    if [ "${PATH_INFO:${ii}:1}" == "/" ]; then
+        let cnt=${cnt}+1
+    fi
+    if [ "${cnt}" -eq 5 ]; then
+        path="${PATH_INFO:${ii}}"
+        break
+    fi
+done
+
+# leave a cookie for this request/retry count
+state_file="request_${REMOTE_ADDR}_${nonce}_${times}_${code}_${retry}"
+
+if [ ! -f "$state_file" ]; then
+    echo 0 > "$state_file"
+fi
+
+
+read cnt < "$state_file"
+if [ "$cnt" -lt "$times" ]; then
+    let cnt=cnt+1
+    echo "$cnt" > "$state_file"
+
+    # return error
+    print_status "$code"
+    if [ "$retry" -ge "0" ]; then
+        printf "Retry-After: %s\n" "$retry"
+    fi
+else
+    # redirect
+    print_status 302
+    printf "Location: %s?%s\n" "$path" "${QUERY_STRING}"
+fi
+
+echo
diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
index 7df3c5373a..71d4307001 100755
--- a/t/t5601-clone.sh
+++ b/t/t5601-clone.sh
@@ -756,6 +756,15 @@ test_expect_success 'partial clone using HTTP' '
 	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server"
 '
 
+test_expect_success 'partial clone using HTTP with redirect' '
+    _NONCE=`gen_nonce` && export _NONCE &&
+    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
+    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
+    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
+	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server"
+'
+
+
 # DO NOT add non-httpd-specific tests here, because the last part of this
 # test script is only executed when httpd is available and enabled.
 
-- 
2.28.0.1011.ga647a8990f-goog


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] remote-curl: add testing for intelligent retry for HTTP
  2020-10-12 20:19 ` [PATCH] remote-curl: add testing for intelligent retry for HTTP Sean McAllister
@ 2020-10-12 20:51   ` Junio C Hamano
  2020-10-12 22:20     ` Sean McAllister
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2020-10-12 20:51 UTC (permalink / raw)
  To: Sean McAllister; +Cc: git, peff, masayasuzuki

Sean McAllister <smcallis@google.com> writes:

> +# generate a random 12 digit string
> +gen_nonce() {
> +    test_copy_bytes 12 < /dev/urandom | tr -dc A-Za-z0-9
> +}

What is the randomness requirement of this application?  IOW, what
breaks if we just change this to "echo 0123456789ab"?

Or "date | git hash-object --stdin" for that matter?

We'd want to make our tests more predictable, not less.

> diff --git a/t/lib-httpd/error-ntime.sh b/t/lib-httpd/error-ntime.sh
> new file mode 100755
> index 0000000000..e4f91ab816
> --- /dev/null
> +++ b/t/lib-httpd/error-ntime.sh
> @@ -0,0 +1,79 @@
> +#!/bin/sh
> +
> +# Script to simulate a transient error code with Retry-After header set.
> +#
> +# PATH_INFO must be of the form /<nonce>/<times>/<retcode>/<retry-after>/<path>
> +#   (eg: /dc724af1/3/429/10/some/url)
> +#
> +# The <nonce> value uniquely identifies the URL, since we're simulating
> +# a stateful operation using a stateless protocol, we need a way to "namespace"
> +# URLs so that they don't step on each other.
> +#
> +# The first <times> times this endpoint is called, it will return the given
> +# <retcode>, and if the <retry-after> is non-negative, it will set the
> +# Retry-After header to that value.
> +#
> +# Subsequent calls will return a 302 redirect to <path>.
> +#
> +# Supported error codes are 429,502,503, and 504
> +
> +print_status() {
> +      if [ "$1" -eq "302" ]; then printf "Status: 302 Found\n"
> +    elif [ "$1" -eq "429" ]; then printf "Status: 429 Too Many Requests\n"
> +    elif [ "$1" -eq "502" ]; then printf "Status: 502 Bad Gateway\n"
> +    elif [ "$1" -eq "503" ]; then printf "Status: 503 Service Unavailable\n"
> +    elif [ "$1" -eq "504" ]; then printf "Status: 504 Gateway Timeout\n"
> +    else
> +        printf "Status: 500 Internal Server Error\n"
> +    fi
> +    printf "Content-Type: text/plain\n"

Style????? (I won't repeat this comment for the rest of this script)

I briefly wondered "oh, are t/lib-httpd/* scripts exempt from the
coding guidelines?" but a quick look at them tells me that that is
not the case.

> +}
> +
> +# read in path split into components
> +IFS='/'
> +tokens=($PATH_INFO)
> +
> +# break out code/retry parts of path
> +nonce=${tokens[1]}
> +times=${tokens[2]}
> +code=${tokens[3]}
> +retry=${tokens[4]}

You said /bin/sh upfront.  Don't use non-POSIX shell arrays.

> +
> +# get redirect path
> +cnt=0
> +path=""
> +for ((ii=0; ii < ${#PATH_INFO}; ii++)); do
> +    if [ "${PATH_INFO:${ii}:1}" == "/" ]; then
> +        let cnt=${cnt}+1
> +    fi
> +    if [ "${cnt}" -eq 5 ]; then
> +        path="${PATH_INFO:${ii}}"
> +        break
> +    fi
> +done
> +
> +# leave a cookie for this request/retry count
> +state_file="request_${REMOTE_ADDR}_${nonce}_${times}_${code}_${retry}"
> +
> +if [ ! -f "$state_file" ]; then
> +    echo 0 > "$state_file"
> +fi
> +
> +
> +read cnt < "$state_file"
> +if [ "$cnt" -lt "$times" ]; then
> +    let cnt=cnt+1
> +    echo "$cnt" > "$state_file"
> +
> +    # return error
> +    print_status "$code"
> +    if [ "$retry" -ge "0" ]; then
> +        printf "Retry-After: %s\n" "$retry"
> +    fi
> +else
> +    # redirect
> +    print_status 302
> +    printf "Location: %s?%s\n" "$path" "${QUERY_STRING}"
> +fi
> +
> +echo
> diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
> index 7df3c5373a..71d4307001 100755
> --- a/t/t5601-clone.sh
> +++ b/t/t5601-clone.sh
> @@ -756,6 +756,15 @@ test_expect_success 'partial clone using HTTP' '
>  	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server"
>  '
>  
> +test_expect_success 'partial clone using HTTP with redirect' '
> +    _NONCE=`gen_nonce` && export _NONCE &&
> +    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
> +    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
> +    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&

These lines are not indented with HT?

Don't redirect test output to /dev/null, which is done by test_expect_success
for us.  >/dev/null makes it less useful to run the test under "-v" option.

> +	partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server"
> +'
> +
> +
>  # DO NOT add non-httpd-specific tests here, because the last part of this
>  # test script is only executed when httpd is available and enabled.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] http: automatically retry some requests
  2020-10-12 20:15   ` Johannes Schindelin
@ 2020-10-12 21:00     ` Junio C Hamano
  2020-10-13 15:03     ` Sean McAllister
  1 sibling, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2020-10-12 21:00 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Sean McAllister, git, peff, masayasuzuki

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Hi Sean,
>
> On Mon, 12 Oct 2020, Sean McAllister wrote:
>
>> Some HTTP response codes indicate a server state that can support
>> retrying the request rather than immediately erroring out.  The server
>> can also provide information about how long to wait before retries to
>> via the Retry-After header.  So check the server response and retry
>> some reasonable number of times before erroring out to better accommodate
>> transient errors.
>>
>> Exiting immediately becomes irksome when pulling large multi-repo code
>> bases such as Android or Chromium, as often the entire fetch operation
>> has to be restarted from the beginning due to an error in one repo. If
>> we can reduce how often that occurs, then it's a big win.
>
> Makes a lot of sense to me.
> ...
>> +http.retryLimit::
>> +	Some HTTP error codes (eg: 429,503) can reasonably be retried if
>
> Please have a space after the comma so that it is not being mistaken for a
> 6-digit number. Also, aren't they called "status codes"? Not all of them
> indicate errors, after all.
> ...

I've read your comments and agree to them all.  Thanks for a
detailed and excellent review.


^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] remote-curl: add testing for intelligent retry for HTTP
  2020-10-12 20:51   ` Junio C Hamano
@ 2020-10-12 22:20     ` Sean McAllister
  2020-10-12 22:30       ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Sean McAllister @ 2020-10-12 22:20 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, peff, masayasuzuki

> Sean McAllister <smcallis@google.com> writes:
>
> > +# generate a random 12 digit string
> > +gen_nonce() {
> > +    test_copy_bytes 12 < /dev/urandom | tr -dc A-Za-z0-9
> > +}
>
> What is the randomness requirement of this application?  IOW, what
> breaks if we just change this to "echo 0123456789ab"?
>
> Or "date | git hash-object --stdin" for that matter?
>
> We'd want to make our tests more predictable, not less.

The randomness requirement is just that I need nonces to be unique
during a single run of the HTTP server
as they uniquefy the files I put on disk to make the HTTP hack-ily
stateful.  I'd be fine with your date/hash-object
solution, but I don't know that it will help make the tests more predictable.

>
> > diff --git a/t/lib-httpd/error-ntime.sh b/t/lib-httpd/error-ntime.sh
> > new file mode 100755
> > index 0000000000..e4f91ab816
> > --- /dev/null
> > +++ b/t/lib-httpd/error-ntime.sh
> > @@ -0,0 +1,79 @@
> > +#!/bin/sh
> > +
> > +# Script to simulate a transient error code with Retry-After header set.
> > +#
> > +# PATH_INFO must be of the form /<nonce>/<times>/<retcode>/<retry-after>/<path>
> > +#   (eg: /dc724af1/3/429/10/some/url)
> > +#
> > +# The <nonce> value uniquely identifies the URL, since we're simulating
> > +# a stateful operation using a stateless protocol, we need a way to "namespace"
> > +# URLs so that they don't step on each other.
> > +#
> > +# The first <times> times this endpoint is called, it will return the given
> > +# <retcode>, and if the <retry-after> is non-negative, it will set the
> > +# Retry-After header to that value.
> > +#
> > +# Subsequent calls will return a 302 redirect to <path>.
> > +#
> > +# Supported error codes are 429,502,503, and 504
> > +
> > +print_status() {
> > +      if [ "$1" -eq "302" ]; then printf "Status: 302 Found\n"
> > +    elif [ "$1" -eq "429" ]; then printf "Status: 429 Too Many Requests\n"
> > +    elif [ "$1" -eq "502" ]; then printf "Status: 502 Bad Gateway\n"
> > +    elif [ "$1" -eq "503" ]; then printf "Status: 503 Service Unavailable\n"
> > +    elif [ "$1" -eq "504" ]; then printf "Status: 504 Gateway Timeout\n"
> > +    else
> > +        printf "Status: 500 Internal Server Error\n"
> > +    fi
> > +    printf "Content-Type: text/plain\n"
>
> Style????? (I won't repeat this comment for the rest of this script)
>
> I briefly wondered "oh, are t/lib-httpd/* scripts excempt from the
> coding guidelines?" but a quick look at them tells me that that is
> not the case.
>

I mistakenly thought the Makefile in t/ was linting these as well.
I've gone back through and fixed formatting issues and removed
non-posix constructs.

> > +}
> > +
> > +# read in path split into components
> > +IFS='/'
> > +tokens=($PATH_INFO)
> > +
> > +# break out code/retry parts of path
> > +nonce=${tokens[1]}
> > +times=${tokens[2]}
> > +code=${tokens[3]}
> > +retry=${tokens[4]}
>
> You said /bin/sh upfront.  Don't use non-POSIX shell arrays.
>
> > +
> > +# get redirect path
> > +cnt=0
> > +path=""
> > +for ((ii=0; ii < ${#PATH_INFO}; ii++)); do
> > +    if [ "${PATH_INFO:${ii}:1}" == "/" ]; then
> > +        let cnt=${cnt}+1
> > +    fi
> > +    if [ "${cnt}" -eq 5 ]; then
> > +        path="${PATH_INFO:${ii}}"
> > +        break
> > +    fi
> > +done
> > +
> > +# leave a cookie for this request/retry count
> > +state_file="request_${REMOTE_ADDR}_${nonce}_${times}_${code}_${retry}"
> > +
> > +if [ ! -f "$state_file" ]; then
> > +    echo 0 > "$state_file"
> > +fi
> > +
> > +
> > +read cnt < "$state_file"
> > +if [ "$cnt" -lt "$times" ]; then
> > +    let cnt=cnt+1
> > +    echo "$cnt" > "$state_file"
> > +
> > +    # return error
> > +    print_status "$code"
> > +    if [ "$retry" -ge "0" ]; then
> > +        printf "Retry-After: %s\n" "$retry"
> > +    fi
> > +else
> > +    # redirect
> > +    print_status 302
> > +    printf "Location: %s?%s\n" "$path" "${QUERY_STRING}"
> > +fi
> > +
> > +echo
> > diff --git a/t/t5601-clone.sh b/t/t5601-clone.sh
> > index 7df3c5373a..71d4307001 100755
> > --- a/t/t5601-clone.sh
> > +++ b/t/t5601-clone.sh
> > @@ -756,6 +756,15 @@ test_expect_success 'partial clone using HTTP' '
> >       partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/smart/server"
> >  '
> >
> > +test_expect_success 'partial clone using HTTP with redirect' '
> > +    _NONCE=`gen_nonce` && export _NONCE &&
> > +    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
> > +    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
> > +    curl "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server" > /dev/null &&
>
> These lines are not indented with HT?
>
> Don't redirect test output to /dev/null, which is done by test_expect_success
> for us.  >/dev/null makes it less useful to run the test under "-v" option.
>

Fixed in v2.

> > +     partial_clone "$HTTPD_DOCUMENT_ROOT_PATH/server" "$HTTPD_URL/error_ntime/${_NONCE}/3/502/10/smart/server"
> > +'
> > +
> > +
> >  # DO NOT add non-httpd-specific tests here, because the last part of this
> >  # test script is only executed when httpd is available and enabled.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] remote-curl: add testing for intelligent retry for HTTP
  2020-10-12 22:20     ` Sean McAllister
@ 2020-10-12 22:30       ` Junio C Hamano
  2020-10-13 14:25         ` Johannes Schindelin
  0 siblings, 1 reply; 13+ messages in thread
From: Junio C Hamano @ 2020-10-12 22:30 UTC (permalink / raw)
  To: Sean McAllister; +Cc: git, peff, masayasuzuki

Sean McAllister <smcallis@google.com> writes:

>> Sean McAllister <smcallis@google.com> writes:
>>
>> > +# generate a random 12 digit string
>> > +gen_nonce() {
>> > +    test_copy_bytes 12 < /dev/urandom | tr -dc A-Za-z0-9
>> > +}
>>
>> What is the randomness requirement of this application?  IOW, what
>> breaks if we just change this to "echo 0123456789ab"?
>>
>> Or "date | git hash-object --stdin" for that matter?
>>
>> We'd want to make our tests more predictable, not less.
>
> The randomness requirement is just that I need nonces to be unique
> during a single run of the HTTP server
> as they uniquefy the files I put on disk to make the HTTP hack-ily
> stateful.  I'd be fine with your date/hash-object
> solution, but I don't know that it will help make the tests more predictable.

If so, would something like this be

    global_counter_for_nonce=0
    gen_nonce () {
	global_counter_for_nonce=$(( global_counter_for_nonce + 1 )) &&
	echo "$global_counter_for_nonce"
    }

more appropriate?  It is utterly predictable and yields the same
answer only once during a single run.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] remote-curl: add testing for intelligent retry for HTTP
  2020-10-12 22:30       ` Junio C Hamano
@ 2020-10-13 14:25         ` Johannes Schindelin
  2020-10-13 17:29           ` Junio C Hamano
  0 siblings, 1 reply; 13+ messages in thread
From: Johannes Schindelin @ 2020-10-13 14:25 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: Sean McAllister, git, peff, masayasuzuki

Hi Junio,

On Mon, 12 Oct 2020, Junio C Hamano wrote:

> Sean McAllister <smcallis@google.com> writes:
>
> >> Sean McAllister <smcallis@google.com> writes:
> >>
> >> > +# generate a random 12 digit string
> >> > +gen_nonce() {
> >> > +    test_copy_bytes 12 < /dev/urandom | tr -dc A-Za-z0-9
> >> > +}
> >>
> >> What is the randomness requirement of this application?  IOW, what
> >> breaks if we just change this to "echo 0123456789ab"?
> >>
> >> Or "date | git hash-object --stdin" for that matter?
> >>
> >> We'd want to make our tests more predictable, not less.
> >
> > The randomness requirement is just that I need nonces to be unique
> > during a single run of the HTTP server
> > as they uniquefy the files I put on disk to make the HTTP hack-ily
> > stateful.  I'd be fine with your date/hash-object
> > solution, but I don't know that it will help make the tests more predictable.
>
> If so, would something like this be
>
>     global_counter_for_nonce=0
>     gen_nonce () {
> 	global_counter_for_nonce=$(( global_counter_for_nonce + 1 )) &&
> 	echo "$global_counter_for_nonce"
>     }
>
> more appropriate?  It is utterly predictable and yields the same
> answer only once during a single run.

We should also consider using `test-tool genrandom <seed>` instead (where
`<seed>` would have to be predictable, but probably would have to change
between `gen_nonce()` calls).

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] http: automatically retry some requests
  2020-10-12 20:15   ` Johannes Schindelin
  2020-10-12 21:00     ` Junio C Hamano
@ 2020-10-13 15:03     ` Sean McAllister
  2020-10-13 17:45       ` Junio C Hamano
  1 sibling, 1 reply; 13+ messages in thread
From: Sean McAllister @ 2020-10-13 15:03 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: git, Junio C Hamano, peff, Masaya Suzuki

> >
> > +http.retryLimit::
> > +     Some HTTP error codes (eg: 429,503) can reasonably be retried if
>
> Please have a space after the comma so that it is not being mistaken for a
> 6-digit number. Also, aren't they called "status codes"? Not all of them
> indicate errors, after all.

Done for v2, good point on the nomenclature.

> > diff --git a/http.c b/http.c
> > index b3c1669388..e41d7c5575 100644
> > --- a/http.c
> > +++ b/http.c
> > @@ -92,6 +92,9 @@ static const char *http_proxy_ssl_key;
> >  static const char *http_proxy_ssl_ca_info;
> >  static struct credential proxy_cert_auth = CREDENTIAL_INIT;
> >  static int proxy_ssl_cert_password_required;
> > +static int http_retry_limit = 3;
> > +static int http_default_delay = 2;
>
> Should there be a config option for that? Also, it took me some time to
> find the code using this variable in order to find out what unit to use:
> it is seconds (not microseconds, as I had expected). Maybe this can be
> documented in the variable name, or at least in a comment?
>

Junio tossed that out during our private review and I think we decided to just
leave them as non-const so we have that option going forward.  I'm not
sure there's
a strong story for configuring the default delay.

I went through and changed all the _delay variables to be more explicit they're
in seconds.

> > @@ -219,6 +222,51 @@ size_t fwrite_null(char *ptr, size_t eltsize, size_t nmemb, void *strbuf)
> >       return nmemb;
> >  }
> >
> > +
> > +/* return 1 for a retryable HTTP code, 0 otherwise */
> > +static int retryable_code(int code)
> > +{
> > +     switch(code) {
> > +     case 429: /* fallthrough */
> > +     case 502: /* fallthrough */
> > +     case 503: /* fallthrough */
> > +     case 504: return 1;
> > +     default:  return 0;
> > +     }
> > +}
> > +
> > +size_t http_header_value(
>
> In the part of Git's code with which I am familiar, we avoid trying to
> break the line after an opening parenthesis, instead preferring to break
> after a comma.
>
> Also, shouldn't we make the return type `ssize_t` to allow for a negative
> value to indicate an error/missing header?

I return zero to indicate no header found (or zero-length value), so this should
never return a negative value.  One can check value to see if a string
was allocated
for a zero-length header.

>
> > +     const struct strbuf headers, const char *header, char **value)
> > +{
> > +     size_t len = 0;
> > +     struct strbuf **lines, **line;
> > +     char *colon = NULL;
> > +
> > +     lines = strbuf_split(&headers, '\n');
> > +     for (line = lines; *line; line++) {
> > +             strbuf_trim(*line);
> > +
> > +             /* find colon and null it out to 'split' string */
> > +             colon = strchr((*line)->buf, ':');
> > +             if (colon) {
> > +                     *colon = '\0';
> > +
> > +                     if (!strcasecmp(header, (*line)->buf)) {
>
> If all we want is to find the given header, splitting lines seems to be a
> bit wasteful to me. We could instead search for the header directly:
>
>         const char *p = strcasestr(headers.buf, header), *eol;
>         size_t header_len = strlen(header);
>
>         while (p) {
>                 if ((p != headers.buf && p[-1] != '\n') ||
>                     p[header_len] != ':') {
>                         p = strcasestr(p + header_len, header);
>                         continue;
>                 }
>
>                 p += header_len + 1;
>                 while (isspace(*p) && *p != '\n')
>                         p++;
>                 eol = strchrnul(p, '\n');
>                 len =  eol - p;
>                 *value = xstrndup(p, len);
>                 return len;
>         }
>

I've been writing a lot of python code lately =D  So splitting into
lines was a natural paradigm for me.  You're right, I like yours more.  I've
refactored it to be closer to that.  Little bit of fiddling to deal with header
whitespace properly, but it's pretty close.

I also modified to just return the value pointer directly, then it's clear when
we get a zero-length header or don't find it completely, and it fixes the
leak issue below.

> > @@ -1903,20 +1958,55 @@ static void http_opt_request_remainder(CURL *curl, off_t pos)
> >  #define HTTP_REQUEST_STRBUF  0
> >  #define HTTP_REQUEST_FILE    1
> >
> > +/* check for a retry-after header in the given headers string, if found, then
> > +honor it, otherwise do an exponential backoff up to the max on the current delay
> > +*/
>
> Multi-line comments should be of this form:
>
>         /*
>          * Check for a retry-after header in the given headers string, if
>          * found, then honor it, otherwise do an exponential backoff up to
>          * the maximum on the current delay.
>          */
>
Done.

> > +static int http_retry_after(const struct strbuf headers, int cur_delay) {
>
> For functions, the initial opening curly bracket goes on its own line.
>
Done.

> > +     int len, delay;
> > +     char *end;
> > +     char *value;
>
> Why not declare `char *end, *value;`, just like `len` and `delay` are
> declared on the same line?
>
Done.

> > +
> > +     len = http_header_value(headers, "retry-after", &value);
> > +     if (len) {
> > +             delay = strtol(value, &end, 0);
> > +             if (*value && *end == 0 && delay >= 0) {
>
> Better: `*end == '\0'`
>
> And why `*value` here? We already called `strtol()` on it.
>
Fixed, and I check value per the man page on strtol:
    > In particular, if *nptr is not '\0' but **endptr is '\0' on
return, the entire string is valid.
I wanted to make sure I converted the entire header value, so this
seemed correct?

> > +                     if (delay > http_max_delay) {
> > +                             die(Q_(
>
> Let's not end the line in an opening parenthesis. Instead, use C's string
> continuation like so:
>
>                                 die(Q_("server requested retry after %d second,"
>                                        " which is longer than max allowed\n",
>                                        "server requested retry after %d "
>                                        "seconds, which is longer than max "
>                                        "allowed\n", delay), delay);
>
Done.

> > +                                             "server requested retry after %d second, which is longer than max allowed\n",
> > +                                             "server requested retry after %d seconds, which is longer than max allowed\n", delay), delay);
> > +                     }
> > +                     free(value);
>
> `value` is not actually used after that `strtol()` call above, so let's
> release it right then and there.
>
Good call, done.

> > +                     return delay;
> > +             }
> > +             free(value);
> > +     }
>
> If the header was found, but for some reason had an empty value, we're
> leaking `value` here.
>
I return the pointer directly and check that now, so if it's allocated, we'll
always call free, even if it's an empty string.

> > +
> > +     cur_delay *= 2;
> > +     return cur_delay >= http_max_delay ? http_max_delay : cur_delay;
> > +}
> > +
> >  static int http_request(const char *url,
> >                       void *result, int target,
> >                       const struct http_get_options *options)
> >  {
> >       struct active_request_slot *slot;
> >       struct slot_results results;
> > -     struct curl_slist *headers = http_copy_default_headers();
> > +     struct curl_slist *headers;
> >       struct strbuf buf = STRBUF_INIT;
> > +     struct strbuf result_headers = STRBUF_INIT;
> >       const char *accept_language;
> >       int ret;
> > +     int retry_cnt = 0;
> > +     int retry_delay = http_default_delay;
> > +     int http_code;
> >
> > +retry:
> >       slot = get_active_slot();
> >       curl_easy_setopt(slot->curl, CURLOPT_HTTPGET, 1);
> >
> > +     curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, &result_headers);
> > +     curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, fwrite_buffer);
> > +
> >       if (result == NULL) {
> >               curl_easy_setopt(slot->curl, CURLOPT_NOBODY, 1);
> >       } else {
> > @@ -1936,6 +2026,7 @@ static int http_request(const char *url,
> >
> >       accept_language = get_accept_language();
> >
> > +     headers = http_copy_default_headers();
> >       if (accept_language)
> >               headers = curl_slist_append(headers, accept_language);
> >
> > @@ -1961,7 +2052,31 @@ static int http_request(const char *url,
> >       curl_easy_setopt(slot->curl, CURLOPT_ENCODING, "");
> >       curl_easy_setopt(slot->curl, CURLOPT_FAILONERROR, 0);
> >
> > -     ret = run_one_slot(slot, &results);
> > +     http_code = 0;
> > +     ret = run_one_slot(slot, &results, &http_code);
> > +
> > +     if (ret != HTTP_OK) {
> > +             if (retryable_code(http_code) && (retry_cnt < http_retry_limit)) {
>
> The parentheses around the second condition should be dropped.
>
Done.

> > +                     retry_cnt++;
> > +                     retry_delay = http_retry_after(result_headers, retry_delay);
> > +                     fprintf(stderr,
>
> Should this be a `warning()` instead? I see 5 instances in `http.c` that
> use `fprintf(stderr, ...)`, but 12 that use `warning()`, making me believe
> that at least some of those 5 instances should call `warning()` instead,
> too.
>
At your discretion.  I'm not familiar with the logging framework.
Only one of those 5 fprintf
is in my patch, so I changed that one to use warning().

> > +                         Q_("got HTTP response %d, retrying after %d second (%d/%d)\n",
> > +                                "got HTTP response %d, retrying after %d seconds (%d/%d)\n",
> > +                                     retry_delay),
> > +                             http_code, retry_delay, retry_cnt, http_retry_limit);
> > +                     sleep(retry_delay);
> > +
> > +                     // remove header data fields since not all slots will use them
>
> No C++-style comments, please: use /* ... */ instead.
>
Thought I got them all.  Done.

> > +                     curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, NULL);
> > +                     curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, NULL);
> > +
> > +                     goto retry;
> > +             }
> > +     }
> > +
> > +     // remove header data fields since not all slots will use them
>
> No C++-style comments, please.
>
Done.

> > +     curl_easy_setopt(slot->curl, CURLOPT_HEADERDATA, NULL);
> > +     curl_easy_setopt(slot->curl, CURLOPT_HEADERFUNCTION, NULL);
>
> Shouldn't we just perform this assignment before the `if (ret != HTTP_OK)`
> condition? I do not see anything inside that block that needs it,
> therefore this could be DRY'd up.
>
Excellent idea, done.

> > +/*
> > + * Query the value of an HTTP header.
> > + *
> > + * If the header is found, then a newly allocate string is returned through
> > + * the value parameter, and the length is returned.
> > + *
> > + * If not found, returns 0
> > + */
> > +size_t http_header_value(
> > +     const struct strbuf headers, const char *header, char **value);
>
> Do we really need to export this function? It could stay file-local, at
> least for now (i.e. be defined `static` inside `http.c`), no?
>
I originally thought I'd need it elsewhere, so a big of a relic there.  I made
it static for now.

> > --- a/t/t5539-fetch-http-shallow.sh
> > +++ b/t/t5539-fetch-http-shallow.sh
> > @@ -30,20 +30,39 @@ test_expect_success 'clone http repository' '
> >       git clone --bare --no-local shallow "$HTTPD_DOCUMENT_ROOT_PATH/repo.git" &&
> >       git clone $HTTPD_URL/smart/repo.git clone &&
> >       (
> > -     cd clone &&
> > -     git fsck &&
> > -     git log --format=%s origin/master >actual &&
> > -     cat <<EOF >expect &&
> > -7
> > -6
> > -5
> > -4
> > -3
> > -EOF
> > -     test_cmp expect actual
> > +             cd clone &&
> > +             git fsck &&
> > +             git log --format=%s origin/master >actual &&
> > +             cat <<-\EOF >expect &&
> > +             7
> > +             6
> > +             5
> > +             4
> > +             3
> > +             EOF
> > +             test_cmp expect actual
>
> This just changes the indentation, right?
>
> I _guess_ this is a good change, but it should live in its own patch.
>
This was in combination with my adding a new test, we cleaned up the
whitespace too per Junio rather than copy-and-pasting ill formatted code.

It's OBE now because this change has been removed in v2.

> > +test_expect_success 'clone http repository with flaky http' '
> > +    rm -rf clone &&
>
> Let's consistently use horizontal tab characters for indentation. (There
> are more instances of lines indented by spaces below.)
>
*sigh* thought I got them all, fixed.

> > +     git clone $HTTPD_URL/error_ntime/`gen_nonce`/3/429/1/smart/repo.git clone 2>err &&
>
> Let's use `$(gen_nonce)`. Also: where is the `gen_nonce` defined? I do not
> see the definition in this patch (but it could be 1/3, which for some
> reason did not make it to the mailing list:
> https://lore.kernel.org/git/20201012184806.166251-1-smcallis@google.com/).
>
Done, and it is indeed in 1/3 which got caught by the spam filter
(should be available now).

> Another suggestion: rather than deleting `clone/`, use a separate
> directory to clone into, say, `flaky/`. That will make it easier to debug
> when the entire "trash" directory is tar'ed up in a failed CI build, for
> example.
>
Done.

> > diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> > index e40e9ed52f..85d2a0e8b8 100755
> > --- a/t/t5551-http-fetch-smart.sh
> > +++ b/t/t5551-http-fetch-smart.sh
> > @@ -45,6 +45,7 @@ test_expect_success 'clone http repository' '
> >       EOF
> >       GIT_TRACE_CURL=true GIT_TEST_PROTOCOL_VERSION=0 \
> >               git clone --quiet $HTTPD_URL/smart/repo.git clone 2>err &&
> > +    cd clone && git config pull.rebase false && cd .. &&
>
> Better: test_config -C clone pull.rebase false
>
Done, but also OBE by removing tests.

>
> I wonder whether it is really necessary to add _that_ many test cases. The
> test suite already takes so long to run that we had cases where
> contributors simply did not run it before sending their contributions.
>
> In this instance, I would think that it would be plenty sufficient to have
> a single new test case that exercizes the added code path (and verifies
> that it can see the message).
>
Done, down to a single test (clone) that should exercise everything.

> Thanks,
> Dscho
>
> >
> >  # DO NOT add non-httpd-specific tests here, because the last part of this
> >  # test script is only executed when httpd is available and enabled.
> > --
> > 2.28.0.1011.ga647a8990f-goog
> >
> >

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH] remote-curl: add testing for intelligent retry for HTTP
  2020-10-13 14:25         ` Johannes Schindelin
@ 2020-10-13 17:29           ` Junio C Hamano
  0 siblings, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2020-10-13 17:29 UTC (permalink / raw)
  To: Johannes Schindelin; +Cc: Sean McAllister, git, peff, masayasuzuki

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> We should also consider using `test-tool genrandom <seed>` instead (where
> `<seed>` would have to be predictable, but probably would have to change
> between `gen_nonce()` calls).

Yup, that is exactly why I asked Sean about randomness requirement.

It turns out that they care only about uniqueness, so the comparison
is between keeping an ever-incrementing counter and (1) echoing its
current contents and/or (2) feeding it to "test-tool genrandom" as
the seed.  The complexity of the code _we_ need to write anew is the
same, but echo would probably be a win in both the number of forks
and cycles departments.

Thanks.

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: [PATCH 3/3] http: automatically retry some requests
  2020-10-13 15:03     ` Sean McAllister
@ 2020-10-13 17:45       ` Junio C Hamano
  0 siblings, 0 replies; 13+ messages in thread
From: Junio C Hamano @ 2020-10-13 17:45 UTC (permalink / raw)
  To: Sean McAllister; +Cc: Johannes Schindelin, git, peff, Masaya Suzuki

Sean McAllister <smcallis@google.com> writes:

>> > +static int http_retry_limit = 3;
>> > +static int http_default_delay = 2;
>>
>> Should there be a config option for that? Also, it took me some time to
>> find the code using this variable in order to find out what unit to use:
>> it is seconds (not microseconds, as I had expected). Maybe this can be
>> documented in the variable name, or at least in a comment?
>
> Junio tossed that out during our private review and I think we decided to just

Needs clarification.  Here "that" in "tossed that out" only refers
to "static int const http_retry_limit = 3" and friends and nothing
else.  There weren't any discussion on units or comments.  I did
mention that it is an obvious future possibility to make these
configurable and that was why I suggested to "toss out" the const.

It seems we'll see names with "seconds" in them somewhere, which is
good.

> I've been writing a lot of python code lately =D  So splitting into
> lines was a natural paradigm for me.  You're right, I like yours more.  I've
> refactored it to be closer to that.  Little bit of fiddling to deal with header
> whitespace properly, but it's pretty close.

Good.  I personally think strbuf_split() is a mistaken API whose use
needs to be killed, so it makes me happy to see one new callsite we
didn't have to add ;-)

Thanks.

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2020-10-13 17:45 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <20201012184806.166251-1-smcallis@google.com>
2020-10-12 18:48 ` [PATCH 2/3] replace CURLOPT_FILE With CURLOPT_WRITEDATA Sean McAllister
2020-10-12 19:26   ` Johannes Schindelin
2020-10-12 18:48 ` [PATCH 3/3] http: automatically retry some requests Sean McAllister
2020-10-12 20:15   ` Johannes Schindelin
2020-10-12 21:00     ` Junio C Hamano
2020-10-13 15:03     ` Sean McAllister
2020-10-13 17:45       ` Junio C Hamano
2020-10-12 20:19 ` [PATCH] remote-curl: add testing for intelligent retry for HTTP Sean McAllister
2020-10-12 20:51   ` Junio C Hamano
2020-10-12 22:20     ` Sean McAllister
2020-10-12 22:30       ` Junio C Hamano
2020-10-13 14:25         ` Johannes Schindelin
2020-10-13 17:29           ` Junio C Hamano

Code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).