git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
* [PATCH] handle http urls with query string ("?foo") correctly
@ 2008-06-04 23:28 David ‘Bombe’ Roden
  2008-06-05  0:12 ` Johannes Schindelin
  0 siblings, 1 reply; 8+ messages in thread
From: David ‘Bombe’ Roden @ 2008-06-04 23:28 UTC (permalink / raw
  To: git

[-- Attachment #1: Type: text/plain, Size: 6792 bytes --]

From 1f55b9176248673b78c77ff292fb5c6c988a7f8b Mon Sep 17 00:00:00 2001
From: =?utf-8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?= <bombe@pterodactylus.net>
Date: Thu, 5 Jun 2008 00:53:56 +0200
Subject: [PATCH] handle http urls with query string ("?foo") correctly

---
I'm a developer for the Freenet Project[1] and tried to get git running with Freenet's http-gateway. For several reasons I have to use URLs 
like "http://host:8888/USK@<lots-of-stuff>/project.git/4?type=text/plain". This breaks the way git creates http URLs to retrieve. The 
following patch remedys this situation by inserting the git-generated part of the URL (like "/info/refs") before the query string.

 http-walker.c |   31 ++++++++++++-------------------
 http.c        |   43 ++++++++++++++++++++++++++++++++++---------
 http.h        |    2 ++
 transport.c   |    5 +++--
 4 files changed, 51 insertions(+), 30 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index 99f397e..711e55b 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -100,7 +100,6 @@ static void start_object_request(struct walker *walker,
 	char *hex = sha1_to_hex(obj_req->sha1);
 	char prevfile[PATH_MAX];
 	char *url;
-	char *posn;
 	int prevlocal;
 	unsigned char prev_buf[PREV_BUF_SIZE];
 	ssize_t prev_read = 0;
@@ -146,16 +145,11 @@ static void start_object_request(struct walker *walker,
 
 	SHA1_Init(&obj_req->c);
 
-	url = xmalloc(strlen(obj_req->repo->base) + 51);
+	char *suffix = xmalloc(51);
 	obj_req->url = xmalloc(strlen(obj_req->repo->base) + 51);
-	strcpy(url, obj_req->repo->base);
-	posn = url + strlen(obj_req->repo->base);
-	strcpy(posn, "/objects/");
-	posn += 9;
-	memcpy(posn, hex, 2);
-	posn += 2;
-	*(posn++) = '/';
-	strcpy(posn, hex + 2);
+	strcpy(suffix, "/objects/");
+	sprintf(suffix + 9, "%c%c/%s", hex[0], hex[1], hex + 2);
+	url = transform_url(obj_req->repo->base, suffix);
 	strcpy(obj_req->url, url);
 
 	/* If a previous temp file is present, process what was already
@@ -384,8 +378,9 @@ static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned ch
 	if (walker->get_verbosely)
 		fprintf(stderr, "Getting index for pack %s\n", hex);
 
-	url = xmalloc(strlen(repo->base) + 64);
-	sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
+	char *index_url = xmalloc(64);
+	sprintf(index_url, "/objects/pack/pack-%s.idx", hex);
+	url = transform_url(repo->base, index_url);
 
 	filename = sha1_pack_index_name(sha1);
 	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
@@ -608,8 +603,7 @@ static void fetch_alternates(struct walker *walker, const char *base)
 	if (walker->get_verbosely)
 		fprintf(stderr, "Getting alternates list for %s\n", base);
 
-	url = xmalloc(strlen(base) + 31);
-	sprintf(url, "%s/objects/info/http-alternates", base);
+	url = transform_url(base, "/objects/info/http-alternates");
 
 	/* Use a callback to process the result, since another request
 	   may fail and need to have alternates loaded before continuing */
@@ -655,8 +649,7 @@ static int fetch_indices(struct walker *walker, struct alt_base *repo)
 	if (walker->get_verbosely)
 		fprintf(stderr, "Getting pack list for %s\n", repo->base);
 
-	url = xmalloc(strlen(repo->base) + 21);
-	sprintf(url, "%s/objects/info/packs", repo->base);
+	url = transform_url(repo->base, "/objects/info/packs");
 
 	slot = get_active_slot();
 	slot->results = &results;
@@ -739,9 +732,9 @@ static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned cha
 			sha1_to_hex(sha1));
 	}
 
-	url = xmalloc(strlen(repo->base) + 65);
-	sprintf(url, "%s/objects/pack/pack-%s.pack",
-		repo->base, sha1_to_hex(target->sha1));
+	char *pack_url = xmalloc(65);
+	sprintf(pack_url, "/objects/pack/pack-%s.pack", sha1_to_hex(target->sha1));
+	url = transform_url(repo->base, pack_url);
 
 	filename = sha1_pack_name(target->sha1);
 	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
diff --git a/http.c b/http.c
index 2a21ccb..e2b764a 100644
--- a/http.c
+++ b/http.c
@@ -590,15 +590,17 @@ static char *quote_ref_url(const char *base, const char *ref)
 	qref = xmalloc(len);
 	memcpy(qref, base, baselen);
 	dp = qref + baselen;
-	*(dp++) = '/';
-	for (cp = ref; (ch = *cp) != 0; cp++) {
-		if (needs_quote(ch)) {
-			*dp++ = '%';
-			*dp++ = hex((ch >> 4) & 0xF);
-			*dp++ = hex(ch & 0xF);
+	if (*ref) {
+		*(dp++) = '/';
+		for (cp = ref; (ch = *cp) != 0; cp++) {
+			if (needs_quote(ch)) {
+				*dp++ = '%';
+				*dp++ = hex((ch >> 4) & 0xF);
+				*dp++ = hex(ch & 0xF);
+			}
+			else
+				*dp++ = ch;
 		}
-		else
-			*dp++ = ch;
 	}
 	*dp = 0;
 
@@ -613,7 +615,10 @@ int http_fetch_ref(const char *base, struct ref *ref)
 	struct slot_results results;
 	int ret;
 
-	url = quote_ref_url(base, ref->name);
+	char *real_ref_name = xmalloc(strlen(ref->name) + 2);
+	sprintf(real_ref_name, "/%s", ref->name);
+	char *real_url = transform_url(base, real_ref_name);
+	url = quote_ref_url(real_url, "");
 	slot = get_active_slot();
 	slot->results = &results;
 	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
@@ -643,3 +648,23 @@ int http_fetch_ref(const char *base, struct ref *ref)
 	free(url);
 	return ret;
 }
+
+char *transform_url(const char *url, const char *suffix)
+{
+	char *new_url;
+	char *question_mark;
+	ptrdiff_t offset;
+
+	new_url = xmalloc(strlen(url) + strlen(suffix) + 1);
+
+	if ((question_mark = strchr(url, '?'))) {
+		offset = (ptrdiff_t) question_mark - (ptrdiff_t) url;
+		snprintf(new_url, offset + 1, "%s", url);
+		strcat(new_url, suffix);
+		strcat(new_url, url + offset);
+	} else {
+		sprintf(new_url, "%s%s", url, suffix);
+	}
+	return new_url;
+}
+
diff --git a/http.h b/http.h
index a04fc6a..58730b8 100644
--- a/http.h
+++ b/http.h
@@ -107,4 +107,6 @@ static inline int missing__target(int code, int result)
 
 extern int http_fetch_ref(const char *base, struct ref *ref);
 
+extern char *transform_url(const char *url, const char *suffix);
+
 #endif /* HTTP_H */
diff --git a/transport.c b/transport.c
index 3ff8519..b1966d8 100644
--- a/transport.c
+++ b/transport.c
@@ -1,3 +1,4 @@
+#include <stddef.h>
 #include "cache.h"
 #include "transport.h"
 #include "run-command.h"
@@ -449,8 +450,7 @@ static struct ref *get_refs_via_curl(struct transport *transport)
 
 	walker = transport->data;
 
-	refs_url = xmalloc(strlen(transport->url) + 11);
-	sprintf(refs_url, "%s/info/refs", transport->url);
+	refs_url = transform_url(transport->url, "/info/refs");
 
 	slot = get_active_slot();
 	slot->results = &results;
@@ -833,3 +833,4 @@ int transport_disconnect(struct transport *transport)
 	free(transport);
 	return ret;
 }
+
-- 
1.5.6.rc1.6.gc8c52


[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-04 23:28 [PATCH] handle http urls with query string ("?foo") correctly David ‘Bombe’ Roden
@ 2008-06-05  0:12 ` Johannes Schindelin
  2008-06-05  6:48   ` David ‘Bombe’ Roden
  0 siblings, 1 reply; 8+ messages in thread
From: Johannes Schindelin @ 2008-06-05  0:12 UTC (permalink / raw
  To: David ‘Bombe’ Roden; +Cc: git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3063 bytes --]

Hi,

On Thu, 5 Jun 2008, David ‘Bombe’ Roden wrote:

> From 1f55b9176248673b78c77ff292fb5c6c988a7f8b Mon Sep 17 00:00:00 2001
> From: =?utf-8?q?David=20=E2=80=98Bombe=E2=80=99=20Roden?= <bombe@pterodactylus.net>
> Date: Thu, 5 Jun 2008 00:53:56 +0200
> Subject: [PATCH] handle http urls with query string ("?foo") correctly

Please follow the hints in Documentation/SubmittingPatches, or just 
imitate others' mails with patches.

> I'm a developer for the Freenet Project[1] and tried to get git running 
> with Freenet's http-gateway. For several reasons I have to use URLs like 
> "http://host:8888/USK@<lots-of-stuff>/project.git/4?type=text/plain". 
> This breaks the way git creates http URLs to retrieve. The following 
> patch remedys this situation by inserting the git-generated part of the 
> URL (like "/info/refs") before the query string.

I think that this paragraph should be rewritten into a less "I did" form, 
and be the commit message.

> diff --git a/http-walker.c b/http-walker.c
> index 99f397e..711e55b 100644
> --- a/http-walker.c
> +++ b/http-walker.c
> @@ -100,7 +100,6 @@ static void start_object_request(struct walker *walker,
>  	char *hex = sha1_to_hex(obj_req->sha1);
>  	char prevfile[PATH_MAX];
>  	char *url;
> -	char *posn;
>  	int prevlocal;
>  	unsigned char prev_buf[PREV_BUF_SIZE];
>  	ssize_t prev_read = 0;
> @@ -146,16 +145,11 @@ static void start_object_request(struct walker *walker,
>  
>  	SHA1_Init(&obj_req->c);
>  
> -	url = xmalloc(strlen(obj_req->repo->base) + 51);
> +	char *suffix = xmalloc(51);

I guess you are a Java programmer?  In C, this would be "char 
suffix[51];".

Also, we like to have C89 compatibility, especially when it is too simple: 
declarations have to come _before_ statements.
 
>  	obj_req->url = xmalloc(strlen(obj_req->repo->base) + 51);
> -	strcpy(url, obj_req->repo->base);
> -	posn = url + strlen(obj_req->repo->base);
> -	strcpy(posn, "/objects/");
> -	posn += 9;
> -	memcpy(posn, hex, 2);
> -	posn += 2;
> -	*(posn++) = '/';
> -	strcpy(posn, hex + 2);
> +	strcpy(suffix, "/objects/");
> +	sprintf(suffix + 9, "%c%c/%s", hex[0], hex[1], hex + 2);
> +	url = transform_url(obj_req->repo->base, suffix);

This part of Git's source code predates the strbuf work, and therefore it 
is understandable that strbufs are not used there.  However, I think that 
your changes just cry for want of strbufs.

Also, the strcpy() could be merged into the sprintf().

> @@ -384,8 +378,9 @@ static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned ch
>  	if (walker->get_verbosely)
>  		fprintf(stderr, "Getting index for pack %s\n", hex);
>  
> -	url = xmalloc(strlen(repo->base) + 64);
> -	sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
> +	char *index_url = xmalloc(64);
> +	sprintf(index_url, "/objects/pack/pack-%s.idx", hex);
> +	url = transform_url(repo->base, index_url);

Again, declaration-after-statement, and again an opportunity for an 
strbuf.

Maybe you want to rewrite your patch before I keep on repeating these two 
comments? ;-)

Thanks,
Dscho

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-05  0:12 ` Johannes Schindelin
@ 2008-06-05  6:48   ` David ‘Bombe’ Roden
  2008-06-05  7:15     ` Johannes Schindelin
  0 siblings, 1 reply; 8+ messages in thread
From: David ‘Bombe’ Roden @ 2008-06-05  6:48 UTC (permalink / raw
  To: git; +Cc: Johannes Schindelin

[-- Attachment #1: Type: text/plain, Size: 8137 bytes --]

On Thursday 05 June 2008 02:12:21 Johannes Schindelin wrote:

> I think that this paragraph should be rewritten into a less "I did" form,
> and be the commit message.

Okay.


> This part of Git's source code predates the strbuf work, and therefore it
> is understandable that strbufs are not used there.  However, I think that
> your changes just cry for want of strbufs.

Sounds good. strbuf is indeed a tad more convenient than sprintf and strcat. :)


> Maybe you want to rewrite your patch before I keep on repeating these two
> comments? ;-)

I did. Find the revised version below. :)

---
handle http urls with query string ("?foo") correctly
git breaks when a repository is cloned from an http url that contains a
query string. This patch fixes this behaviour be inserting the name of
the requested object (like "/info/refs") before the query string.

 http-walker.c |   31 ++++++++++++-------------------
 http.c        |   42 +++++++++++++++++++++++++++++++++---------
 http.h        |    2 ++
 transport.c   |    5 +++--
 4 files changed, 50 insertions(+), 30 deletions(-)

diff --git a/http-walker.c b/http-walker.c
index 99f397e..b14497a 100644
--- a/http-walker.c
+++ b/http-walker.c
@@ -100,7 +100,6 @@ static void start_object_request(struct walker *walker,
 	char *hex = sha1_to_hex(obj_req->sha1);
 	char prevfile[PATH_MAX];
 	char *url;
-	char *posn;
 	int prevlocal;
 	unsigned char prev_buf[PREV_BUF_SIZE];
 	ssize_t prev_read = 0;
@@ -109,6 +108,7 @@ static void start_object_request(struct walker *walker,
 	struct curl_slist *range_header = NULL;
 	struct active_request_slot *slot;
 	struct walker_data *data = walker->data;
+	struct strbuf suffix_buffer = STRBUF_INIT;
 
 	snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
 	unlink(prevfile);
@@ -146,16 +146,10 @@ static void start_object_request(struct walker *walker,
 
 	SHA1_Init(&obj_req->c);
 
-	url = xmalloc(strlen(obj_req->repo->base) + 51);
+	strbuf_addstr(&suffix_buffer, "/objects/");
+	strbuf_addf(&suffix_buffer, "%c%c/%s", hex[0], hex[1], hex + 2);
+	url = transform_url(obj_req->repo->base, strbuf_detach(&suffix_buffer, NULL));
 	obj_req->url = xmalloc(strlen(obj_req->repo->base) + 51);
-	strcpy(url, obj_req->repo->base);
-	posn = url + strlen(obj_req->repo->base);
-	strcpy(posn, "/objects/");
-	posn += 9;
-	memcpy(posn, hex, 2);
-	posn += 2;
-	*(posn++) = '/';
-	strcpy(posn, hex + 2);
 	strcpy(obj_req->url, url);
 
 	/* If a previous temp file is present, process what was already
@@ -373,6 +367,7 @@ static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned ch
 	char range[RANGE_HEADER_SIZE];
 	struct curl_slist *range_header = NULL;
 	struct walker_data *data = walker->data;
+	struct strbuf suffix_buffer = STRBUF_INIT;
 
 	FILE *indexfile;
 	struct active_request_slot *slot;
@@ -384,8 +379,8 @@ static int fetch_index(struct walker *walker, struct alt_base *repo, unsigned ch
 	if (walker->get_verbosely)
 		fprintf(stderr, "Getting index for pack %s\n", hex);
 
-	url = xmalloc(strlen(repo->base) + 64);
-	sprintf(url, "%s/objects/pack/pack-%s.idx", repo->base, hex);
+	strbuf_addf(&suffix_buffer, "/objects/pack/pack-%s.idx", hex);
+	url = transform_url(repo->base, strbuf_detach(&suffix_buffer, NULL));
 
 	filename = sha1_pack_index_name(sha1);
 	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
@@ -608,8 +603,7 @@ static void fetch_alternates(struct walker *walker, const char *base)
 	if (walker->get_verbosely)
 		fprintf(stderr, "Getting alternates list for %s\n", base);
 
-	url = xmalloc(strlen(base) + 31);
-	sprintf(url, "%s/objects/info/http-alternates", base);
+	url = transform_url(base, "/objects/info/http-alternates");
 
 	/* Use a callback to process the result, since another request
 	   may fail and need to have alternates loaded before continuing */
@@ -655,8 +649,7 @@ static int fetch_indices(struct walker *walker, struct alt_base *repo)
 	if (walker->get_verbosely)
 		fprintf(stderr, "Getting pack list for %s\n", repo->base);
 
-	url = xmalloc(strlen(repo->base) + 21);
-	sprintf(url, "%s/objects/info/packs", repo->base);
+	url = transform_url(repo->base, "/objects/info/packs");
 
 	slot = get_active_slot();
 	slot->results = &results;
@@ -722,6 +715,7 @@ static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned cha
 	char range[RANGE_HEADER_SIZE];
 	struct curl_slist *range_header = NULL;
 	struct walker_data *data = walker->data;
+	struct strbuf suffix_buffer = STRBUF_INIT;
 
 	struct active_request_slot *slot;
 	struct slot_results results;
@@ -739,9 +733,8 @@ static int fetch_pack(struct walker *walker, struct alt_base *repo, unsigned cha
 			sha1_to_hex(sha1));
 	}
 
-	url = xmalloc(strlen(repo->base) + 65);
-	sprintf(url, "%s/objects/pack/pack-%s.pack",
-		repo->base, sha1_to_hex(target->sha1));
+	strbuf_addf(&suffix_buffer, "/objects/pack/pack-%s.pack", sha1_to_hex(target->sha1));
+	url = transform_url(repo->base, strbuf_detach(&suffix_buffer, NULL));
 
 	filename = sha1_pack_name(target->sha1);
 	snprintf(tmpfile, sizeof(tmpfile), "%s.temp", filename);
diff --git a/http.c b/http.c
index 2a21ccb..a60f9ea 100644
--- a/http.c
+++ b/http.c
@@ -590,15 +590,17 @@ static char *quote_ref_url(const char *base, const char *ref)
 	qref = xmalloc(len);
 	memcpy(qref, base, baselen);
 	dp = qref + baselen;
-	*(dp++) = '/';
-	for (cp = ref; (ch = *cp) != 0; cp++) {
-		if (needs_quote(ch)) {
-			*dp++ = '%';
-			*dp++ = hex((ch >> 4) & 0xF);
-			*dp++ = hex(ch & 0xF);
+	if (*ref) {
+		*(dp++) = '/';
+		for (cp = ref; (ch = *cp) != 0; cp++) {
+			if (needs_quote(ch)) {
+				*dp++ = '%';
+				*dp++ = hex((ch >> 4) & 0xF);
+				*dp++ = hex(ch & 0xF);
+			}
+			else
+				*dp++ = ch;
 		}
-		else
-			*dp++ = ch;
 	}
 	*dp = 0;
 
@@ -611,9 +613,12 @@ int http_fetch_ref(const char *base, struct ref *ref)
 	struct strbuf buffer = STRBUF_INIT;
 	struct active_request_slot *slot;
 	struct slot_results results;
+	struct strbuf suffix_buffer = STRBUF_INIT;
 	int ret;
 
-	url = quote_ref_url(base, ref->name);
+	strbuf_addf(&suffix_buffer, "/%s", ref->name);
+	url = transform_url(base, strbuf_detach(&suffix_buffer, NULL));
+	url = quote_ref_url(url, "");
 	slot = get_active_slot();
 	slot->results = &results;
 	curl_easy_setopt(slot->curl, CURLOPT_FILE, &buffer);
@@ -643,3 +648,22 @@ int http_fetch_ref(const char *base, struct ref *ref)
 	free(url);
 	return ret;
 }
+
+char *transform_url(const char *url, const char *suffix)
+{
+	struct strbuf new_url = STRBUF_INIT;
+	char *question_mark;
+	ptrdiff_t offset;
+
+	if ((question_mark = strchr(url, '?'))) {
+		offset = (ptrdiff_t) question_mark - (ptrdiff_t) url;
+		strbuf_add(&new_url, url, offset);
+		strbuf_addstr(&new_url, suffix);
+		strbuf_addstr(&new_url, url + offset);
+	} else {
+		strbuf_addstr(&new_url, url);
+		strbuf_addstr(&new_url, suffix);
+	}
+	return strbuf_detach(&new_url, NULL);
+}
+
diff --git a/http.h b/http.h
index a04fc6a..58730b8 100644
--- a/http.h
+++ b/http.h
@@ -107,4 +107,6 @@ static inline int missing__target(int code, int result)
 
 extern int http_fetch_ref(const char *base, struct ref *ref);
 
+extern char *transform_url(const char *url, const char *suffix);
+
 #endif /* HTTP_H */
diff --git a/transport.c b/transport.c
index 3ff8519..b1966d8 100644
--- a/transport.c
+++ b/transport.c
@@ -1,3 +1,4 @@
+#include <stddef.h>
 #include "cache.h"
 #include "transport.h"
 #include "run-command.h"
@@ -449,8 +450,7 @@ static struct ref *get_refs_via_curl(struct transport *transport)
 
 	walker = transport->data;
 
-	refs_url = xmalloc(strlen(transport->url) + 11);
-	sprintf(refs_url, "%s/info/refs", transport->url);
+	refs_url = transform_url(transport->url, "/info/refs");
 
 	slot = get_active_slot();
 	slot->results = &results;
@@ -833,3 +833,4 @@ int transport_disconnect(struct transport *transport)
 	free(transport);
 	return ret;
 }
+
-- 
1.5.5.3


Thanks,

	David

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply related	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-05  6:48   ` David ‘Bombe’ Roden
@ 2008-06-05  7:15     ` Johannes Schindelin
  2008-06-05  7:57       ` Junio C Hamano
  2008-06-05  8:29       ` David ‘Bombe’ Roden
  0 siblings, 2 replies; 8+ messages in thread
From: Johannes Schindelin @ 2008-06-05  7:15 UTC (permalink / raw
  To: David ‘Bombe’ Roden; +Cc: git

[-- Attachment #1: Type: TEXT/PLAIN, Size: 5766 bytes --]

Hi,

On Thu, 5 Jun 2008, David ‘Bombe’ Roden wrote:

> 
> ---
> handle http urls with query string ("?foo") correctly
> git breaks when a repository is cloned from an http url that contains a
> query string. This patch fixes this behaviour be inserting the name of
> the requested object (like "/info/refs") before the query string.

Usually, this comes before the "---", and your comments/answers after it.  
And the first line would be the subject:

-- snip --
Handle http urls with query string ("?foo") correctly

Git breaks when a repository is cloned from an http url that contains a
query string. This patch fixes this behaviour be inserting the name of
the requested object (like "/info/refs") before the query string.
-- snap --

And of course, you usually sign off your patches.

> diff --git a/http-walker.c b/http-walker.c
> index 99f397e..b14497a 100644
> --- a/http-walker.c
> +++ b/http-walker.c
> @@ -100,7 +100,6 @@ static void start_object_request(struct walker *walker,
>  	char *hex = sha1_to_hex(obj_req->sha1);
>  	char prevfile[PATH_MAX];
>  	char *url;
> -	char *posn;
>  	int prevlocal;
>  	unsigned char prev_buf[PREV_BUF_SIZE];
>  	ssize_t prev_read = 0;
> @@ -109,6 +108,7 @@ static void start_object_request(struct walker *walker,
>  	struct curl_slist *range_header = NULL;
>  	struct active_request_slot *slot;
>  	struct walker_data *data = walker->data;
> +	struct strbuf suffix_buffer = STRBUF_INIT;
>  
>  	snprintf(prevfile, sizeof(prevfile), "%s.prev", obj_req->filename);
>  	unlink(prevfile);
> @@ -146,16 +146,10 @@ static void start_object_request(struct walker *walker,
>  
>  	SHA1_Init(&obj_req->c);
>  
> -	url = xmalloc(strlen(obj_req->repo->base) + 51);
> +	strbuf_addstr(&suffix_buffer, "/objects/");
> +	strbuf_addf(&suffix_buffer, "%c%c/%s", hex[0], hex[1], hex + 2);

How about

	strbuf_addf(&suffix_buffer, "/objects/%c%c/%s",
		hex[0], hex[1], hex + 2);

?

> +	url = transform_url(obj_req->repo->base, strbuf_detach(&suffix_buffer, NULL));

And this line wrapped, like so?

	url = transform_url(obj_req->repo->base,
		strbuf_detach(&suffix_buffer, NULL));

Oh, but maybe it makes more sense to let transform_url() work on the 
strbuf itself?

> @@ -608,8 +603,7 @@ static void fetch_alternates(struct walker *walker, const char *base)
>  	if (walker->get_verbosely)
>  		fprintf(stderr, "Getting alternates list for %s\n", base);
>  
> -	url = xmalloc(strlen(base) + 31);
> -	sprintf(url, "%s/objects/info/http-alternates", base);
> +	url = transform_url(base, "/objects/info/http-alternates");
>  
>  	/* Use a callback to process the result, since another request
>  	   may fail and need to have alternates loaded before continuing */
> @@ -655,8 +649,7 @@ static int fetch_indices(struct walker *walker, struct alt_base *repo)
>  	if (walker->get_verbosely)
>  		fprintf(stderr, "Getting pack list for %s\n", repo->base);
>  
> -	url = xmalloc(strlen(repo->base) + 21);
> -	sprintf(url, "%s/objects/info/packs", repo->base);
> +	url = transform_url(repo->base, "/objects/info/packs");
>  
>  	slot = get_active_slot();
>  	slot->results = &results;

Of course, these two would have to initialise their strbuf, then.

> diff --git a/http.c b/http.c
> index 2a21ccb..a60f9ea 100644
> --- a/http.c
> +++ b/http.c
> @@ -590,15 +590,17 @@ static char *quote_ref_url(const char *base, const char *ref)
>  	qref = xmalloc(len);
>  	memcpy(qref, base, baselen);
>  	dp = qref + baselen;
> -	*(dp++) = '/';
> -	for (cp = ref; (ch = *cp) != 0; cp++) {
> -		if (needs_quote(ch)) {
> -			*dp++ = '%';
> -			*dp++ = hex((ch >> 4) & 0xF);
> -			*dp++ = hex(ch & 0xF);
> +	if (*ref) {
> +		*(dp++) = '/';
> +		for (cp = ref; (ch = *cp) != 0; cp++) {
> +			if (needs_quote(ch)) {
> +				*dp++ = '%';
> +				*dp++ = hex((ch >> 4) & 0xF);
> +				*dp++ = hex(ch & 0xF);
> +			}
> +			else
> +				*dp++ = ch;
>  		}
> -		else
> -			*dp++ = ch;
>  	}
>  	*dp = 0;
>  

Is this correct?  Either it should work the old way, or you will run into 
problems when ref is not empty...

> @@ -611,9 +613,12 @@ int http_fetch_ref(const char *base, struct ref *ref)
>  	struct strbuf buffer = STRBUF_INIT;
>  	struct active_request_slot *slot;
>  	struct slot_results results;
> +	struct strbuf suffix_buffer = STRBUF_INIT;
>  	int ret;
>  
> -	url = quote_ref_url(base, ref->name);
> +	strbuf_addf(&suffix_buffer, "/%s", ref->name);
> +	url = transform_url(base, strbuf_detach(&suffix_buffer, NULL));
> +	url = quote_ref_url(url, "");

... and indeed, it seems that by now, you would not want to add the ref in 
quote_ref_url() anymore... which you do not, since you pass "" as ref 
(and this is the only caller of quote_ref_url).

> @@ -643,3 +648,22 @@ int http_fetch_ref(const char *base, struct ref *ref)
>  	free(url);
>  	return ret;
>  }
> +
> +char *transform_url(const char *url, const char *suffix)
> +{
> +	struct strbuf new_url = STRBUF_INIT;
> +	char *question_mark;
> +	ptrdiff_t offset;
> +
> +	if ((question_mark = strchr(url, '?'))) {

Junio usually prefers assignments outside of the if() statement.

> +		offset = (ptrdiff_t) question_mark - (ptrdiff_t) url;
> +		strbuf_add(&new_url, url, offset);
> +		strbuf_addstr(&new_url, suffix);
> +		strbuf_addstr(&new_url, url + offset);
> +	} else {
> +		strbuf_addstr(&new_url, url);
> +		strbuf_addstr(&new_url, suffix);
> +	}
> +	return strbuf_detach(&new_url, NULL);
> +}
> +

There is an extra empty line at the end.

But another thing is more pressing: obviously, there were reasons to quote 
the ref names with this funny URL encoding (%45%75%6E%6E%79), and you 
effectively undid that change.

> diff --git a/transport.c b/transport.c
> index 3ff8519..b1966d8 100644
> --- a/transport.c
> +++ b/transport.c
> @@ -1,3 +1,4 @@
> +#include <stddef.h>

Why?

Ciao,
Dscho

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-05  7:15     ` Johannes Schindelin
@ 2008-06-05  7:57       ` Junio C Hamano
  2008-06-05  8:37         ` David ‘Bombe’ Roden
  2008-06-05 12:14         ` David ‘Bombe’ Roden
  2008-06-05  8:29       ` David ‘Bombe’ Roden
  1 sibling, 2 replies; 8+ messages in thread
From: Junio C Hamano @ 2008-06-05  7:57 UTC (permalink / raw
  To: Johannes Schindelin; +Cc: David ‘Bombe’ Roden, git

Johannes Schindelin <Johannes.Schindelin@gmx.de> writes:

> Usually, this comes before the "---", and your comments/answers after it.  
> And the first line would be the subject:
>
> -- snip --
> Handle http urls with query string ("?foo") correctly
>
> Git breaks when a repository is cloned from an http url that contains a
> query string. This patch fixes this behaviour be inserting the name of
> the requested object (like "/info/refs") before the query string.
> -- snap --
>
> And of course, you usually sign off your patches.

Please wait a minute and step back.

Before going into the presentation, I have a strong doubt about what this
is trying to solve.

Without reading the patch at all (and the lazyness is only half the reason
for not reading the patch before thinking about the issue --- it is also a
good lithmus test to make sure that the commit log explains what is done
well), my understanding is that this peculiar http-hosted git repository
takes:

	http://foo.bar.xz/serve.cgi?repo=foo.git/

as the base URL, and the patch author wants us to ask for (for example)
"info/alternates" as

	http://foo.bar.xz/serve.cgi/info/alternates?repo=foo.git/

or something like that, not the usual:

	http://foo.bar.xz/serve.cgi?repo=foo.git/info/alternates

Two comments.

 (1) If that is not the problem being tackled, the commit log needs to
     explain the issue much better.  "git breaks" is obviously not good
     enough to convey the issue to me, and if the description was not
     clear for me to understand what is being fixed, it has no hope to
     explain the fix to other people.

 (2) If that is indeed the issue being tackled, sorry, it is not how "dumb
     protocol" http server is expected to behave.  Your server needs
     fixing.

If the protocol being used is still the "dumb commit walker" protocol,
then, given the base URL of the repository $URL, "info/refs" must exist at
"$URL/info/refs", and a loose object deadbeef... must exist at
"$URL/objects/de/adbeef...".  That's how the protocol is defined.

If we want to have a CGI on the server side, the client _could_ even talk
"git native" protocol or something similar to it.  But that is not what is
attempted with this patch as far as I can tell.

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-05  7:15     ` Johannes Schindelin
  2008-06-05  7:57       ` Junio C Hamano
@ 2008-06-05  8:29       ` David ‘Bombe’ Roden
  1 sibling, 0 replies; 8+ messages in thread
From: David ‘Bombe’ Roden @ 2008-06-05  8:29 UTC (permalink / raw
  To: git

[-- Attachment #1: Type: text/plain, Size: 1671 bytes --]

On Thursday 05 June 2008 09:15:01 Johannes Schindelin wrote:

> And of course, you usually sign off your patches.

As soon as there are no more objections, I will. :)


> How about
>
> 	strbuf_addf(&suffix_buffer, "/objects/%c%c/%s",
> 		hex[0], hex[1], hex + 2);
>
> ?

Now that you mention it... ;) Changed.


> And this line wrapped, like so?
>
> 	url = transform_url(obj_req->repo->base,
> 		strbuf_detach(&suffix_buffer, NULL));

I usually don't do line-wrapping because all my terminals are wide enough but 
I might just make an exception here. :)


> Oh, but maybe it makes more sense to let transform_url() work on the
> strbuf itself?

Then I would have to rewrite more unrelated stuff because everything else in 
those files still relies on char*. I'd rather not meddle with that for now; I 
agree that at some point it should all be rewritten to use strbuf (more or 
less) exclusively. :)


> Junio usually prefers assignments outside of the if() statement.

Can do.


> But another thing is more pressing: obviously, there were reasons to quote
> the ref names with this funny URL encoding (%45%75%6E%6E%79), and you
> effectively undid that change.

Hmm, yes. Ref names should probably be url-escaped.


> > +#include <stddef.h>
> Why?

For ptrdiff_t. Yes, I know that the offset between the start of a URL and the 
question mark will never really exceed the 32-bit range and it's also highly 
unlikely that the two pointers in comparison differ in the upper 32 bits but 
as I'm actually calculating the difference between two pointers ptrdiff_t was 
the logical choice over int.


> Dscho

	David

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-05  7:57       ` Junio C Hamano
@ 2008-06-05  8:37         ` David ‘Bombe’ Roden
  2008-06-05 12:14         ` David ‘Bombe’ Roden
  1 sibling, 0 replies; 8+ messages in thread
From: David ‘Bombe’ Roden @ 2008-06-05  8:37 UTC (permalink / raw
  To: git

[-- Attachment #1: Type: text/plain, Size: 1285 bytes --]

On Thursday 05 June 2008 09:57:39 Junio C Hamano wrote:

> my understanding is that this peculiar http-hosted git repository
> takes:
> 	http://foo.bar.xz/serve.cgi?repo=foo.git/
> as the base URL, and the patch author wants us to ask for (for example)
> "info/alternates" as
> 	http://foo.bar.xz/serve.cgi/info/alternates?repo=foo.git/

Actually the base URL is more like

	http://foo.bar.xz/foo.git/0?type=text/plain

and I want to retrieve objects with

	http://foo.bar.xz/foo.git/0/info/refs?type=text/plain

but the gist is basically the same. My patch will indeed break git for URLs 
like the one you gave.


>  (2) If that is indeed the issue being tackled, sorry, it is not how "dumb
>      protocol" http server is expected to behave.  Your server needs
>      fixing.
> If the protocol being used is still the "dumb commit walker" protocol,
> then, given the base URL of the repository $URL, "info/refs" must exist at
> "$URL/info/refs", and a loose object deadbeef... must exist at
> "$URL/objects/de/adbeef...".  That's how the protocol is defined.

If that is indeed the case, you're right. That could probably be tackled by 
some small fixes in Freenet's http-gateway. I'll talk to the other developers 
about that.


Thanks,

	David

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: [PATCH] handle http urls with query string ("?foo") correctly
  2008-06-05  7:57       ` Junio C Hamano
  2008-06-05  8:37         ` David ‘Bombe’ Roden
@ 2008-06-05 12:14         ` David ‘Bombe’ Roden
  1 sibling, 0 replies; 8+ messages in thread
From: David ‘Bombe’ Roden @ 2008-06-05 12:14 UTC (permalink / raw
  To: git

[-- Attachment #1: Type: text/plain, Size: 522 bytes --]

On Thursday 05 June 2008 09:57:39 Junio C Hamano wrote:

> If the protocol being used is still the "dumb commit walker" protocol,
> then, given the base URL of the repository $URL, "info/refs" must exist at
> "$URL/info/refs", and a loose object deadbeef... must exist at
> "$URL/objects/de/adbeef...".  That's how the protocol is defined.

Through a small patch in Freenet it is now possible to use the existing http 
transport, without any modifications in git.


Thanks for all your help, anyway,

	David

[-- Attachment #2: This is a digitally signed message part. --]
[-- Type: application/pgp-signature, Size: 197 bytes --]

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~2008-06-05 12:16 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-06-04 23:28 [PATCH] handle http urls with query string ("?foo") correctly David ‘Bombe’ Roden
2008-06-05  0:12 ` Johannes Schindelin
2008-06-05  6:48   ` David ‘Bombe’ Roden
2008-06-05  7:15     ` Johannes Schindelin
2008-06-05  7:57       ` Junio C Hamano
2008-06-05  8:37         ` David ‘Bombe’ Roden
2008-06-05 12:14         ` David ‘Bombe’ Roden
2008-06-05  8:29       ` David ‘Bombe’ Roden

Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).