git@vger.kernel.org list mirror (unofficial, one of many)
 help / color / mirror / code / Atom feed
* [PATCH] http: add custom hostname to IP address resolves
@ 2022-05-02  8:36 Christian Couder
  2022-05-02 19:04 ` Junio C Hamano
  2022-05-04 10:46 ` [PATCH v2] http: add custom hostname to IP address resolutions Christian Couder
  0 siblings, 2 replies; 20+ messages in thread
From: Christian Couder @ 2022-05-02  8:36 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Christian Couder, Derrick Stolee, Jacob Vosmaer

Libcurl has a CURLOPT_RESOLVE easy option that allows
hostname resolve information in the following form to
be passed:

	[+]HOST:PORT:ADDRESS[,ADDRESS]

This way, redirects and everything operating against the
HOST+PORT will use the provided ADDRESS(s).

The following form is also allowed to stop using these
resolves:

	-HOST:PORT

Let's add a corresponding "http.hostResolve" config
option that takes advantage of CURLOPT_RESOLVE.

Each value configured for the "http.hostResolve" key
is passed "as is" to curl through CURLOPT_RESOLVE, so it
should be in one of the above 2 forms. This keeps the
implementation simple and makes us consistent with
libcurl's CURLOPT_RESOLVE, and with curl's corresponding
`--resolve` command line option.

The implementation is similar to what is done for the
"http.extraHeader" config option, except that we use
CURLOPT_RESOLVE only in get_active_slot() which is
called by all the HTTP request sending functions.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---

I am not sure if some tests could/should be added. Ideas about how to
test this are welcome.

Documentation/config/http.txt | 16 ++++++++++++++++
 http.c                        | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 48 insertions(+)

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 7003661c0d..37b293a73b 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -98,6 +98,22 @@ http.version::
 	- HTTP/2
 	- HTTP/1.1
 
+http.hostResolve::
+	Hostname resolve information that will be used first when sending
+	HTTP requests.  This information should be in one of the following
+	forms:
+
+	- [+]HOST:PORT:ADDRESS[,ADDRESS]
+	- -HOST:PORT
+
++
+The first form redirects all requests to the given `HOST:PORT`
+to the provided `ADDRESS`(s). The second form clears all previous
+config values for that `HOST:PORT` combination.  To allow easy
+overriding of all the settings inherited from the system config,
+an empty value will reset all resolve information to the empty
+list.
+
 http.sslVersion::
 	The SSL version to use when negotiating an SSL connection, if you
 	want to force the default.  The available and default version
diff --git a/http.c b/http.c
index 229da4d148..e9cc46ee52 100644
--- a/http.c
+++ b/http.c
@@ -128,6 +128,9 @@ static struct curl_slist *pragma_header;
 static struct curl_slist *no_pragma_header;
 static struct string_list extra_http_headers = STRING_LIST_INIT_DUP;
 
+static struct curl_slist *host_resolves;
+static struct string_list http_host_resolve = STRING_LIST_INIT_DUP;
+
 static struct active_request_slot *active_queue_head;
 
 static char *cached_accept_language;
@@ -393,6 +396,17 @@ static int http_options(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp("http.hostresolve", var)) {
+		if (!value) {
+			return config_error_nonbool(var);
+		} else if (!*value) {
+			string_list_clear(&http_host_resolve, 0);
+		} else {
+			string_list_append(&http_host_resolve, value);
+		}
+		return 0;
+	}
+
 	if (!strcmp("http.followredirects", var)) {
 		if (value && !strcmp(value, "initial"))
 			http_follow_config = HTTP_FOLLOW_INITIAL;
@@ -985,6 +999,17 @@ static void set_from_env(const char **var, const char *envname)
 		*var = val;
 }
 
+static struct curl_slist *http_copy_host_resolve(void)
+{
+	struct curl_slist *hosts = NULL;
+	const struct string_list_item *item;
+
+	for_each_string_list_item(item, &http_host_resolve)
+		hosts = curl_slist_append(hosts, item->string);
+
+	return hosts;
+}
+
 void http_init(struct remote *remote, const char *url, int proactive_auth)
 {
 	char *low_speed_limit;
@@ -1048,6 +1073,8 @@ void http_init(struct remote *remote, const char *url, int proactive_auth)
 	no_pragma_header = curl_slist_append(http_copy_default_headers(),
 		"Pragma:");
 
+	host_resolves = http_copy_host_resolve();
+
 	{
 		char *http_max_requests = getenv("GIT_HTTP_MAX_REQUESTS");
 		if (http_max_requests != NULL)
@@ -1124,6 +1151,7 @@ void http_cleanup(void)
 	curl_global_cleanup();
 
 	string_list_clear(&extra_http_headers, 0);
+	string_list_clear(&http_host_resolve, 0);
 
 	curl_slist_free_all(pragma_header);
 	pragma_header = NULL;
@@ -1131,6 +1159,9 @@ void http_cleanup(void)
 	curl_slist_free_all(no_pragma_header);
 	no_pragma_header = NULL;
 
+	curl_slist_free_all(host_resolves);
+	host_resolves = NULL;
+
 	if (curl_http_proxy) {
 		free((void *)curl_http_proxy);
 		curl_http_proxy = NULL;
@@ -1211,6 +1242,7 @@ struct active_request_slot *get_active_slot(void)
 	if (curl_save_cookies)
 		curl_easy_setopt(slot->curl, CURLOPT_COOKIEJAR, curl_cookie_file);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, pragma_header);
+	curl_easy_setopt(slot->curl, CURLOPT_RESOLVE, host_resolves);
 	curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 	curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, NULL);
 	curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, NULL);
-- 
2.36.0


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] http: add custom hostname to IP address resolves
  2022-05-02  8:36 [PATCH] http: add custom hostname to IP address resolves Christian Couder
@ 2022-05-02 19:04 ` Junio C Hamano
  2022-05-04 10:07   ` Christian Couder
  2022-05-04 10:46 ` [PATCH v2] http: add custom hostname to IP address resolutions Christian Couder
  1 sibling, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2022-05-02 19:04 UTC (permalink / raw)
  To: Christian Couder; +Cc: git, Christian Couder, Derrick Stolee, Jacob Vosmaer

Christian Couder <christian.couder@gmail.com> writes:

> Subject: Re: [PATCH] http: add custom hostname to IP address resolves

I can guess what you mean, but I am not able to quite parse the
above.  I guess the phrase that makes me hiccup when I read is
"resolve" used as a noun.

> Libcurl has a CURLOPT_RESOLVE easy option that allows
> hostname resolve information in the following form to

The same here, "... allows the result of hostname resolution in the
following format ...", perhaps?

> be passed:
>
> 	[+]HOST:PORT:ADDRESS[,ADDRESS]
>
> This way, redirects and everything operating against the
> HOST+PORT will use the provided ADDRESS(s).
>
> The following form is also allowed to stop using these
> resolves:
>
> 	-HOST:PORT

The above is a reasonable summary of CURLOPT_RESOLVE documentation
that is appropriate to have here for those of us who are not
familiar with it.  For those of us who may want to learn more, it
would help to have a URL to the canonical documentation page, e.g.
https://curl.se/libcurl/c/CURLOPT_RESOLVE.html but it is not
required.  People should be able to find it easily.

> Let's add a corresponding "http.hostResolve" config
> option that takes advantage of CURLOPT_RESOLVE.

CURLOPT_RESOLVE allows us to feed cURL a list of these <host,port>
-> <address> mappings, so we use that mechanism to feed the values
listed on the multi-valued configuration variable (spell it out as
such, by the way, instead of saying "config option", which may give
a false impression that it is a last-one-wins single string with
many such mapping entries on it).

OK.

> Each value configured for the "http.hostResolve" key
> is passed "as is" to curl through CURLOPT_RESOLVE, so it
> should be in one of the above 2 forms. This keeps the
> implementation simple and makes us consistent with
> libcurl's CURLOPT_RESOLVE, and with curl's corresponding
> `--resolve` command line option.

OK.

> I am not sure if some tests could/should be added. Ideas about how to
> test this are welcome.

It should.  Perhaps invent a totally bogus domain name, map that to
localhost ::1, run a test server locally, and try to clone from that
bogus domain?

> +http.hostResolve::

Is "host" a good prefix for it?  

In the context of HTTP(s), if there is no other thing than host that
we resolve, "http.resolve" may be sufficient.  For those who are
looking for CURLOPT_RESOLVE equivalent, "http.curloptResolve" may
make it easier to find.

> +	Hostname resolve information that will be used first when sending
> +	HTTP requests.  This information should be in one of the following
> +	forms:
> +
> +	- [+]HOST:PORT:ADDRESS[,ADDRESS]
> +	- -HOST:PORT
> +
> ++
> +The first form redirects all requests to the given `HOST:PORT`
> +to the provided `ADDRESS`(s). The second form clears all previous
> +config values for that `HOST:PORT` combination.  To allow easy
> +overriding of all the settings inherited from the system config,
> +an empty value will reset all resolve information to the empty
> +list.

If I understand your use case correctly, this is not something you
would want to hardcode in your configuration files for long term, is
it?  I am wondering if we want to mention the expected use case here
as well, something like

    This is designed to be used primarily from the command line
    configuration variable override, e.g.

	$ git -c http.resolve=example.com:443:127.0.0.1 \
	    clone https://example.com/user/project.git

perhaps?  Not a suggestion, but soliciting thoughts.

> diff --git a/http.c b/http.c
> index 229da4d148..e9cc46ee52 100644
> --- a/http.c
> +++ b/http.c
> @@ -128,6 +128,9 @@ static struct curl_slist *pragma_header;
>  static struct curl_slist *no_pragma_header;
>  static struct string_list extra_http_headers = STRING_LIST_INIT_DUP;
>  
> +static struct curl_slist *host_resolves;
> +static struct string_list http_host_resolve = STRING_LIST_INIT_DUP;
> +
>  static struct active_request_slot *active_queue_head;
>  
>  static char *cached_accept_language;
> @@ -393,6 +396,17 @@ static int http_options(const char *var, const char *value, void *cb)
>  		return 0;
>  	}
>  
> +	if (!strcmp("http.hostresolve", var)) {
> +		if (!value) {
> +			return config_error_nonbool(var);
> +		} else if (!*value) {
> +			string_list_clear(&http_host_resolve, 0);


OK, this is a way to "clear" the list of entries accumulated on this
multi-valued configuration variable so far.  And it is documented in
the above, too.  Good.

> +		} else {
> +			string_list_append(&http_host_resolve, value);
> +		}
> +		return 0;
> +	}
> +
>  	if (!strcmp("http.followredirects", var)) {
>  		if (value && !strcmp(value, "initial"))
>  			http_follow_config = HTTP_FOLLOW_INITIAL;
> @@ -985,6 +999,17 @@ static void set_from_env(const char **var, const char *envname)
>  		*var = val;
>  }
>  
> +static struct curl_slist *http_copy_host_resolve(void)
> +{
> +	struct curl_slist *hosts = NULL;
> +	const struct string_list_item *item;
> +
> +	for_each_string_list_item(item, &http_host_resolve)
> +		hosts = curl_slist_append(hosts, item->string);
> +
> +	return hosts;
> +}
> +
>  void http_init(struct remote *remote, const char *url, int proactive_auth)
>  {
>  	char *low_speed_limit;
> @@ -1048,6 +1073,8 @@ void http_init(struct remote *remote, const char *url, int proactive_auth)
>  	no_pragma_header = curl_slist_append(http_copy_default_headers(),
>  		"Pragma:");
>  
> +	host_resolves = http_copy_host_resolve();

This is curious.  I imagined that the reason why you keep the
original in a string_list and copy it to a curl_slist was perhaps
because you'll lose the latter every time you make a curl request,
but it does not appear to be the case.  You http_init() just once
and then the same CURL *curl instance will be reused until you clear
it with http_cleanup().  So I do not see offhand the need to have
the string_list at all.

Does it work equally well if you used curl_slist_append() in
http_options() and maintain ONLY the curl_slist version of the
host_resolve list?  That would make it unnecessary to keep
http_host_resolve and add http_copy_host_resolve() function, no?

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] http: add custom hostname to IP address resolves
  2022-05-02 19:04 ` Junio C Hamano
@ 2022-05-04 10:07   ` Christian Couder
  2022-05-04 14:34     ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: Christian Couder @ 2022-05-04 10:07 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Christian Couder, Derrick Stolee, Jacob Vosmaer

On Mon, May 2, 2022 at 9:04 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> > Subject: Re: [PATCH] http: add custom hostname to IP address resolves
>
> I can guess what you means, but I am not able to quite parse the
> above.  I guess the phrase that makes me hiccup when I read is
> "resolve" used as a noun.

Ok, I will use "http: add custom hostname to IP address resolutions"
in the next version then.

> > Libcurl has a CURLOPT_RESOLVE easy option that allows
> > hostname resolve information in the following form to
>
> The same here, "... allows the result of hostname resolution in the
> following format ...", perhaps?

Ok, I will use your suggestion.

> > be passed:
> >
> >       [+]HOST:PORT:ADDRESS[,ADDRESS]
> >
> > This way, redirects and everything operating against the
> > HOST+PORT will use the provided ADDRESS(s).
> >
> > The following form is also allowed to stop using these
> > resolves:
> >
> >       -HOST:PORT
>
> The above is a reasonable summary of CURLOPT_RESOLVE documentation
> that is appropriate to have here for those of us who are not
> familiar with it.  For those of us who may want to learn more, it
> would help to have an URL to the canonical documentation page, e.g.
> https://curl.se/libcurl/c/CURLOPT_RESOLVE.html but it is not
> required.  People should be able to find it easily.

Yeah, I also thought that it wasn't required, but I will add it
anyway, as I agree it could be useful and hopefully it doesn't change
very often.

> > Let's add a corresponding "http.hostResolve" config
> > option that takes advantage of CURLOPT_RESOLVE.
>
> CURLOPT_RESOLVE allows us to feed cURL a list of these <host,port>
> -> <address> mappings, so we use that mechansim to feed the values
> listed on the multi-valued configuration variable (spell it out as
> such, by the way, instead of saying "config option", which may give
> a false impression that it is a last-one-wins single string with
> many such mapping entries on it).
>
> OK.
>
> > Each value configured for the "http.hostResolve" key
> > is passed "as is" to curl through CURLOPT_RESOLVE, so it
> > should be in one of the above 2 forms. This keeps the
> > implementation simple and makes us consistent with
> > libcurl's CURLOPT_RESOLVE, and with curl's corresponding
> > `--resolve` command line option.
>
> OK.
>
> > I am not sure if some tests could/should be added. Ideas about how to
> > test this are welcome.
>
> It should.  Perhaps invent a totally bogus domain name, map that to
> localhost ::1, run a test server locally, and try to clone from that
> bogus domain?

Ok, I will add a simple test like this.

> > +http.hostResolve::
>
> Is "host" a good prefix for it?
>
> In the context of HTTP(s), if there is no other thing than host that
> we resolve, "http.resolve" may be sufficient.  For those who are
> looking for CURLOPT_RESOLVE equivalent, "http.curloptResolve" may
> make it easier to find.

I am Ok with just "http.resolve". I think using "curlopt" is perhaps
going into too many details about the implementation of the feature,
which could theoretically change if we ever decided to use something
other than curl.

> > +     Hostname resolve information that will be used first when sending
> > +     HTTP requests.  This information should be in one of the following
> > +     forms:
> > +
> > +     - [+]HOST:PORT:ADDRESS[,ADDRESS]
> > +     - -HOST:PORT
> > +
> > ++
> > +The first form redirects all requests to the given `HOST:PORT`
> > +to the provided `ADDRESS`(s). The second form clears all previous
> > +config values for that `HOST:PORT` combination.  To allow easy
> > +overriding of all the settings inherited from the system config,
> > +an empty value will reset all resolve information to the empty
> > +list.
>
> If I understand your use case correctly, this is not something you
> would want to hardcode in your configuration files for long term, is
> it?

Right, but I wonder if there are other use cases where people would
like to hardcode it, for example on a private network where IP
addresses rarely change. Also a config option makes it more consistent
with "http.extraHeaders" and other such options.

> I am wondering if we want to mention the expected use case here
> as well, something like
>
>     This is designed to be used primarily from the command line
>     configuration variable override, e.g.
>
>         $ git -c http.resolve=example.com:443:127.0.0.1 \
>             clone https://example.com/user/project.git
>
> perhaps?  Not a suggestion, but soliciting thoughts.

I am also interested in others' thoughts about this. If no one thinks
that a config option could be useful, I am Ok with making it a
"--resolve" command line option that can be passed to any Git command
similar to "-c <name>=<value>":

git --resolve=... <command> [<args>]

> > diff --git a/http.c b/http.c
> > index 229da4d148..e9cc46ee52 100644
> > --- a/http.c
> > +++ b/http.c
> > @@ -128,6 +128,9 @@ static struct curl_slist *pragma_header;
> >  static struct curl_slist *no_pragma_header;
> >  static struct string_list extra_http_headers = STRING_LIST_INIT_DUP;
> >
> > +static struct curl_slist *host_resolves;
> > +static struct string_list http_host_resolve = STRING_LIST_INIT_DUP;
> > +
> >  static struct active_request_slot *active_queue_head;
> >
> >  static char *cached_accept_language;
> > @@ -393,6 +396,17 @@ static int http_options(const char *var, const char *value, void *cb)
> >               return 0;
> >       }
> >
> > +     if (!strcmp("http.hostresolve", var)) {
> > +             if (!value) {
> > +                     return config_error_nonbool(var);
> > +             } else if (!*value) {
> > +                     string_list_clear(&http_host_resolve, 0);
>
> OK, this is a way to "clear" the list of entries accumulated on this
> multi-valued configuration variable so far.  And it is documented in
> the above, too.  Good.
>
> > +             } else {
> > +                     string_list_append(&http_host_resolve, value);
> > +             }
> > +             return 0;
> > +     }
> > +
> >       if (!strcmp("http.followredirects", var)) {
> >               if (value && !strcmp(value, "initial"))
> >                       http_follow_config = HTTP_FOLLOW_INITIAL;
> > @@ -985,6 +999,17 @@ static void set_from_env(const char **var, const char *envname)
> >               *var = val;
> >  }
> >
> > +static struct curl_slist *http_copy_host_resolve(void)
> > +{
> > +     struct curl_slist *hosts = NULL;
> > +     const struct string_list_item *item;
> > +
> > +     for_each_string_list_item(item, &http_host_resolve)
> > +             hosts = curl_slist_append(hosts, item->string);
> > +
> > +     return hosts;
> > +}
> > +
> >  void http_init(struct remote *remote, const char *url, int proactive_auth)
> >  {
> >       char *low_speed_limit;
> > @@ -1048,6 +1073,8 @@ void http_init(struct remote *remote, const char *url, int proactive_auth)
> >       no_pragma_header = curl_slist_append(http_copy_default_headers(),
> >               "Pragma:");
> >
> > +     host_resolves = http_copy_host_resolve();
>
> This is curious.  I imagined that the reason why you keep the
> original in a string_list and copy it to a curl_slist was perhaps
> because you'll lose the latter every time you make a curl request,
> but it does not appear to be the case.  You http_init() just once
> and then the same CURL *curl instance will be reused until you clear
> it with http_cleanup().  So I do not see offhand the need to have
> the string_list at all.

Ok, I will remove it.

> Does it work equally well if you used curl_slist_append() in
> http_options() and maintain ONLY the curl_slist version of the
> host_resolve list?  That would make it unnecessary to keep
> http_host_resolve and add http_copy_host_resolve() function, no?

Yeah, right. I will remove http_host_resolve and http_copy_host_resolve().

Thanks!

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v2] http: add custom hostname to IP address resolutions
  2022-05-02  8:36 [PATCH] http: add custom hostname to IP address resolves Christian Couder
  2022-05-02 19:04 ` Junio C Hamano
@ 2022-05-04 10:46 ` Christian Couder
  2022-05-05 11:21   ` Carlo Marcelo Arenas Belón
  2022-05-09 15:38   ` [PATCH v3] " Christian Couder
  1 sibling, 2 replies; 20+ messages in thread
From: Christian Couder @ 2022-05-04 10:46 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Christian Couder, Derrick Stolee, Carlo Arenas

Libcurl has a CURLOPT_RESOLVE easy option that allows
the result of hostname resolution in the following
format to be passed:

	[+]HOST:PORT:ADDRESS[,ADDRESS]

This way, redirects and everything operating against the
HOST+PORT will use the provided ADDRESS(s).

The following format is also allowed to stop using
hostname resolutions that have already been passed:

	-HOST:PORT

See https://curl.se/libcurl/c/CURLOPT_RESOLVE.html for
more details.

Let's add a corresponding "http.resolve" config option
that takes advantage of CURLOPT_RESOLVE.

Each value configured for the "http.resolve" key is
passed "as is" to curl through CURLOPT_RESOLVE, so it
should be in one of the above 2 formats. This keeps the
implementation simple and makes us consistent with
libcurl's CURLOPT_RESOLVE, and with curl's corresponding
`--resolve` command line option.

The implementation uses CURLOPT_RESOLVE only in
get_active_slot() which is called by all the HTTP
request sending functions.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---

Changes since v1 are the following:

  - rename the new config option to "http.resolve" 
  - use "resolution" instead of "resolve" for the noun
  - use "format" instead of "form"
  - improved commit message and documentation
  - stop using a string_list and remove unnecessary
    related variables and functions
  - add a simple test

Thanks to Junio and Carlo for the suggestions.

As this version is very different from v1, I am not
sure a range diff would be very useful as it would
be very long compared to the size of the patch.

 Documentation/config/http.txt | 16 ++++++++++++++++
 http.c                        | 18 ++++++++++++++++++
 t/t5551-http-fetch-smart.sh   |  7 +++++++
 3 files changed, 41 insertions(+)

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 7003661c0d..86f8a5978f 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -98,6 +98,22 @@ http.version::
 	- HTTP/2
 	- HTTP/1.1
 
+http.resolve::
+	Hostname resolution information that will be used first when sending
+	HTTP requests.  This information should be in one of the following
+	formats:
+
+	- [+]HOST:PORT:ADDRESS[,ADDRESS]
+	- -HOST:PORT
+
++
+The first format redirects all requests to the given `HOST:PORT`
+to the provided `ADDRESS`(s). The second format clears all
+previous config values for that `HOST:PORT` combination.  To
+allow easy overriding of all the settings inherited from the
+system config, an empty value will reset all resolution
+information to the empty list.
+
 http.sslVersion::
 	The SSL version to use when negotiating an SSL connection, if you
 	want to force the default.  The available and default version
diff --git a/http.c b/http.c
index 229da4d148..7f3b7403ce 100644
--- a/http.c
+++ b/http.c
@@ -128,6 +128,8 @@ static struct curl_slist *pragma_header;
 static struct curl_slist *no_pragma_header;
 static struct string_list extra_http_headers = STRING_LIST_INIT_DUP;
 
+static struct curl_slist *host_resolutions;
+
 static struct active_request_slot *active_queue_head;
 
 static char *cached_accept_language;
@@ -393,6 +395,18 @@ static int http_options(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp("http.resolve", var)) {
+		if (!value) {
+			return config_error_nonbool(var);
+		} else if (!*value) {
+			curl_slist_free_all(host_resolutions);
+			host_resolutions = NULL;
+		} else {
+			host_resolutions = curl_slist_append(host_resolutions, value);
+		}
+		return 0;
+	}
+
 	if (!strcmp("http.followredirects", var)) {
 		if (value && !strcmp(value, "initial"))
 			http_follow_config = HTTP_FOLLOW_INITIAL;
@@ -1131,6 +1145,9 @@ void http_cleanup(void)
 	curl_slist_free_all(no_pragma_header);
 	no_pragma_header = NULL;
 
+	curl_slist_free_all(host_resolutions);
+	host_resolutions = NULL;
+
 	if (curl_http_proxy) {
 		free((void *)curl_http_proxy);
 		curl_http_proxy = NULL;
@@ -1211,6 +1228,7 @@ struct active_request_slot *get_active_slot(void)
 	if (curl_save_cookies)
 		curl_easy_setopt(slot->curl, CURLOPT_COOKIEJAR, curl_cookie_file);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, pragma_header);
+	curl_easy_setopt(slot->curl, CURLOPT_RESOLVE, host_resolutions);
 	curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 	curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, NULL);
 	curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, NULL);
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index f92c79c132..d97380be87 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
 	grep symref=HEAD:refs/heads/ trace
 '
 
+test_expect_success 'passing hostname resolution information works' '
+	BOGUS_HOST=gitbogusexamplehost.com &&
+	BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
+	test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
+	git -c "http.resolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
+'
+
 test_done
-- 
2.36.0.1.g9c537b8458.dirty


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] http: add custom hostname to IP address resolves
  2022-05-04 10:07   ` Christian Couder
@ 2022-05-04 14:34     ` Junio C Hamano
  2022-05-05 10:48       ` Christian Couder
  0 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2022-05-04 14:34 UTC (permalink / raw)
  To: Christian Couder; +Cc: git, Christian Couder, Derrick Stolee, Jacob Vosmaer

Christian Couder <christian.couder@gmail.com> writes:

>> The above is a reasonable summary of CURLOPT_RESOLVE documentation
>> that is appropriate to have here for those of us who are not
>> familiar with it.  For those of us who may want to learn more, it
>> would help to have an URL to the canonical documentation page, e.g.
>> https://curl.se/libcurl/c/CURLOPT_RESOLVE.html but it is not
>> required.  People should be able to find it easily.
>
> Yeah, I also thought that it wasn't required, but I will add it
> anyway, as I agree it could be useful and hopefully it doesn't change
> very often.

Ah, I didn't consider the URL going stale at all.  Forcing readers
to look for the keyword certainly is a way to avoid it, but they
will do that once they realize URL went stale, so there is not a
strong incentive to avoid recording the now-current URL, I would
think.

>> > +http.hostResolve::
>>
>> Is "host" a good prefix for it?
>>
>> In the context of HTTP(s), if there is no other thing than host that
>> we resolve, "http.resolve" may be sufficient.  For those who are
>> looking for CURLOPT_RESOLVE equivalent, "http.curloptResolve" may
>> make it easier to find.
>
> I am Ok with just "http.resolve". I think using "curlopt" is perhaps
> going into too many details about the implementation of the feature,
> which could theoretically change if we ever decided to use something
> other than curl.

You may want to step back a bit and rethink.

Even if we decide to rewrite that part of the system not to depend
on cURL, end-user facing documented interface, i.e. how the mappings
are given to the system, will stay with us, and it is clear that it
was modeled after CURLOPT_RESOLVE---well, it was stolen from them
verbatim ;-).

So we may wean ourselves off of cURL, but CURLOPT_RESOLVE will stay
with us for this particular feature.

>> I am wondering if we want to mention the expected use case here
>> as well, something like
>>
>>     This is designed to be used primarily from the command line
>>     configuration variable override, e.g.
>>
>>         $ git -c http.resolve=example.com:443:127.0.0.1 \
>>             clone https://example.com/user/project.git
>>
>> perhaps?  Not a suggestion, but soliciting thoughts.
>
> I am also interested in others' thoughts about this. If no one thinks
> that a config option could be useful, I am Ok with making it a
> "--resolve" command line option that can be passed to any Git command
> similar to "-c <name>=<value>":
>
> git --resolve=... <command> [<args>]

Absolutely not.

"git [push|fetch|clone|ls-remote] --dns-pre-resolve=..." that is
*NOT* git wide, but is only for transport commands might be a
possibility, but even then, you'd need to invent a way to do the
same for non cURL transports (we want to be able to pin the IP when
going over SSH to a certain host, for the same reason) if we promote
it to an officially supported command line option.

Unless we do that, it is probably better to leave it as an obscure
configuration meant to help server operators.  At least, with the
name of the configuration variable prefixed with http.*, we have a
valid excuse when somebody complains "the feature does not do
anything for git:// transport".

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] http: add custom hostname to IP address resolves
  2022-05-04 14:34     ` Junio C Hamano
@ 2022-05-05 10:48       ` Christian Couder
  2022-05-05 11:16         ` Carlo Marcelo Arenas Belón
  0 siblings, 1 reply; 20+ messages in thread
From: Christian Couder @ 2022-05-05 10:48 UTC (permalink / raw)
  To: Junio C Hamano; +Cc: git, Christian Couder, Derrick Stolee, Jacob Vosmaer

On Wed, May 4, 2022 at 4:34 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:

> > I am Ok with just "http.resolve". I think using "curlopt" is perhaps
> > going into too many details about the implementation of the feature,
> > which could theoretically change if we ever decided to use something
> > other than curl.
>
> You may want to step back a bit and rethink.
>
> Even if we decide to rewrite that part of the system not to depend
> on cURL, end-user facing documented interface, i.e. how the mappings
> are given to the system, will stay with us, and it is clear that it
> was modeled after CURLOPT_RESOLVE---well, it was stolen from them
> verbatim ;-).
>
> So we may wean ourselves off of cURL, but CURLOPT_RESOLVE will stay
> with us for this particular feature.

Yeah, the CURLOPT_RESOLVE format will stay with us, so Ok, I will
rename it "http.curloptResolve" in the next iteration then.

> >> I am wondering if we want to mention the expected use case here
> >> as well, something like
> >>
> >>     This is designed to be used primarily from the command line
> >>     configuration variable override, e.g.
> >>
> >>         $ git -c http.resolve=example.com:443:127.0.0.1 \
> >>             clone https://example.com/user/project.git
> >>
> >> perhaps?  Not a suggestion, but soliciting thoughts.
> >
> > I am also interested in others' thoughts about this. If no one thinks
> > that a config option could be useful, I am Ok with making it a
> > "--resolve" command line option that can be passed to any Git command
> > similar to "-c <name>=<value>":
> >
> > git --resolve=... <command> [<args>]
>
> Absolutely not.
>
> "git [push|fetch|clone|ls-remote] --dns-pre-resolve=..." that is
> *NOT* git wide, but is only for transport commands might be a
> possibility, but even then, you'd need to invent a way to do the
> same for non cURL transports (we want to be able to pin the IP when
> going over SSH to a certain host, for the same reason) if we promote
> it to an officially supported command line option.

Ok with renaming and implementing it only in transport commands. I
don't want, and I don't think it should be necessary, to invent a way
to do the same for non cURL transports though. I think it should be Ok
with the doc saying that the option has only been implemented for
HTTP(S) so far and will have no effect when other transports are used.

If there is a simple way to do the same thing for ssh, then I might
take a look at it later. For "file" or bundle transports, I don't
think it makes sense, and the "git" transport is not used much in big
hosting services where this feature is likely to be used.

> Unless we do that, it is probably better to leave it as an obscure
> configuration meant to help server operators.  At least, with the
> name of the configuration variable prefixed with http.*, we have a
> valid excuse when somebody complains "the feature does not do
> anything for git:// transport".

I am happy with leaving it as an obscure configuration meant to help
server operators. So I will just rename it "http.curloptResolve" in
the next iteration.

Thanks!

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] http: add custom hostname to IP address resolves
  2022-05-05 10:48       ` Christian Couder
@ 2022-05-05 11:16         ` Carlo Marcelo Arenas Belón
  2022-05-09 15:40           ` Christian Couder
  0 siblings, 1 reply; 20+ messages in thread
From: Carlo Marcelo Arenas Belón @ 2022-05-05 11:16 UTC (permalink / raw)
  To: Christian Couder
  Cc: Junio C Hamano, git, Christian Couder, Derrick Stolee, Jacob Vosmaer

On Thu, May 05, 2022 at 12:48:50PM +0200, Christian Couder wrote:
> On Wed, May 4, 2022 at 4:34 PM Junio C Hamano <gitster@pobox.com> wrote:
> > >
> > > I am also interested in others' thoughts about this. If no one thinks
> > > that a config option could be useful, I am Ok with making it a
> > > "--resolve" command line option that can be passed to any Git command
> > > similar to "-c <name>=<value>":
> > >
> > > git --resolve=... <command> [<args>]
> >
> > Absolutely not.
> >
> > "git [push|fetch|clone|ls-remote] --dns-pre-resolve=..." that is
> > *NOT* git wide, but is only for transport commands might be a
> > possibility, but even then, you'd need to invent a way to do the
> > same for non cURL transports (we want to be able to pin the IP when
> > going over SSH to a certain host, for the same reason) if we promote
> > it to an officially supported command line option.
> 
> Ok with renaming and implementing it only in transport commands. I
> don't want, and I don't think it should be necessary, to invent a way
> to do the same for non cURL transports though. I think it should be Ok
> with the doc saying that the option has only been implemented for
> HTTP(S) yet and will have no effect when other transports are used.

I think it will be better if git aborts with an error if it is used for
a transport that it doesn't support, instead of relying on the documentation,
though.
 
> If there is a simple way to do the same thing for ssh, then I might
> take a look at it later. For "file" or bundle transports, I don't
> think it makes sense, and the "git" transport is not used much in big
> hosting services where this feature is likely to be used.

This seems definitely useful also for ssh which is also used in big
hosting services.

Ironically, I think it would be even more useful for the "git" transport
especially because it doesn't have other protections to rely on that
would help prevent spoofing (like TLS), which might be also why it is
not that widely used anymore.

Carlo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] http: add custom hostname to IP address resolutions
  2022-05-04 10:46 ` [PATCH v2] http: add custom hostname to IP address resolutions Christian Couder
@ 2022-05-05 11:21   ` Carlo Marcelo Arenas Belón
  2022-05-12  8:52     ` Christian Couder
  2022-05-09 15:38   ` [PATCH v3] " Christian Couder
  1 sibling, 1 reply; 20+ messages in thread
From: Carlo Marcelo Arenas Belón @ 2022-05-05 11:21 UTC (permalink / raw)
  To: Christian Couder; +Cc: git, Junio C Hamano, Christian Couder, Derrick Stolee

On Wed, May 04, 2022 at 12:46:01PM +0200, Christian Couder wrote:
> diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> index f92c79c132..d97380be87 100755
> --- a/t/t5551-http-fetch-smart.sh
> +++ b/t/t5551-http-fetch-smart.sh
> @@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
>  	grep symref=HEAD:refs/heads/ trace
>  '
>  
> +test_expect_success 'passing hostname resolution information works' '
> +	BOGUS_HOST=gitbogusexamplehost.com &&

minor nitpick, but better to use example.com here which is reserved for this
type of uses and therefore unlikely to conflict with a possibly assigned
domain.

Carlo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-04 10:46 ` [PATCH v2] http: add custom hostname to IP address resolutions Christian Couder
  2022-05-05 11:21   ` Carlo Marcelo Arenas Belón
@ 2022-05-09 15:38   ` Christian Couder
  2022-05-10 18:20     ` Carlo Arenas
  2022-05-16  8:38     ` [PATCH v4] " Christian Couder
  1 sibling, 2 replies; 20+ messages in thread
From: Christian Couder @ 2022-05-09 15:38 UTC (permalink / raw)
  To: git; +Cc: Junio C Hamano, Christian Couder, Derrick Stolee, Carlo Arenas

Libcurl has a CURLOPT_RESOLVE easy option that allows
the result of hostname resolution in the following
format to be passed:

	[+]HOST:PORT:ADDRESS[,ADDRESS]

This way, redirects and everything operating against the
HOST+PORT will use the provided ADDRESS(s).

The following format is also allowed to stop using
hostname resolutions that have already been passed:

	-HOST:PORT

See https://curl.se/libcurl/c/CURLOPT_RESOLVE.html for
more details.

Let's add a corresponding "http.curloptResolve" config
option that takes advantage of CURLOPT_RESOLVE.

Each value configured for the "http.curloptResolve" key
is passed "as is" to libcurl through CURLOPT_RESOLVE, so
it should be in one of the above 2 formats. This keeps
the implementation simple and makes us consistent with
libcurl's CURLOPT_RESOLVE, and with curl's corresponding
`--resolve` command line option.

The implementation uses CURLOPT_RESOLVE only in
get_active_slot() which is called by all the HTTP
request sending functions.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---

Changes since v2 are the following:

  - option renamed from "http.resolve" to "http.curloptResolve"
  - mention "libcurl" instead of "curl" in the commit message
  - mention "libcurl" in the doc to make it easier to understand
    the new option name

Range diff:

1:  28c3bf9d02 ! 1:  3d689f8a6f http: add custom hostname to IP address resolutions
    @@ Commit message
         See https://curl.se/libcurl/c/CURLOPT_RESOLVE.html for
         more details.
     
    -    Let's add a corresponding "http.resolve" config option
    -    that takes advantage of CURLOPT_RESOLVE.
    +    Let's add a corresponding "http.curloptResolve" config
    +    option that takes advantage of CURLOPT_RESOLVE.
     
    -    Each value configured for the "http.resolve" key is
    -    passed "as is" to curl through CURLOPT_RESOLVE, so it
    -    should be in one of the above 2 formats. This keeps the
    -    implementation simple and makes us consistent with
    +    Each value configured for the "http.curloptResolve" key
    +    is passed "as is" to libcurl through CURLOPT_RESOLVE, so
    +    it should be in one of the above 2 formats. This keeps
    +    the implementation simple and makes us consistent with
         libcurl's CURLOPT_RESOLVE, and with curl's corresponding
         `--resolve` command line option.
     
    @@ Documentation/config/http.txt: http.version::
        - HTTP/2
        - HTTP/1.1
      
    -+http.resolve::
    -+  Hostname resolution information that will be used first when sending
    -+  HTTP requests.  This information should be in one of the following
    -+  formats:
    ++http.curloptResolve::
    ++  Hostname resolution information that will be used first by
    ++  libcurl when sending HTTP requests.  This information should
    ++  be in one of the following formats:
     +
     +  - [+]HOST:PORT:ADDRESS[,ADDRESS]
     +  - -HOST:PORT
    @@ http.c: static int http_options(const char *var, const char *value, void *cb)
                return 0;
        }
      
    -+  if (!strcmp("http.resolve", var)) {
    ++  if (!strcmp("http.curloptresolve", var)) {
     +          if (!value) {
     +                  return config_error_nonbool(var);
     +          } else if (!*value) {
    @@ t/t5551-http-fetch-smart.sh: test_expect_success 'client falls back from v2 to v
     +  BOGUS_HOST=gitbogusexamplehost.com &&
     +  BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
     +  test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
    -+  git -c "http.resolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
    ++  git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
     +'
     +
      test_done


 Documentation/config/http.txt | 16 ++++++++++++++++
 http.c                        | 18 ++++++++++++++++++
 t/t5551-http-fetch-smart.sh   |  7 +++++++
 3 files changed, 41 insertions(+)

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 7003661c0d..179d03e57b 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -98,6 +98,22 @@ http.version::
 	- HTTP/2
 	- HTTP/1.1
 
+http.curloptResolve::
+	Hostname resolution information that will be used first by
+	libcurl when sending HTTP requests.  This information should
+	be in one of the following formats:
+
+	- [+]HOST:PORT:ADDRESS[,ADDRESS]
+	- -HOST:PORT
+
++
+The first format redirects all requests to the given `HOST:PORT`
+to the provided `ADDRESS`(s). The second format clears all
+previous config values for that `HOST:PORT` combination.  To
+allow easy overriding of all the settings inherited from the
+system config, an empty value will reset all resolution
+information to the empty list.
+
 http.sslVersion::
 	The SSL version to use when negotiating an SSL connection, if you
 	want to force the default.  The available and default version
diff --git a/http.c b/http.c
index 229da4d148..8beacb95cc 100644
--- a/http.c
+++ b/http.c
@@ -128,6 +128,8 @@ static struct curl_slist *pragma_header;
 static struct curl_slist *no_pragma_header;
 static struct string_list extra_http_headers = STRING_LIST_INIT_DUP;
 
+static struct curl_slist *host_resolutions;
+
 static struct active_request_slot *active_queue_head;
 
 static char *cached_accept_language;
@@ -393,6 +395,18 @@ static int http_options(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp("http.curloptresolve", var)) {
+		if (!value) {
+			return config_error_nonbool(var);
+		} else if (!*value) {
+			curl_slist_free_all(host_resolutions);
+			host_resolutions = NULL;
+		} else {
+			host_resolutions = curl_slist_append(host_resolutions, value);
+		}
+		return 0;
+	}
+
 	if (!strcmp("http.followredirects", var)) {
 		if (value && !strcmp(value, "initial"))
 			http_follow_config = HTTP_FOLLOW_INITIAL;
@@ -1131,6 +1145,9 @@ void http_cleanup(void)
 	curl_slist_free_all(no_pragma_header);
 	no_pragma_header = NULL;
 
+	curl_slist_free_all(host_resolutions);
+	host_resolutions = NULL;
+
 	if (curl_http_proxy) {
 		free((void *)curl_http_proxy);
 		curl_http_proxy = NULL;
@@ -1211,6 +1228,7 @@ struct active_request_slot *get_active_slot(void)
 	if (curl_save_cookies)
 		curl_easy_setopt(slot->curl, CURLOPT_COOKIEJAR, curl_cookie_file);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, pragma_header);
+	curl_easy_setopt(slot->curl, CURLOPT_RESOLVE, host_resolutions);
 	curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 	curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, NULL);
 	curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, NULL);
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index f92c79c132..4a8dbb7eee 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
 	grep symref=HEAD:refs/heads/ trace
 '
 
+test_expect_success 'passing hostname resolution information works' '
+	BOGUS_HOST=gitbogusexamplehost.com &&
+	BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
+	test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
+	git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
+'
+
 test_done
-- 
2.36.1.20.g51170867bd


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH] http: add custom hostname to IP address resolves
  2022-05-05 11:16         ` Carlo Marcelo Arenas Belón
@ 2022-05-09 15:40           ` Christian Couder
  0 siblings, 0 replies; 20+ messages in thread
From: Christian Couder @ 2022-05-09 15:40 UTC (permalink / raw)
  To: Carlo Marcelo Arenas Belón
  Cc: Junio C Hamano, git, Christian Couder, Derrick Stolee, Jacob Vosmaer

On Thu, May 5, 2022 at 1:16 PM Carlo Marcelo Arenas Belón
<carenas@gmail.com> wrote:
> On Thu, May 05, 2022 at 12:48:50PM +0200, Christian Couder wrote:

> > Ok with renaming and implementing it only in transport commands. I
> > don't want, and I don't think it should be necessary, to invent a way
> > to do the same for non cURL transports though. I think it should be Ok
> > with the doc saying that the option has only been implemented for
> > HTTP(S) yet and will have no effect when other transports are used.
>
> I think it will be better if git aborts with an error if it is used for
> a transport that it doesn't support, instead of relying on the documentation,
> though.

This has drawbacks, as this is likely to be called through scripts or
apps and the calling code would have to be a bit more complex as
passing the option couldn't be done independently of the transport/URL
used.

Anyway until others complain, I prefer to leave it as an obscure
configuration called "http.curloptResolve" and meant to help server
operators as Junio says. This avoids wondering about such issues.

> > If there is a simple way to do the same thing for ssh, then I might
> > take a look at it later. For "file" or bundle transports, I don't
> > think it makes sense, and the "git" transport is not used much in big
> > hosting services where this feature is likely to be used.
>
> This seems definitely useful also for ssh which is also used in big
> hosting services.

I am not sure it is as useful for ssh. And maybe someone will take a
look at implementing it if that's the case.

> Ironically, I think it would be even more useful for the "git" transport
> especially because it doesn't have other protections to rely on that
> would help prevent spoofing (like TLS), which might be also why it is
> not that widely used anymore.

Yeah, more secure alternatives have taken over for good.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-09 15:38   ` [PATCH v3] " Christian Couder
@ 2022-05-10 18:20     ` Carlo Arenas
  2022-05-12  8:29       ` Christian Couder
  2022-05-12 13:01       ` Patrick Steinhardt
  2022-05-16  8:38     ` [PATCH v4] " Christian Couder
  1 sibling, 2 replies; 20+ messages in thread
From: Carlo Arenas @ 2022-05-10 18:20 UTC (permalink / raw)
  To: Christian Couder; +Cc: git, Junio C Hamano, Christian Couder, Derrick Stolee

On Mon, May 9, 2022 at 8:38 AM Christian Couder
<christian.couder@gmail.com> wrote:
> diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> index f92c79c132..4a8dbb7eee 100755
> --- a/t/t5551-http-fetch-smart.sh
> +++ b/t/t5551-http-fetch-smart.sh
> @@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
>         grep symref=HEAD:refs/heads/ trace
>  '
>
> +test_expect_success 'passing hostname resolution information works' '
> +       BOGUS_HOST=gitbogusexamplehost.com &&
> +       BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
> +       test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
> +       git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
> +'

Is setting it up as a command line config option the way you expect to
use this, and if so why not make it a full blown command line option
with the previous caveats that were discussed before?

I also think it might be a little confusing (and probably warrants
an advice message) if git will decide based on a configuration
somewhere in its resolution tree that the IP I am connecting is
different than the one I expect it to use through the system
configured resolution mechanism for such a thing.

I assume that if you want to use this frequently, having that advice
disabled in your global config wouldn't be a hassle, but it might be
useful to know that I am interacting with a potentially different IP
when referring to some host by name in my local repo, maybe because I
forgot to change that setting after some debugging.

I am sure all those folks that forget to edit their /etc/hosts after
they are done with their local site versions might instead use this
and then be happy to be warned about it later.

Carlo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-10 18:20     ` Carlo Arenas
@ 2022-05-12  8:29       ` Christian Couder
  2022-05-12 11:55         ` Carlo Arenas
  2022-05-12 13:01       ` Patrick Steinhardt
  1 sibling, 1 reply; 20+ messages in thread
From: Christian Couder @ 2022-05-12  8:29 UTC (permalink / raw)
  To: Carlo Arenas; +Cc: git, Junio C Hamano, Christian Couder, Derrick Stolee

On Tue, May 10, 2022 at 8:20 PM Carlo Arenas <carenas@gmail.com> wrote:
>
> On Mon, May 9, 2022 at 8:38 AM Christian Couder
> <christian.couder@gmail.com> wrote:
> > diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> > index f92c79c132..4a8dbb7eee 100755
> > --- a/t/t5551-http-fetch-smart.sh
> > +++ b/t/t5551-http-fetch-smart.sh
> > @@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
> >         grep symref=HEAD:refs/heads/ trace
> >  '
> >
> > +test_expect_success 'passing hostname resolution information works' '
> > +       BOGUS_HOST=gitbogusexamplehost.com &&
> > +       BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
> > +       test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
> > +       git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
> > +'
>
> Is setting it up as a command line config option the way you expect to
> use this, and if so why not make it a full blown command line option
> with the previous caveats that were discussed before?

Yeah, it's how GitLab will likely use this, but this is the same for
most (if not all) config options these days in GitLab. So I don't
think it's a good criterion.

I already talked about it, but one of the issues with a command line
option is that such an option might not be worth implementing for SSH
(which might not need it) or other protocols for different reasons. So
we would have a CLI option with probably a generic name that would
actually work only with one (or a few) protocols, and we would need to
decide what to do in case this option is used along with a protocol
that it doesn't support.

> I also think it might be a little confusing (and probably warranted of
> an advice message) if git will decide based on a configuration
> somewhere in its resolution tree that the IP I am connecting is
> different than the one I expect it to use through the system
> configured resolution mechanism for such a thing.

I would be Ok to add an advice message or another kind of message
telling users that the IP used is based on the config variable. It
could break scripts parsing Git's output though (even if it's bad
practice to do so). So we would need to decide the kind of message and
its content. Suggestions welcome.

> I assume that if you want to use this frequently, having that advice
> disabled in your global config wouldn't be a hassle, but it might be
> useful to know that I am interacting with a potentially different IP
> when referring to some host by name in my local repo, maybe because I
> forgot to change that setting after some debugging.

Yeah, maybe. On the other hand GIT_CURL_VERBOSE might already be the
canonical way to debug this and might already tell about this.

Yeah, it does:

<= Recv header:
== Info: Connection #0 to host gitbogusexamplehost.com left intact
== Info: RESOLVE gitbogusexamplehost.com:5551 is - old addresses discarded!
== Info: Added gitbogusexamplehost.com:5551:127.0.0.1 to DNS cache
== Info: Couldn't find host gitbogusexamplehost.com in the .netrc
file; using defaults
== Info: Found bundle for host gitbogusexamplehost.com: 0x5556d2bd1340
[serially]
== Info: Can not multiplex, even if we wanted to!
== Info: Re-using existing connection! (#0) with host gitbogusexamplehost.com
== Info: Connected to gitbogusexamplehost.com (127.0.0.1) port 5551 (#0)

I agree it might not be very clear that it's because
"http.curloptResolve" is used though. But maybe we could output a more
explicit warning message only if GIT_CURL_VERBOSE is set.

> I am sure all those folks that forget to edit their /etc/hosts after
> they are done with their local site versions might instead use this
> and then be happy to be warned about it later.

Do you mean that those folks might like a config option ;-)

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] http: add custom hostname to IP address resolutions
  2022-05-05 11:21   ` Carlo Marcelo Arenas Belón
@ 2022-05-12  8:52     ` Christian Couder
  2022-05-12 16:22       ` Junio C Hamano
  0 siblings, 1 reply; 20+ messages in thread
From: Christian Couder @ 2022-05-12  8:52 UTC (permalink / raw)
  To: Carlo Marcelo Arenas Belón
  Cc: git, Junio C Hamano, Christian Couder, Derrick Stolee

On Thu, May 5, 2022 at 1:21 PM Carlo Marcelo Arenas Belón
<carenas@gmail.com> wrote:
>
> On Wed, May 04, 2022 at 12:46:01PM +0200, Christian Couder wrote:
> > diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> > index f92c79c132..d97380be87 100755
> > --- a/t/t5551-http-fetch-smart.sh
> > +++ b/t/t5551-http-fetch-smart.sh
> > @@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
> >       grep symref=HEAD:refs/heads/ trace
> >  '
> >
> > +test_expect_success 'passing hostname resolution information works' '
> > +     BOGUS_HOST=gitbogusexamplehost.com &&
>
> minor nitpick, but better to use example.com here which is reserved for this
> type of uses and therefore unlikely to conflict with a possibly assigned
> domain.

Sorry for not replying to this earlier, but Junio previously suggested
the following:

"Perhaps invent a totally bogus domain name, map that to
localhost ::1, run a test server locally, and try to clone from that
bogus domain?"

(See: https://lore.kernel.org/git/xmqqfslrycvp.fsf@gitster.g/)

I think "a totally bogus domain name" refers to something other than
"example.com".

Also "example.com" does seem to resolve to an IP address and even has
an HTTP(S) server on it, while I think the purpose of the test would
be to check that there is not even a valid DNS resolution when the new
option is not used.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-12  8:29       ` Christian Couder
@ 2022-05-12 11:55         ` Carlo Arenas
  0 siblings, 0 replies; 20+ messages in thread
From: Carlo Arenas @ 2022-05-12 11:55 UTC (permalink / raw)
  To: Christian Couder; +Cc: git, Junio C Hamano, Christian Couder, Derrick Stolee

On Thu, May 12, 2022 at 1:30 AM Christian Couder
<christian.couder@gmail.com> wrote:
> On Tue, May 10, 2022 at 8:20 PM Carlo Arenas <carenas@gmail.com> wrote:
> > I also think it might be a little confusing (and probably warranted of
> > an advice message) if git will decide based on a configuration
> > somewhere in its resolution tree that the IP I am connecting is
> > different than the one I expect it to use through the system
> > configured resolution mechanism for such a thing.
>
> I would be Ok to add an advice message or another kind of message
> telling users that the IP used is based on the config variable. It
> could break scripts parsing Git's output though (even if it's bad
> practice to do so).

Only if they added that config option, which is an obscure one that
nobody should be using anyway, so very unlikely, right?
I also think that breaking my script could be avoided by turning off
the advice (as suggested previously), and I MIGHT want to have my
script broken if I picked up this config by mistake.

> So we would need to decide the kind of message and
> its content. Suggestions welcome.

I am not good at wording those, but I would think something innocuous like :

  "curl override detected to point name %s to %s instead of using DNS"

> > I assume that if you want to use this frequently, having that advice
> > disabled in your global config wouldn't be a hassle, but it might be
> > useful to know that I am interacting with a potentially different IP
> > when referring to some host by name in my local repo, maybe because I
> > forgot to change that setting after some debugging.
>
> Yeah, maybe. On the other hand GIT_CURL_VERBOSE might already be the
> canonical way to debug this and might already tell about this.

Of course, but that is mostly used when debugging HTTP issues, not
when your DNS seems to have gone nuts, and you are looking at your
screen in disbelief because the code you were working on before lunch
and haven't released is now suddenly in production.

> Yeah, it does:
>
> <= Recv header:
> == Info: Connection #0 to host gitbogusexamplehost.com left intact
> == Info: RESOLVE gitbogusexamplehost.com:5551 is - old addresses discarded!
> == Info: Added gitbogusexamplehost.com:5551:127.0.0.1 to DNS cache
> == Info: Couldn't find host gitbogusexamplehost.com in the .netrc
> file; using defaults
> == Info: Found bundle for host gitbogusexamplehost.com: 0x5556d2bd1340
> [serially]
> == Info: Can not multiplex, even if we wanted to!
> == Info: Re-using existing connection! (#0) with host gitbogusexamplehost.com
> == Info: Connected to gitbogusexamplehost.com (127.0.0.1) port 5551 (#0)
>
> I agree it might not be very clear that it's because
> "http.curloptResolve" is used though. But maybe we could output a more
> explicit warning message only if GIT_CURL_VERBOSE is set.

As I pointed out in my example, I can see this being useful also for some
web developers who are mostly concerned about JavaScript and might
not be aware of the OSI layer stack and how to get an HTTP packet
dump, so I would think it is better done even when GIT_CURL_VERBOSE is not
set.

> > I am sure all those folks that forget to edit their /etc/hosts after
> > they are done with their local site versions might instead use this
> > and then be happy to be warned about it later.
>
> Do you mean that those folks might like a config option ;-)

Yes, and I think it is not THAT obscure a config option if it even
has documentation as well ;)

Carlo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-10 18:20     ` Carlo Arenas
  2022-05-12  8:29       ` Christian Couder
@ 2022-05-12 13:01       ` Patrick Steinhardt
  2022-05-12 13:56         ` Carlo Arenas
  2022-05-12 15:58         ` Junio C Hamano
  1 sibling, 2 replies; 20+ messages in thread
From: Patrick Steinhardt @ 2022-05-12 13:01 UTC (permalink / raw)
  To: Carlo Arenas
  Cc: Christian Couder, git, Junio C Hamano, Christian Couder, Derrick Stolee

[-- Attachment #1: Type: text/plain, Size: 3394 bytes --]

On Tue, May 10, 2022 at 11:20:41AM -0700, Carlo Arenas wrote:
> On Mon, May 9, 2022 at 8:38 AM Christian Couder
> <christian.couder@gmail.com> wrote:
> > diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> > index f92c79c132..4a8dbb7eee 100755
> > --- a/t/t5551-http-fetch-smart.sh
> > +++ b/t/t5551-http-fetch-smart.sh
> > @@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
> >         grep symref=HEAD:refs/heads/ trace
> >  '
> >
> > +test_expect_success 'passing hostname resolution information works' '
> > +       BOGUS_HOST=gitbogusexamplehost.com &&
> > +       BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
> > +       test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
> > +       git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
> > +'
> 
> Is setting it up as a command line config option the way you expect to
> use this, and if so why not make it a full blown command line option
> with the previous caveats that were discussed before?

If you did this as a command-line option, you'd now be forced to add it
to every single command you want to support this: git-fetch, git-pull,
git-remote, git-ls-remote and maybe others I forgot about. On the other
hand, by having this as a configuration variable in `http.c` all of
those commands benefit the same.

Furthermore, using a config option is a lot more flexible: you can
persist it at different levels of your gitconfig, can easily inject it
in a script via the use of environment variables, or directly override
it when spawning a command with `-c`.

Overall, I think it is preferable to keep this as an option as opposed
to adding such an obscure parameter to all of the commands.

> I also think it might be a little confusing (and probably warranted of
> an advice message) if git will decide based on a configuration
> somewhere in its resolution tree that the IP I am connecting is
> different than the one I expect it to use through the system
> configured resolution mechanism for such a thing.

That's true already though, isn't it? A user may set `url.*.insteadOf`
and be surprised at a later point that their URLs are getting redirected
somewhere else. And there's probably a lot more examples where a user
may be confused when forgetting about certain configuration variables
that change the way Git behaves.

I also don't think that using an advice message here would be ideal. The main
use case of this configuration variable is going to be servers, and
there is a high chance that they might actually be parsing output of any
such commands. Forcing them to always disable this advice doesn't feel
like the right thing to do.

Patrick

> I assume that if you want to use this frequently, having that advice
> disabled in your global config wouldn't be a hassle, but it might be
> useful to know that I am interacting with a potentially different IP
> when referring to some host by name in my local repo, maybe because I
> forgot to change that setting after some debugging.
> 
> I am sure all those folks that forget to edit their /etc/hosts after
> they are done with their local site versions might instead use this
> and then be happy to be warned about it later.
> 
> Carlo

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-12 13:01       ` Patrick Steinhardt
@ 2022-05-12 13:56         ` Carlo Arenas
  2022-05-12 15:58         ` Junio C Hamano
  1 sibling, 0 replies; 20+ messages in thread
From: Carlo Arenas @ 2022-05-12 13:56 UTC (permalink / raw)
  To: Patrick Steinhardt
  Cc: Christian Couder, git, Junio C Hamano, Christian Couder, Derrick Stolee

On Thu, May 12, 2022 at 6:01 AM Patrick Steinhardt <ps@pks.im> wrote:
> On Tue, May 10, 2022 at 11:20:41AM -0700, Carlo Arenas wrote:
> > On Mon, May 9, 2022 at 8:38 AM Christian Couder
> > <christian.couder@gmail.com> wrote:
> > > diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
> > > index f92c79c132..4a8dbb7eee 100755
> > > --- a/t/t5551-http-fetch-smart.sh
> > > +++ b/t/t5551-http-fetch-smart.sh
> > > @@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
> > >         grep symref=HEAD:refs/heads/ trace
> > >  '
> > >
> > > +test_expect_success 'passing hostname resolution information works' '
> > > +       BOGUS_HOST=gitbogusexamplehost.com &&
> > > +       BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
> > > +       test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
> > > +       git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
> > > +'
> >
> > Is setting it up as a command line config option the way you expect to
> > use this, and if so why not make it a full blown command line option
> > with the previous caveats that were discussed before?
>
> If you did this as a command-line option, you'd now be forced to add it
> to every single command you want to support this: git-fetch, git-pull,
> git-remote, git-ls-remote and maybe others I forgot about. On the other
> hand, by having this as a configuration variable in `http.c` all of
> those commands benefit the same.

There are ways to add common options to all commands that would help
here, but as Junio pointed out it is not ideal because then you have
to ALSO provide implementations, which you don't seem interested in
doing.

> Furthermore, using a config option is a lot more flexible: you can
> persist it at different levels of your gitconfig, can easily inject it
> in a script via the use of environment variables, or directly override
> it when spawning a command with `-c`.
>
> Overall, I think it is preferable to keep this as an option as opposed
> to adding such an obscure parameter to all of the commands.

I think we had already decided that a config is more flexible, even if
I personally don't agree.

> > I also think it might be a little confusing (and probably warranted of
> > an advice message) if git will decide based on a configuration
> > somewhere in its resolution tree that the IP I am connecting is
> > different than the one I expect it to use through the system
> > configured resolution mechanism for such a thing.
>
> That's true already though, isn't it? A user may set `url.*.insteadOf`
> and be surprised at a later point that their URLs are getting redirected
> somewhere else. And there's probably a lot more examples where a user
> may be confused when forgetting about certain configuration variables
> that change the way Git behaves.

That is a good point, but unlike url.*.insteadOf, this is meant to be
an obscure setting that shouldn't be enabled by default (or under
common circumstances), so having the advice there is helpful for when
we find ourselves in an unexpected situation and to avoid confusion.

I would even argue YOUR use of it in a server might even benefit from
this advice, because it could be strange to get a different IP than
the one you set in the command line if there is also another entry in
some config that you happened to read.

> I also don't think that using an advice message here would be ideal. The main
> use case of this configuration variable is going to be servers.

My feedback about servers is indeed below, so I won't repeat myself
but keeping a global config that has this advice disabled in a server
shouldn't be that difficult; indeed it MIGHT be already there since
most features that are meant for interactive users (like an advice)
are better disabled in servers.

> > I assume that if you want to use this frequently, having that advice
> > disabled in your global config wouldn't be a hassle, but it might be
> > useful to know that I am interacting with a potentially different IP
> > when referring to some host by name in my local repo, maybe because I
> > forgot to change that setting after some debugging.
> >
> > I am sure all those folks that forget to edit their /etc/hosts after
> > they are done with their local site versions might instead use this
> > and then be happy to be warned about it later.
> >
> > Carlo

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v3] http: add custom hostname to IP address resolutions
  2022-05-12 13:01       ` Patrick Steinhardt
  2022-05-12 13:56         ` Carlo Arenas
@ 2022-05-12 15:58         ` Junio C Hamano
  1 sibling, 0 replies; 20+ messages in thread
From: Junio C Hamano @ 2022-05-12 15:58 UTC (permalink / raw)
  To: Patrick Steinhardt
  Cc: Carlo Arenas, Christian Couder, git, Christian Couder, Derrick Stolee

Patrick Steinhardt <ps@pks.im> writes:

>> Is setting it up as a command line config option the way you expect to
>> use this, and if so why not make it a full blown command line option
>> with the previous caveats that were discussed before?
>
> If you did this as a command-line option, you'd now be forced to add it
> to every single command you want to support this: git-fetch, git-pull,
> git-remote, git-ls-remote and maybe others I forgot about. On the other
> hand, by having this as a configuration variable in `http.c` all of
> those commands benefit the same.

It is not an argument against the command line option that you find
it more work to implement and cumbersome to plumb the information
through the callgraph, though.

Subcommands like "git commit" shouldn't have to know how host names
are mapped to IP addresses, and teaching the option only to
subcommands that the feature is relevant, and documenting the option
in their manual pages, would make it much easier to discover and
learn.

> Overall, I think it is preferable to keep this as an option as opposed
> to adding such an obscure parameter to all of the commands.

I favor implementing this as a configuration that is primarily meant
to be used from the command line (i.e. "git -c var=val"), ONLY
BECAUSE the feature itself is not something that should be widely
used (the users should futz with their DNS if they need something
more permanent), and adding it as a configuration would be a more
quick and dirty way that needs less developer resources now ;-)

To purists, it may make more sense to add this feature and make it
accessible only from the command line without matching configuration
variable---that would enforce the assumed use case (i.e. only after
another part of the system asked DNS, performed some check on the
resulting IP address, and decided to ask Git to interact with that
URL, use this mechanism to ensure Git interacts with that IP address
that was vetted, to avoid TOCTOU mistakes) more clearly.

I am personally open to such a purer counterproposal with working
code ;-)

>> I also think it might be a little confusing (and probably warranted of
>> an advice message) if git will decide based on a configuration
>> somewhere in its resolution tree that the IP I am connecting is
>> different than the one I expect it to use through the system
>> configured resolution mechanism for such a thing.
>
> That's true already though, isn't it? A user may set `url.*.insteadOf`
> and be surprised at a later point that their URLs are getting redirected
> somewhere else. And there's probably a lot more examples where a user
> may be confused when forgetting about certain configuration variables
> that change the way Git behaves.
>
> I also don't think that using an advice message here would be ideal. The
> main use case of this configuration variable is going to be servers, and
> there is a high chance that they might actually be parsing output of any
> such commands. Forcing them to always disable this advice doesn't feel
> like the right thing to do.

All correct.  If the users set configuration variables, the fact
that we honored their configuration variables settings and behaved
accordingly is *NOT* an advise-worthy event.

Thanks.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] http: add custom hostname to IP address resolutions
  2022-05-12  8:52     ` Christian Couder
@ 2022-05-12 16:22       ` Junio C Hamano
  2022-05-12 18:57         ` Christian Couder
  0 siblings, 1 reply; 20+ messages in thread
From: Junio C Hamano @ 2022-05-12 16:22 UTC (permalink / raw)
  To: Christian Couder
  Cc: Carlo Marcelo Arenas Belón, git, Christian Couder, Derrick Stolee

Christian Couder <christian.couder@gmail.com> writes:

> "Perhaps invent a totally bogus domain name, map that to
> localhost ::1, run a test server locally, and try to clone from that
> bogus domain?"
>
> (See: https://lore.kernel.org/git/xmqqfslrycvp.fsf@gitster.g/)
>
> I think "a totally bogus domain name" refers to something other than
> "example.com".

I meant a domain that should not be used for purposes other than
being examples in the real world, including "example.com".

But RFC6761, which is an update to RFC2606, describes a set of
properties that make .invalid a nice domain to use, including:

 1.  Users are free to use "invalid" names as they would any other
     domain names.  Users MAY assume that queries for "invalid"
     names will always return NXDOMAIN responses.

 3.  Name resolution APIs and libraries SHOULD recognize "invalid"
     names as special and SHOULD always return immediate negative
     responses.  Name resolution APIs SHOULD NOT send queries for
     "invalid" names to their configured caching DNS server(s).
       
Another possibility is ".test" but it is more for testing DNS, not
application, i.e.

 1.  Users are free to use these test names as they would any other
     domain names.  However, since there is no central authority
     responsible for use of test names, users SHOULD be aware that
     these names are likely to yield different results on different
     networks.

 3.  Name resolution APIs and libraries SHOULD NOT recognize test
     names as special and SHOULD NOT treat them differently.  Name
     resolution APIs SHOULD send queries for test names to their
     configured caching DNS server(s).

so for a code like what we are discussing, which would not want the
names to be shown to DNS and yield any IP address, ".test" makes a
poorer "bogus domain name" than ".invalid", I think.

By the way, we seem to have references to .xz top-level domain,
which appeared only in earlier drafts of what became RFC2606 (which
was updated by RFC6761) in both documentation pages and tests.  At
some point we may want to update the former to ".example" and the
latter to ".invalid" as a clean-up.

> Also "example.com" does seem to resolve to an IP address and even has
> an HTTP(S) server on it, while I think the purpose of the test would
> be to check that there is not even a valid DNS resolution when the new
> option is not used.

Yup, that makes ".invalid" a better candidate, I think.

Thanks.


^ permalink raw reply	[flat|nested] 20+ messages in thread

* Re: [PATCH v2] http: add custom hostname to IP address resolutions
  2022-05-12 16:22       ` Junio C Hamano
@ 2022-05-12 18:57         ` Christian Couder
  0 siblings, 0 replies; 20+ messages in thread
From: Christian Couder @ 2022-05-12 18:57 UTC (permalink / raw)
  To: Junio C Hamano
  Cc: Carlo Marcelo Arenas Belón, git, Christian Couder, Derrick Stolee

On Thu, May 12, 2022 at 6:22 PM Junio C Hamano <gitster@pobox.com> wrote:
>
> Christian Couder <christian.couder@gmail.com> writes:
>
> > "Perhaps invent a totally bogus domain name, map that to
> > localhost ::1, run a test server locally, and try to clone from that
> > bogus domain?"
> >
> > (See: https://lore.kernel.org/git/xmqqfslrycvp.fsf@gitster.g/)
> >
> > I think "a totally bogus domain name" refers to something other than
> > "example.com".
>
> I meant a domain that should not be used for purposes other than
> being examples in the real world, including "example.com".

Ok, thanks for the clarification and for copying the relevant RFC
information below.

> But RFC6761, which is an update to RFC2606, describes a set of
> properties that make .invalid a nice domain to use, including:
>
>  1.  Users are free to use "invalid" names as they would any other
>      domain names.  Users MAY assume that queries for "invalid"
>      names will always return NXDOMAIN responses.
>
>  3.  Name resolution APIs and libraries SHOULD recognize "invalid"
>      names as special and SHOULD always return immediate negative
>      responses.  Name resolution APIs SHOULD NOT send queries for
>      "invalid" names to their configured caching DNS server(s).

I wonder if libcurl considers itself as a name resolution library or
not. It has a DNS cache, so maybe in some ways it is. Also however it
considers itself now, it could perhaps change in the future. Even if
the current developers are against such a change, a new RFC might be
more precise and specify something for libraries like libcurl which
could make it change.

So I am not so sure that using "invalid" is our best bet.

> Another possibility is ".test" but it is more for testing DNS, not
> application, i.e.

In a way we are testing DNS, as we are actually testing libcurl's DNS
caching and its CURLOPT_RESOLVE option (even if we also test that Git
is correctly passing the config option to libcurl at the same time).

>  1.  Users are free to use these test names as they would any other
>      domain names.  However, since there is no central authority
>      responsible for use of test names, users SHOULD be aware that
>      these names are likely to yield different results on different
>      networks.
>
>  3.  Name resolution APIs and libraries SHOULD NOT recognize test
>      names as special and SHOULD NOT treat them differently.  Name
>      resolution APIs SHOULD send queries for test names to their
>      configured caching DNS server(s).

So with this we can at least expect that the way libcurl considers
itself will have no impact on our tests.

> so for a code like what we are discussing, which would not want the
> names to be shown to DNS and yield any IP address, ".test" makes a
> poorer "bogus domain name" than ".invalid", I think.

I would think that there are risks in both cases. I am Ok with using
any of the following in the test:

BOGUS_HOST=gitbogusexamplehost.invalid # or
BOGUS_HOST=gitbogusexamplehost.test

The test passes for me either way.

> By the way, we seem to have references to .xz top-level domain,
> which appeared only in earlier drafts of what became RFC2606 (which
> was updated by RFC6761) in both documentation pages and tests.  At
> some point we may want to update the former to ".example" and the
> latter to ".invalid" as a clean-up.

Yeah, good idea.

> > Also "example.com" does seem to resolve to an IP address and even has
> > an HTTP(S) server on it, while I think the purpose of the test would
> > be to check that there is not even a valid DNS resolution when the new
> > option is not used.
>
> Yup, that makes ".invalid" a better candidate, I think.

Ok, I will use "gitbogusexamplehost.invalid" in the next iteration then.

^ permalink raw reply	[flat|nested] 20+ messages in thread

* [PATCH v4] http: add custom hostname to IP address resolutions
  2022-05-09 15:38   ` [PATCH v3] " Christian Couder
  2022-05-10 18:20     ` Carlo Arenas
@ 2022-05-16  8:38     ` Christian Couder
  1 sibling, 0 replies; 20+ messages in thread
From: Christian Couder @ 2022-05-16  8:38 UTC (permalink / raw)
  To: git
  Cc: Junio C Hamano, Christian Couder, Derrick Stolee, Carlo Arenas,
	Patrick Steinhardt

Libcurl has a CURLOPT_RESOLVE easy option that allows
the result of hostname resolution in the following
format to be passed:

	[+]HOST:PORT:ADDRESS[,ADDRESS]

This way, redirects and everything operating against the
HOST+PORT will use the provided ADDRESS(s).

The following format is also allowed to stop using
hostname resolutions that have already been passed:

	-HOST:PORT

See https://curl.se/libcurl/c/CURLOPT_RESOLVE.html for
more details.

Let's add a corresponding "http.curloptResolve" config
option that takes advantage of CURLOPT_RESOLVE.

Each value configured for the "http.curloptResolve" key
is passed "as is" to libcurl through CURLOPT_RESOLVE, so
it should be in one of the above 2 formats. This keeps
the implementation simple and makes us consistent with
libcurl's CURLOPT_RESOLVE, and with curl's corresponding
`--resolve` command line option.

The implementation uses CURLOPT_RESOLVE only in
get_active_slot() which is called by all the HTTP
request sending functions.

Signed-off-by: Christian Couder <chriscool@tuxfamily.org>
---

Changes since v3:

  - Use gitbogusexamplehost.invalid instead of gitbogusexamplehost.com
    in the test.
  - Rebased on top of master at 277cf0bc36 (second 0th batch of topics
    from the previous cycle, 2022-05-11).

Range diff:

1:  3d689f8a6f ! 1:  a27ebf5988 http: add custom hostname to IP address resolutions
    @@ t/t5551-http-fetch-smart.sh: test_expect_success 'client falls back from v2 to v
      '
      
     +test_expect_success 'passing hostname resolution information works' '
    -+  BOGUS_HOST=gitbogusexamplehost.com &&
    ++  BOGUS_HOST=gitbogusexamplehost.invalid &&
     +  BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
     +  test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
     +  git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null

 Documentation/config/http.txt | 16 ++++++++++++++++
 http.c                        | 18 ++++++++++++++++++
 t/t5551-http-fetch-smart.sh   |  7 +++++++
 3 files changed, 41 insertions(+)

diff --git a/Documentation/config/http.txt b/Documentation/config/http.txt
index 7003661c0d..179d03e57b 100644
--- a/Documentation/config/http.txt
+++ b/Documentation/config/http.txt
@@ -98,6 +98,22 @@ http.version::
 	- HTTP/2
 	- HTTP/1.1
 
+http.curloptResolve::
+	Hostname resolution information that will be used first by
+	libcurl when sending HTTP requests.  This information should
+	be in one of the following formats:
+
+	- [+]HOST:PORT:ADDRESS[,ADDRESS]
+	- -HOST:PORT
+
++
+The first format redirects all requests to the given `HOST:PORT`
+to the provided `ADDRESS`(s). The second format clears all
+previous config values for that `HOST:PORT` combination.  To
+allow easy overriding of all the settings inherited from the
+system config, an empty value will reset all resolution
+information to the empty list.
+
 http.sslVersion::
 	The SSL version to use when negotiating an SSL connection, if you
 	want to force the default.  The available and default version
diff --git a/http.c b/http.c
index 229da4d148..8beacb95cc 100644
--- a/http.c
+++ b/http.c
@@ -128,6 +128,8 @@ static struct curl_slist *pragma_header;
 static struct curl_slist *no_pragma_header;
 static struct string_list extra_http_headers = STRING_LIST_INIT_DUP;
 
+static struct curl_slist *host_resolutions;
+
 static struct active_request_slot *active_queue_head;
 
 static char *cached_accept_language;
@@ -393,6 +395,18 @@ static int http_options(const char *var, const char *value, void *cb)
 		return 0;
 	}
 
+	if (!strcmp("http.curloptresolve", var)) {
+		if (!value) {
+			return config_error_nonbool(var);
+		} else if (!*value) {
+			curl_slist_free_all(host_resolutions);
+			host_resolutions = NULL;
+		} else {
+			host_resolutions = curl_slist_append(host_resolutions, value);
+		}
+		return 0;
+	}
+
 	if (!strcmp("http.followredirects", var)) {
 		if (value && !strcmp(value, "initial"))
 			http_follow_config = HTTP_FOLLOW_INITIAL;
@@ -1131,6 +1145,9 @@ void http_cleanup(void)
 	curl_slist_free_all(no_pragma_header);
 	no_pragma_header = NULL;
 
+	curl_slist_free_all(host_resolutions);
+	host_resolutions = NULL;
+
 	if (curl_http_proxy) {
 		free((void *)curl_http_proxy);
 		curl_http_proxy = NULL;
@@ -1211,6 +1228,7 @@ struct active_request_slot *get_active_slot(void)
 	if (curl_save_cookies)
 		curl_easy_setopt(slot->curl, CURLOPT_COOKIEJAR, curl_cookie_file);
 	curl_easy_setopt(slot->curl, CURLOPT_HTTPHEADER, pragma_header);
+	curl_easy_setopt(slot->curl, CURLOPT_RESOLVE, host_resolutions);
 	curl_easy_setopt(slot->curl, CURLOPT_ERRORBUFFER, curl_errorstr);
 	curl_easy_setopt(slot->curl, CURLOPT_CUSTOMREQUEST, NULL);
 	curl_easy_setopt(slot->curl, CURLOPT_READFUNCTION, NULL);
diff --git a/t/t5551-http-fetch-smart.sh b/t/t5551-http-fetch-smart.sh
index f92c79c132..b9351a732f 100755
--- a/t/t5551-http-fetch-smart.sh
+++ b/t/t5551-http-fetch-smart.sh
@@ -567,4 +567,11 @@ test_expect_success 'client falls back from v2 to v0 to match server' '
 	grep symref=HEAD:refs/heads/ trace
 '
 
+test_expect_success 'passing hostname resolution information works' '
+	BOGUS_HOST=gitbogusexamplehost.invalid &&
+	BOGUS_HTTPD_URL=$HTTPD_PROTO://$BOGUS_HOST:$LIB_HTTPD_PORT &&
+	test_must_fail git ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null &&
+	git -c "http.curloptResolve=$BOGUS_HOST:$LIB_HTTPD_PORT:127.0.0.1" ls-remote "$BOGUS_HTTPD_URL/smart/repo.git" >/dev/null
+'
+
 test_done
-- 
2.36.1.75.ga27ebf5988


^ permalink raw reply	[flat|nested] 20+ messages in thread

end of thread, other threads:[~2022-05-16  8:39 UTC | newest]

Thread overview: 20+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-05-02  8:36 [PATCH] http: add custom hostname to IP address resolves Christian Couder
2022-05-02 19:04 ` Junio C Hamano
2022-05-04 10:07   ` Christian Couder
2022-05-04 14:34     ` Junio C Hamano
2022-05-05 10:48       ` Christian Couder
2022-05-05 11:16         ` Carlo Marcelo Arenas Belón
2022-05-09 15:40           ` Christian Couder
2022-05-04 10:46 ` [PATCH v2] http: add custom hostname to IP address resolutions Christian Couder
2022-05-05 11:21   ` Carlo Marcelo Arenas Belón
2022-05-12  8:52     ` Christian Couder
2022-05-12 16:22       ` Junio C Hamano
2022-05-12 18:57         ` Christian Couder
2022-05-09 15:38   ` [PATCH v3] " Christian Couder
2022-05-10 18:20     ` Carlo Arenas
2022-05-12  8:29       ` Christian Couder
2022-05-12 11:55         ` Carlo Arenas
2022-05-12 13:01       ` Patrick Steinhardt
2022-05-12 13:56         ` Carlo Arenas
2022-05-12 15:58         ` Junio C Hamano
2022-05-16  8:38     ` [PATCH v4] " Christian Couder

Code repositories for project(s) associated with this inbox:

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).