git@vger.kernel.org mailing list mirror (one of many)
 help / color / mirror / code / Atom feed
From: Patrick Steinhardt <ps@pks.im>
To: git@vger.kernel.org
Cc: Patrick Steinhardt <patrick.steinhardt@elego.de>,
	Junio C Hamano <gitster@pobox.com>,
	Patrick Steinhardt <ps@pks.im>
Subject: [PATCH v5 4/5] urlmatch: include host in urlmatch ranking
Date: Tue, 31 Jan 2017 10:01:46 +0100	[thread overview]
Message-ID: <3280b309860266dd68622f8362e265c7f9834cba.1485853153.git.ps@pks.im> (raw)
In-Reply-To: <cover.1485853153.git.ps@pks.im>
In-Reply-To: <cover.1485853153.git.ps@pks.im>

From: Patrick Steinhardt <patrick.steinhardt@elego.de>

In order to be able to rank positive matches by `urlmatch`, we inspect
the path length and user part to decide whether a match is better than
another match. As all other parts are matched exactly between both URLs,
this is the correct thing to do right now.

In the future, though, we want to introduce wild cards for the domain
part. When doing this, it does not make sense anymore to only compare
the path lengths. Instead, we also want to compare the domain lengths to
determine which of both URLs matches the host part more closely.

Signed-off-by: Patrick Steinhardt <patrick.steinhardt@elego.de>
---
 t/t1300-repo-config.sh | 33 ++++++++++++++++++++++++++++
 urlmatch.c             | 59 +++++++++++++++++++++++++++++---------------------
 urlmatch.h             |  3 ++-
 3 files changed, 69 insertions(+), 26 deletions(-)

diff --git a/t/t1300-repo-config.sh b/t/t1300-repo-config.sh
index 923bfc5a2..6c844d519 100755
--- a/t/t1300-repo-config.sh
+++ b/t/t1300-repo-config.sh
@@ -1177,6 +1177,39 @@ test_expect_success 'urlmatch' '
 	test_cmp expect actual
 '
 
+test_expect_success 'urlmatch favors more specific URLs' '
+	cat >.git/config <<-\EOF &&
+	[http "https://example.com/"]
+		cookieFile = /tmp/root.txt
+	[http "https://example.com/subdirectory"]
+		cookieFile = /tmp/subdirectory.txt
+	[http "https://user@example.com/"]
+		cookieFile = /tmp/user.txt
+	[http "https://averylonguser@example.com/"]
+		cookieFile = /tmp/averylonguser.txt
+	EOF
+
+	echo http.cookiefile /tmp/root.txt >expect &&
+	git config --get-urlmatch HTTP https://example.com >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/subdirectory.txt >expect &&
+	git config --get-urlmatch HTTP https://example.com/subdirectory >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/subdirectory.txt >expect &&
+	git config --get-urlmatch HTTP https://example.com/subdirectory/nested >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/user.txt >expect &&
+	git config --get-urlmatch HTTP https://user@example.com/ >actual &&
+	test_cmp expect actual &&
+
+	echo http.cookiefile /tmp/subdirectory.txt >expect &&
+	git config --get-urlmatch HTTP https://averylonguser@example.com/subdirectory >actual &&
+	test_cmp expect actual
+'
+
 # good section hygiene
 test_expect_failure 'unsetting the last key in a section removes header' '
 	cat >.git/config <<-\EOF &&
diff --git a/urlmatch.c b/urlmatch.c
index e328905eb..f79887825 100644
--- a/urlmatch.c
+++ b/urlmatch.c
@@ -426,7 +426,7 @@ static size_t url_match_prefix(const char *url,
 
 static int match_urls(const struct url_info *url,
 		      const struct url_info *url_prefix,
-		      int *exactusermatch)
+		      struct urlmatch_item *match)
 {
 	/*
 	 * url_prefix matches url if the scheme, host and port of url_prefix
@@ -445,8 +445,8 @@ static int match_urls(const struct url_info *url,
 	 * contained a user name or false if url_prefix did not have a
 	 * user name.  If there is no match *exactusermatch is left untouched.
 	 */
-	int usermatched = 0;
-	int pathmatchlen;
+	char usermatched = 0;
+	size_t pathmatchlen;
 
 	if (!url || !url_prefix || !url->url || !url_prefix->url)
 		return 0;
@@ -483,22 +483,38 @@ static int match_urls(const struct url_info *url,
 		url->url + url->path_off,
 		url_prefix->url + url_prefix->path_off,
 		url_prefix->url_len - url_prefix->path_off);
+	if (!pathmatchlen)
+		return 0; /* paths do not match */
 
-	if (pathmatchlen && exactusermatch)
-		*exactusermatch = usermatched;
-	return pathmatchlen;
+	if (match) {
+		match->hostmatch_len = url_prefix->host_len;
+		match->pathmatch_len = pathmatchlen;
+		match->user_matched = usermatched;
+	}
+
+	return 1;
+}
+
+static int cmp_matches(const struct urlmatch_item *a,
+		       const struct urlmatch_item *b)
+{
+	if (a->hostmatch_len != b->hostmatch_len)
+		return a->hostmatch_len < b->hostmatch_len ? -1 : 1;
+	if (a->pathmatch_len != b->pathmatch_len)
+		return a->pathmatch_len < b->pathmatch_len ? -1 : 1;
+	if (a->user_matched != b->user_matched)
+		return b->user_matched ? -1 : 1;
+	return 0;
 }
 
 int urlmatch_config_entry(const char *var, const char *value, void *cb)
 {
 	struct string_list_item *item;
 	struct urlmatch_config *collect = cb;
-	struct urlmatch_item *matched;
+	struct urlmatch_item matched = {0};
 	struct url_info *url = &collect->url;
 	const char *key, *dot;
 	struct strbuf synthkey = STRBUF_INIT;
-	size_t matched_len = 0;
-	int user_matched = 0;
 	int retval;
 
 	if (!skip_prefix(var, collect->section, &key) || *(key++) != '.') {
@@ -516,9 +532,9 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 		free(config_url);
 		if (!norm_url)
 			return 0;
-		matched_len = match_urls(url, &norm_info, &user_matched);
+		retval = match_urls(url, &norm_info, &matched);
 		free(norm_url);
-		if (!matched_len)
+		if (!retval)
 			return 0;
 		key = dot + 1;
 	}
@@ -528,24 +544,17 @@ int urlmatch_config_entry(const char *var, const char *value, void *cb)
 
 	item = string_list_insert(&collect->vars, key);
 	if (!item->util) {
-		matched = xcalloc(1, sizeof(*matched));
-		item->util = matched;
+		item->util = xcalloc(1, sizeof(matched));
 	} else {
-		matched = item->util;
-		/*
-		 * Is our match shorter?  Is our match the same
-		 * length, and without user while the current
-		 * candidate is with user?  Then we cannot use it.
-		 */
-		if (matched_len < matched->matched_len ||
-		    ((matched_len == matched->matched_len) &&
-		     (!user_matched && matched->user_matched)))
+		if (cmp_matches(&matched, item->util) <= 0)
+			 /*
+			  * Our match is worse than the old one,
+			  * we cannot use it.
+			  */
 			return 0;
-		/* Otherwise, replace it with this one. */
 	}
 
-	matched->matched_len = matched_len;
-	matched->user_matched = user_matched;
+	memcpy(item->util, &matched, sizeof(matched));
 	strbuf_addstr(&synthkey, collect->section);
 	strbuf_addch(&synthkey, '.');
 	strbuf_addstr(&synthkey, key);
diff --git a/urlmatch.h b/urlmatch.h
index 0ea812b03..37ee5da85 100644
--- a/urlmatch.h
+++ b/urlmatch.h
@@ -34,7 +34,8 @@ struct url_info {
 extern char *url_normalize(const char *, struct url_info *);
 
 struct urlmatch_item {
-	size_t matched_len;
+	size_t hostmatch_len;
+	size_t pathmatch_len;
 	char user_matched;
 };
 
-- 
2.11.0


  parent reply	other threads:[~2017-01-31  9:09 UTC|newest]

Thread overview: 37+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-01-23 13:06 [PATCH v1 0/2] urlmatch: allow regexp-based matches Patrick Steinhardt
2017-01-23 13:06 ` [PATCH v1 1/2] mailmap: add Patrick Steinhardt's work address Patrick Steinhardt
2017-01-23 13:06 ` [PATCH v1 2/2] urlmatch: allow regex-based URL matching Patrick Steinhardt
2017-01-23 19:53 ` [PATCH v1 0/2] urlmatch: allow regexp-based matches Junio C Hamano
2017-01-24 11:29   ` Patrick Steinhardt
2017-01-24 17:00 ` [PATCH v2 0/4] " Patrick Steinhardt
2017-01-24 17:00 ` [PATCH v2 1/4] mailmap: add Patrick Steinhardt's work address Patrick Steinhardt
2017-01-24 17:00 ` [PATCH v2 2/4] urlmatch: enable normalization of URLs with globs Patrick Steinhardt
2017-01-24 17:00 ` [PATCH v2 3/4] urlmatch: split host and port fields in `struct url_info` Patrick Steinhardt
2017-01-24 17:00 ` [PATCH v2 4/4] urlmatch: allow globbing for the URL host part Patrick Steinhardt
2017-01-24 17:52   ` Philip Oakley
2017-01-25  9:57     ` Patrick Steinhardt
2017-01-25  9:56 ` [PATCH v3 0/4] urlmatch: allow regexp-based matches Patrick Steinhardt
2017-01-25  9:56 ` [PATCH v3 1/4] mailmap: add Patrick Steinhardt's work address Patrick Steinhardt
2017-01-25  9:56 ` [PATCH v3 2/4] urlmatch: enable normalization of URLs with globs Patrick Steinhardt
2017-01-25  9:56 ` [PATCH v3 3/4] urlmatch: split host and port fields in `struct url_info` Patrick Steinhardt
2017-01-25  9:56 ` [PATCH v3 4/4] urlmatch: allow globbing for the URL host part Patrick Steinhardt
2017-01-26 20:43   ` Junio C Hamano
2017-01-26 20:49     ` Junio C Hamano
2017-01-26 21:12       ` Junio C Hamano
2017-01-27  6:21   ` Patrick Steinhardt
2017-01-27 17:45     ` Junio C Hamano
2017-01-27 10:32 ` [PATCH v4 0/5] urlmatch: allow wildcard-based matches Patrick Steinhardt
2017-01-30 22:00   ` Junio C Hamano
2017-01-30 22:52     ` Junio C Hamano
2017-01-31  8:26       ` Patrick Steinhardt
2017-01-27 10:32 ` [PATCH v4 1/5] mailmap: add Patrick Steinhardt's work address Patrick Steinhardt
2017-01-27 10:32 ` [PATCH v4 2/5] urlmatch: enable normalization of URLs with globs Patrick Steinhardt
2017-01-27 10:32 ` [PATCH v4 3/5] urlmatch: split host and port fields in `struct url_info` Patrick Steinhardt
2017-01-27 10:32 ` [PATCH v4 4/5] urlmatch: include host and port in urlmatch length Patrick Steinhardt
2017-01-27 10:32 ` [PATCH v4 5/5] urlmatch: allow globbing for the URL host part Patrick Steinhardt
2017-01-31  9:01 ` [PATCH v5 0/5] urlmatch: allow wildcard-based matches Patrick Steinhardt
2017-01-31  9:01 ` [PATCH v5 1/5] mailmap: add Patrick Steinhardt's work address Patrick Steinhardt
2017-01-31  9:01 ` [PATCH v5 2/5] urlmatch: enable normalization of URLs with globs Patrick Steinhardt
2017-01-31  9:01 ` [PATCH v5 3/5] urlmatch: split host and port fields in `struct url_info` Patrick Steinhardt
2017-01-31  9:01 ` Patrick Steinhardt [this message]
2017-01-31  9:01 ` [PATCH v5 5/5] urlmatch: allow globbing for the URL host part Patrick Steinhardt

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://vger.kernel.org/majordomo-info.html

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3280b309860266dd68622f8362e265c7f9834cba.1485853153.git.ps@pks.im \
    --to=ps@pks.im \
    --cc=git@vger.kernel.org \
    --cc=gitster@pobox.com \
    --cc=patrick.steinhardt@elego.de \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/mirrors/git.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).