user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 5/5] implement legacy redirects for old URLs
  2015-08-27  4:33  7% [PATCH 0/5] prefer shorter, less-ambiguous URLs Eric Wong
@ 2015-08-27  4:34  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2015-08-27  4:34 UTC (permalink / raw)
  To: meta; +Cc: Eric Wong

We should not break existing URLs.  Redirect them to
the newer, less-ambiguous URLs to improve cache hit
ratios.
---
 lib/PublicInbox/WWW.pm | 37 ++++++++++++++++++++-----------------
 t/plack.t              | 36 +++++++++++++++++++++++++++++++++---
 2 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index ceb34d6..8058f3e 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -44,37 +44,39 @@ sub run {
 		invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
 	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
 
 	# full-message page
 	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
-	} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
 
 	# thread display
 	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
 		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
 
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
-	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!ox ||
-	         $path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox(\.gz)?\z!o) {
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) {
 		my $sfx = $3;
 		invalid_list_mid(\%ctx, $1, $2) ||
 			get_thread_mbox(\%ctx, $sfx);
 
-	} elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) {
-		invalid_list_mid(\%ctx, $1, $2) || redirect_mid_txt(\%ctx);
+	# legacy redirects
+	} elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
+		my $pfx = $2;
+		invalid_list_mid(\%ctx, $1, $3) ||
+			redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/');
+	} elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) {
+		my $pfx = $2;
+		invalid_list_mid(\%ctx, $1, $3) ||
+			redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw');
+	} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
+		my $end = $3;
+		invalid_list_mid(\%ctx, $1, $2) ||
+			redirect_mid(\%ctx, 't', $end, '/mbox.gz');
 
 	# convenience redirects, order matters
 	} elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) {
 		my $pfx = $2;
-		invalid_list_mid(\%ctx, $1, $3) || redirect_mid(\%ctx, $2);
+		invalid_list_mid(\%ctx, $1, $3) ||
+			redirect_mid(\%ctx, $pfx, qr/\z/, '/');
 
 	} else {
 		r404();
@@ -217,13 +219,14 @@ sub redirect_list_index {
 }
 
 sub redirect_mid {
-	my ($ctx, $pfx) = @_;
+	my ($ctx, $pfx, $old, $sfx) = @_;
 	my $url = self_url($ctx->{cgi});
 	my $anchor = '';
-	if (lc($pfx) eq 't') {
+	if (lc($pfx) eq 't' && $sfx eq '/') {
 		$anchor = '#u'; # <u id='#u'> is used to highlight in View.pm
 	}
-	do_redirect($url . "/$anchor");
+	$url =~ s/$old/$sfx/;
+	do_redirect($url . $anchor);
 }
 
 # only hit when somebody tries to guess URLs manually:
diff --git a/t/plack.t b/t/plack.t
index ee77291..b3c8764 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -92,9 +92,9 @@ EOF
 			'index generated');
 	});
 
+	my $pfx = 'http://example.com/test';
 	test_psgi($app, sub {
 		my ($cb) = @_;
-		my $pfx = 'http://example.com/test';
 		my $res = $cb->(GET($pfx . '/atom.xml'));
 		is(200, $res->code, 'success response received for atom');
 		like($res->content,
@@ -105,7 +105,6 @@ EOF
 	foreach my $t (qw(f m)) {
 		test_psgi($app, sub {
 			my ($cb) = @_;
-			my $pfx = 'http://example.com/test';
 			my $path = "/$t/blah%40example.com/";
 			my $res = $cb->(GET($pfx . $path));
 			is(200, $res->code, "success for $path");
@@ -115,11 +114,42 @@ EOF
 	}
 	test_psgi($app, sub {
 		my ($cb) = @_;
-		my $pfx = 'http://example.com/test';
 		my $res = $cb->(GET($pfx . '/m/blah%40example.com/raw'));
 		is(200, $res->code, 'success response received for /m/*/raw');
 		like($res->content, qr!\AFrom !, "mbox returned");
 	});
+
+	# legacy redirects
+	foreach my $t (qw(m f)) {
+		test_psgi($app, sub {
+			my ($cb) = @_;
+			my $res = $cb->(GET($pfx . "/$t/blah%40example.com.txt"));
+			is(301, $res->code, "redirect for old $t .txt link");
+			my $location = $res->header('Location');
+			like($location, qr!/$t/blah%40example\.com/raw\z!,
+				".txt redirected to /raw");
+		});
+	}
+	foreach my $t (qw(m f t)) {
+		test_psgi($app, sub {
+			my ($cb) = @_;
+			my $res = $cb->(GET($pfx . "/$t/blah%40example.com.html"));
+			is(301, $res->code, "redirect for old $t .html link");
+			my $location = $res->header('Location');
+			like($location, qr!/$t/blah%40example\.com/(?:#u)?\z!,
+				".html redirected to /raw");
+		});
+	}
+	foreach my $sfx (qw(mbox mbox.gz)) {
+		test_psgi($app, sub {
+			my ($cb) = @_;
+			my $res = $cb->(GET($pfx . "/t/blah%40example.com.$sfx"));
+			is(301, $res->code, 'redirect for old thread link');
+			my $location = $res->header('Location');
+			like($location, qr!/t/blah%40example\.com/mbox\.gz\z!,
+				"$sfx redirected to /mbox.gz");
+		});
+	}
 }
 
 done_testing();
-- 
EW


^ permalink raw reply related	[relevance 5%]

* [PATCH 0/5] prefer shorter, less-ambiguous URLs
@ 2015-08-27  4:33  7% Eric Wong
  2015-08-27  4:34  5% ` [PATCH 5/5] implement legacy redirects for old URLs Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2015-08-27  4:33 UTC (permalink / raw)
  To: meta

Unfortunately, it's possible to have Message-IDs which end in '.txt',
'.html' or some other suffix we might use, which makes suffixed URLs
ambiguous.  Instead of '.html', use '/' as the suffix; this also allows
'/raw' for the mbox version (following a lead from gmane).

In summary:

	/m/$MESSAGE_ID.html    -> /m/$MESSAGE_ID/
	/m/$MESSAGE_ID.txt     -> /m/$MESSAGE_ID/raw
	/f/$MESSAGE_ID.html    -> /f/$MESSAGE_ID/
	/t/$MESSAGE_ID.html    -> /t/$MESSAGE_ID/
	/t/$MESSAGE_ID.mbox.gz -> /t/$MESSAGE_ID/mbox.gz

Redirects for old URLs remain in place to avoid breaking existing
links.

Eric Wong (5):
      www: minor cleanups to shorten code
      wire up shorter, less ambiguous URLs
      mid: extract Message-ID from inside '<>'
      wire up to display non-suffixed Message-ID links
      implement legacy redirects for old URLs

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-08-27  4:33  7% [PATCH 0/5] prefer shorter, less-ambiguous URLs Eric Wong
2015-08-27  4:34  5% ` [PATCH 5/5] implement legacy redirects for old URLs Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).