From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Cc: Eric Wong <e@80x24.org>
Subject: [PATCH 5/5] implement legacy redirects for old URLs
Date: Thu, 27 Aug 2015 04:34:02 +0000 [thread overview]
Message-ID: <1440650042-26176-6-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1440650042-26176-1-git-send-email-e@80x24.org>
We should not break existing URLs. Redirect them to
the newer, less-ambiguous URLs to improve cache hit
ratios.
---
lib/PublicInbox/WWW.pm | 37 ++++++++++++++++++++-----------------
t/plack.t | 36 +++++++++++++++++++++++++++++++++---
2 files changed, 53 insertions(+), 20 deletions(-)
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index ceb34d6..8058f3e 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -44,37 +44,39 @@ sub run {
invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
} elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx);
# full-message page
} elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx);
# thread display
} elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) {
invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx);
-
- } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!ox ||
- $path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox(\.gz)?\z!o) {
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) {
my $sfx = $3;
invalid_list_mid(\%ctx, $1, $2) ||
get_thread_mbox(\%ctx, $sfx);
- } elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) {
- invalid_list_mid(\%ctx, $1, $2) || redirect_mid_txt(\%ctx);
+ # legacy redirects
+ } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
+ my $pfx = $2;
+ invalid_list_mid(\%ctx, $1, $3) ||
+ redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/');
+ } elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) {
+ my $pfx = $2;
+ invalid_list_mid(\%ctx, $1, $3) ||
+ redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw');
+ } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) {
+ my $end = $3;
+ invalid_list_mid(\%ctx, $1, $2) ||
+ redirect_mid(\%ctx, 't', $end, '/mbox.gz');
# convenience redirects, order matters
} elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) {
my $pfx = $2;
- invalid_list_mid(\%ctx, $1, $3) || redirect_mid(\%ctx, $2);
+ invalid_list_mid(\%ctx, $1, $3) ||
+ redirect_mid(\%ctx, $pfx, qr/\z/, '/');
} else {
r404();
@@ -217,13 +219,14 @@ sub redirect_list_index {
}
sub redirect_mid {
- my ($ctx, $pfx) = @_;
+ my ($ctx, $pfx, $old, $sfx) = @_;
my $url = self_url($ctx->{cgi});
my $anchor = '';
- if (lc($pfx) eq 't') {
+ if (lc($pfx) eq 't' && $sfx eq '/') {
$anchor = '#u'; # <u id='#u'> is used to highlight in View.pm
}
- do_redirect($url . "/$anchor");
+ $url =~ s/$old/$sfx/;
+ do_redirect($url . $anchor);
}
# only hit when somebody tries to guess URLs manually:
diff --git a/t/plack.t b/t/plack.t
index ee77291..b3c8764 100644
--- a/t/plack.t
+++ b/t/plack.t
@@ -92,9 +92,9 @@ EOF
'index generated');
});
+ my $pfx = 'http://example.com/test';
test_psgi($app, sub {
my ($cb) = @_;
- my $pfx = 'http://example.com/test';
my $res = $cb->(GET($pfx . '/atom.xml'));
is(200, $res->code, 'success response received for atom');
like($res->content,
@@ -105,7 +105,6 @@ EOF
foreach my $t (qw(f m)) {
test_psgi($app, sub {
my ($cb) = @_;
- my $pfx = 'http://example.com/test';
my $path = "/$t/blah%40example.com/";
my $res = $cb->(GET($pfx . $path));
is(200, $res->code, "success for $path");
@@ -115,11 +114,42 @@ EOF
}
test_psgi($app, sub {
my ($cb) = @_;
- my $pfx = 'http://example.com/test';
my $res = $cb->(GET($pfx . '/m/blah%40example.com/raw'));
is(200, $res->code, 'success response received for /m/*/raw');
like($res->content, qr!\AFrom !, "mbox returned");
});
+
+ # legacy redirects
+ foreach my $t (qw(m f)) {
+ test_psgi($app, sub {
+ my ($cb) = @_;
+ my $res = $cb->(GET($pfx . "/$t/blah%40example.com.txt"));
+ is(301, $res->code, "redirect for old $t .txt link");
+ my $location = $res->header('Location');
+ like($location, qr!/$t/blah%40example\.com/raw\z!,
+ ".txt redirected to /raw");
+ });
+ }
+ foreach my $t (qw(m f t)) {
+ test_psgi($app, sub {
+ my ($cb) = @_;
+ my $res = $cb->(GET($pfx . "/$t/blah%40example.com.html"));
+ is(301, $res->code, "redirect for old $t .html link");
+ my $location = $res->header('Location');
+ like($location, qr!/$t/blah%40example\.com/(?:#u)?\z!,
+ ".html redirected to /");
+ });
+ }
+ foreach my $sfx (qw(mbox mbox.gz)) {
+ test_psgi($app, sub {
+ my ($cb) = @_;
+ my $res = $cb->(GET($pfx . "/t/blah%40example.com.$sfx"));
+ is(301, $res->code, 'redirect for old thread link');
+ my $location = $res->header('Location');
+ like($location, qr!/t/blah%40example\.com/mbox\.gz\z!,
+ "$sfx redirected to /mbox.gz");
+ });
+ }
}
done_testing();
--
EW
prev parent reply other threads:[~2015-08-27 4:34 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-27 4:33 [PATCH 0/5] prefer shorter, less-ambiguous URLs Eric Wong
2015-08-27 4:33 ` [PATCH 1/5] www: minor cleanups to shorten code Eric Wong
2015-08-27 4:33 ` [PATCH 2/5] wire up shorter, less ambiguous URLs Eric Wong
2015-08-27 4:34 ` [PATCH 3/5] mid: extract Message-ID from inside '<>' Eric Wong
2015-08-27 4:34 ` [PATCH 4/5] wire up to display non-suffixed Message-ID links Eric Wong
2015-08-27 4:34 ` Eric Wong [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1440650042-26176-6-git-send-email-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).