From 3ed9085b7445fb3b7f7f5eed107dce63e4bc3bf3 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 27 Aug 2015 04:34:02 +0000 Subject: implement legacy redirects for old URLs We should not break existing URLs. Redirect them to the newer, less-ambiguous URLs to improve cache hit ratios. --- lib/PublicInbox/WWW.pm | 37 ++++++++++++++++++++----------------- t/plack.t | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index ceb34d67..8058f3e1 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -44,37 +44,39 @@ sub run { invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx); } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx); # full-message page } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx); # thread display } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!ox || - $path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox(\.gz)?\z!o) { + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) { my $sfx = $3; invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx); - } elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || redirect_mid_txt(\%ctx); + # legacy redirects + } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) { + my $pfx = $2; + invalid_list_mid(\%ctx, $1, $3) || + redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/'); + } elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) { + my $pfx = $2; + invalid_list_mid(\%ctx, $1, $3) || + redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw'); + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { + my $end = $3; + invalid_list_mid(\%ctx, $1, $2) || + redirect_mid(\%ctx, 't', $end, '/mbox.gz'); # convenience redirects, order matters } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) { my $pfx = $2; - invalid_list_mid(\%ctx, $1, $3) || redirect_mid(\%ctx, $2); + invalid_list_mid(\%ctx, $1, $3) || + redirect_mid(\%ctx, $pfx, qr/\z/, '/'); } else { r404(); @@ -217,13 +219,14 @@ sub redirect_list_index { } sub redirect_mid { - my ($ctx, $pfx) = @_; + my ($ctx, $pfx, $old, $sfx) = @_; my $url = self_url($ctx->{cgi}); my $anchor = ''; - if (lc($pfx) eq 't') { + if (lc($pfx) eq 't' && $sfx eq '/') { $anchor = '#u'; # is used to highlight in View.pm } - do_redirect($url . "/$anchor"); + $url =~ s/$old/$sfx/; + do_redirect($url . $anchor); } # only hit when somebody tries to guess URLs manually: diff --git a/t/plack.t b/t/plack.t index ee77291c..b3c87642 100644 --- a/t/plack.t +++ b/t/plack.t @@ -92,9 +92,9 @@ EOF 'index generated'); }); + my $pfx = 'http://example.com/test'; test_psgi($app, sub { my ($cb) = @_; - my $pfx = 'http://example.com/test'; my $res = $cb->(GET($pfx . '/atom.xml')); is(200, $res->code, 'success response received for atom'); like($res->content, @@ -105,7 +105,6 @@ EOF foreach my $t (qw(f m)) { test_psgi($app, sub { my ($cb) = @_; - my $pfx = 'http://example.com/test'; my $path = "/$t/blah%40example.com/"; my $res = $cb->(GET($pfx . $path)); is(200, $res->code, "success for $path"); @@ -115,11 +114,42 @@ EOF } test_psgi($app, sub { my ($cb) = @_; - my $pfx = 'http://example.com/test'; my $res = $cb->(GET($pfx . '/m/blah%40example.com/raw')); is(200, $res->code, 'success response received for /m/*/raw'); like($res->content, qr!\AFrom !, "mbox returned"); }); + + # legacy redirects + foreach my $t (qw(m f)) { + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET($pfx . "/$t/blah%40example.com.txt")); + is(301, $res->code, "redirect for old $t .txt link"); + my $location = $res->header('Location'); + like($location, qr!/$t/blah%40example\.com/raw\z!, + ".txt redirected to /raw"); + }); + } + foreach my $t (qw(m f t)) { + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET($pfx . "/$t/blah%40example.com.html")); + is(301, $res->code, "redirect for old $t .html link"); + my $location = $res->header('Location'); + like($location, qr!/$t/blah%40example\.com/(?:#u)?\z!, + ".html redirected to /raw"); + }); + } + foreach my $sfx (qw(mbox mbox.gz)) { + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET($pfx . "/t/blah%40example.com.$sfx")); + is(301, $res->code, 'redirect for old thread link'); + my $location = $res->header('Location'); + like($location, qr!/t/blah%40example\.com/mbox\.gz\z!, + "$sfx redirected to /mbox.gz"); + }); + } } done_testing(); -- cgit v1.2.3-24-ge0c7