From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,BAYES_00, URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id D03B863381D; Thu, 27 Aug 2015 04:34:06 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Cc: Eric Wong Subject: [PATCH 5/5] implement legacy redirects for old URLs Date: Thu, 27 Aug 2015 04:34:02 +0000 Message-Id: <1440650042-26176-6-git-send-email-e@80x24.org> In-Reply-To: <1440650042-26176-1-git-send-email-e@80x24.org> References: <1440650042-26176-1-git-send-email-e@80x24.org> List-Id: We should not break existing URLs. Redirect them to the newer, less-ambiguous URLs to improve cache hit ratios. --- lib/PublicInbox/WWW.pm | 37 ++++++++++++++++++++----------------- t/plack.t | 36 +++++++++++++++++++++++++++++++++--- 2 files changed, 53 insertions(+), 20 deletions(-) diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index ceb34d6..8058f3e 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -44,37 +44,39 @@ sub run { invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx); } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)/raw\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_txt(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/m/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_mid_html(\%ctx); # full-message page } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)/\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/f/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_full_html(\%ctx); # thread display } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/\z!o) { invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)\.html\z!o) { - invalid_list_mid(\%ctx, $1, $2) || get_thread(\%ctx); - - } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!ox || - $path_info =~ m!$LISTNAME_RE/t/(\S+)\.mbox(\.gz)?\z!o) { + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/mbox(\.gz)?\z!x) { my $sfx = $3; invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx); - } elsif ($path_info =~ m!$LISTNAME_RE/f/\S+\.txt\z!o) { - invalid_list_mid(\%ctx, $1, $2) || redirect_mid_txt(\%ctx); + # legacy redirects + } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) { + my $pfx = $2; + invalid_list_mid(\%ctx, $1, $3) || + redirect_mid(\%ctx, $pfx, qr/\.html\z/, '/'); + } elsif ($path_info =~ m!$LISTNAME_RE/(m|f)/(\S+)\.txt\z!o) { + my $pfx = $2; + invalid_list_mid(\%ctx, $1, $3) || + redirect_mid(\%ctx, $pfx, qr/\.txt\z/, '/raw'); + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)(\.mbox(?:\.gz)?)\z!o) { + my $end = $3; + invalid_list_mid(\%ctx, $1, $2) || + redirect_mid(\%ctx, 't', $end, '/mbox.gz'); # convenience redirects, order matters } elsif ($path_info =~ m!$LISTNAME_RE/(m|f|t|s)/(\S+)\z!o) { my $pfx = $2; - invalid_list_mid(\%ctx, $1, $3) || redirect_mid(\%ctx, $2); + invalid_list_mid(\%ctx, $1, $3) || + redirect_mid(\%ctx, $pfx, qr/\z/, '/'); } else { r404(); @@ -217,13 +219,14 @@ sub redirect_list_index { } sub redirect_mid { - my ($ctx, $pfx) = @_; + my ($ctx, $pfx, $old, $sfx) = @_; my $url = self_url($ctx->{cgi}); my $anchor = ''; - if (lc($pfx) eq 't') { + if (lc($pfx) eq 't' && $sfx eq '/') { $anchor = '#u'; # is used to highlight in View.pm } - do_redirect($url . "/$anchor"); + $url =~ s/$old/$sfx/; + do_redirect($url . $anchor); } # only hit when somebody tries to guess URLs manually: diff --git a/t/plack.t b/t/plack.t index ee77291..b3c8764 100644 --- a/t/plack.t +++ b/t/plack.t @@ -92,9 +92,9 @@ EOF 'index generated'); }); + my $pfx = 'http://example.com/test'; test_psgi($app, sub { my ($cb) = @_; - my $pfx = 'http://example.com/test'; my $res = $cb->(GET($pfx . '/atom.xml')); is(200, $res->code, 'success response received for atom'); like($res->content, @@ -105,7 +105,6 @@ EOF foreach my $t (qw(f m)) { test_psgi($app, sub { my ($cb) = @_; - my $pfx = 'http://example.com/test'; my $path = "/$t/blah%40example.com/"; my $res = $cb->(GET($pfx . $path)); is(200, $res->code, "success for $path"); @@ -115,11 +114,42 @@ EOF } test_psgi($app, sub { my ($cb) = @_; - my $pfx = 'http://example.com/test'; my $res = $cb->(GET($pfx . '/m/blah%40example.com/raw')); is(200, $res->code, 'success response received for /m/*/raw'); like($res->content, qr!\AFrom !, "mbox returned"); }); + + # legacy redirects + foreach my $t (qw(m f)) { + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET($pfx . "/$t/blah%40example.com.txt")); + is(301, $res->code, "redirect for old $t .txt link"); + my $location = $res->header('Location'); + like($location, qr!/$t/blah%40example\.com/raw\z!, + ".txt redirected to /raw"); + }); + } + foreach my $t (qw(m f t)) { + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET($pfx . "/$t/blah%40example.com.html")); + is(301, $res->code, "redirect for old $t .html link"); + my $location = $res->header('Location'); + like($location, qr!/$t/blah%40example\.com/(?:#u)?\z!, + ".html redirected to /raw"); + }); + } + foreach my $sfx (qw(mbox mbox.gz)) { + test_psgi($app, sub { + my ($cb) = @_; + my $res = $cb->(GET($pfx . "/t/blah%40example.com.$sfx")); + is(301, $res->code, 'redirect for old thread link'); + my $location = $res->header('Location'); + like($location, qr!/t/blah%40example\.com/mbox\.gz\z!, + "$sfx redirected to /mbox.gz"); + }); + } } done_testing(); -- EW