From 98661e7894ae4b516d7b7a9d87e451ef2bfe57ba Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 9 Jan 2019 11:43:27 +0000 Subject: newswww: add /$MESSAGE_ID global redirector endpoint This is the fallback for the normal WWW endpoint. Adding this to the top-level seems to be alright, since lynx and w3m both understand nntp:/// anyways. If newsgroup and inbox names conflict, then consider it the fault of the original sender. Since NewsWWW is intended to support buggy linkifiers in mail clients, they can interpret nntp:// URLs as http:/// Inbox ordering from the config file is preserved since commit cfa8ff7c256e20f3240aed5f98d155c019788e3b ("config: each_inbox iteration preserves config order"), so admins can rely on that to configure how scanning works. Requested-by: Konstantin Ryabitsev cf. https://public-inbox.org/meta/20190107190719.GE9442@pure.paranoia.local/ nntp://news.public-inbox.org/20190107190719.GE9442@pure.paranoia.local --- MANIFEST | 1 + lib/PublicInbox/NewsWWW.pm | 50 +++++++++++++++++++++++++++------ t/psgi_scan_all.t | 69 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 t/psgi_scan_all.t diff --git a/MANIFEST b/MANIFEST index c4a9349f..6ff2bfef 100644 --- a/MANIFEST +++ b/MANIFEST @@ -208,6 +208,7 @@ t/psgi_attach.t t/psgi_bad_mids.t t/psgi_mount.t t/psgi_multipart_not.t +t/psgi_scan_all.t t/psgi_search.t t/psgi_text.t t/psgi_v2.t diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm index 01e34d7b..d7fcb0da 100644 --- a/lib/PublicInbox/NewsWWW.pm +++ b/lib/PublicInbox/NewsWWW.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ # # Plack app redirector for mapping /$NEWSGROUP requests to @@ -17,16 +17,34 @@ sub new { bless { pi_config => $pi_config }, $class; } +sub redirect ($$) { + my ($code, $url) = @_; + [ $code, + [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to 
$url\n" ] ] +} + +sub try_inbox ($$) { + my ($ibx, $mid) = @_; + # do not pass $env since HTTP_HOST may differ + my $url = $ibx->base_url or return; + + eval { $ibx->mm->num_for($mid) } or return; + + # 302 since the same message may show up on + # multiple inboxes and inboxes can be added/reordered + redirect(302, $url .= mid_escape($mid) . '/'); +} + sub call { my ($self, $env) = @_; - my $path = $env->{PATH_INFO}; - $path =~ s!\A/+!!; - $path =~ s!/+\z!!; # some links may have the article number in them: # /inbox.foo.bar/123456 - my ($ng, $article) = split(m!/+!, $path, 2); - if (my $inbox = $self->{pi_config}->lookup_newsgroup($ng)) { + my (undef, @parts) = split(m!/!, $env->{PATH_INFO}); + my ($ng, $article) = @parts; + my $pi_config = $self->{pi_config}; + if (my $inbox = $pi_config->lookup_newsgroup($ng)) { my $url = PublicInbox::Hval::prurl($env, $inbox->{url}); my $code = 301; if (defined $article && $article =~ /\A\d+\z/) { @@ -38,12 +56,26 @@ sub call { $url .= mid_escape($mid) . 
'/'; } } + return redirect($code, $url); + } - my $h = [ Location => $url, 'Content-Type' => 'text/plain' ]; + my $res; + my @try = (join('/', @parts)); + + # trailing slash is in the rest of our WWW, so maybe some users + # will assume it: + if ($parts[-1] eq '') { + pop @parts; + push @try, join('/', @parts); + } - return [ $code, $h, [ "Redirecting to $url\n" ] ] + foreach my $mid (@try) { + $pi_config->each_inbox(sub { + $res ||= try_inbox($_[0], $mid); + }); + last if defined $res; } - [ 404, [ 'Content-Type' => 'text/plain' ], [ "404 Not Found\n" ] ]; + $res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ]; } 1; diff --git a/t/psgi_scan_all.t b/t/psgi_scan_all.t new file mode 100644 index 00000000..e9c439ec --- /dev/null +++ b/t/psgi_scan_all.t @@ -0,0 +1,69 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +use PublicInbox::Config; +my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Search::Xapian + DBD::SQLite); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for psgi_scan_all.t" if $@; +} +use_ok 'PublicInbox::V2Writable'; +foreach my $mod (@mods) { use_ok $mod; } +my $tmp = tempdir('pi-scan_all-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $cfg = {}; + +foreach my $i (1..2) { + my $cfgpfx = "publicinbox.test-$i"; + my $addr = $cfg->{"$cfgpfx.address"} = "test-$i\@example.com"; + my $mainrepo = $cfg->{"$cfgpfx.mainrepo"} = "$tmp/$i"; + $cfg->{"$cfgpfx.url"} = "http://example.com/$i"; + my $opt = { + mainrepo => $mainrepo, + name => "test-$i", + version => 2, + -primary_address => $addr, + }; + my $ibx = PublicInbox::Inbox->new($opt); + my $im = PublicInbox::V2Writable->new($ibx, 1); + $im->{parallel} = 0; + $im->init_inbox(0); + my $mime = PublicInbox::MIME->new(<<EOF); +From: a\@example.com +To: $addr +Subject: s$i +Message-ID: <a-mid-$i\@b> +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +hello world +EOF + + ok($im->add($mime), "added message to $i"); + $im->done; +} +my $config = 
PublicInbox::Config->new($cfg); +use_ok 'PublicInbox::WWW'; +my $www = PublicInbox::WWW->new($config); + +test_psgi(sub { $www->call(@_) }, sub { + my ($cb) = @_; + foreach my $i (1..2) { + foreach my $end ('', '/') { + my $res = $cb->(GET("/a-mid-$i\@b$end")); + is($res->code, 302, 'got 302'); + is($res->header('Location'), + "http://example.com/$i/a-mid-$i\@b/", + "redirected OK to $i"); + } + } + foreach my $x (qw(inv@lid inv@lid/ i/v/a l/i/d/)) { + my $res = $cb->(GET("/$x")); + is($res->code, 404, "404 on $x"); + } +}); + +done_testing(); -- cgit v1.2.3-24-ge0c7