From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id F235E1F453; Fri, 1 Feb 2019 18:31:13 +0000 (UTC) Date: Fri, 1 Feb 2019 18:31:13 +0000 From: Eric Wong To: Konstantin Ryabitsev Cc: meta@public-inbox.org Subject: [PATCH v2] newswww: add /$MESSAGE_ID global redirector endpoint Message-ID: <20190201183113.kacp665geshxxmin@dcvr> References: <20190109114327.1901-1-e@80x24.org> <20190109114327.1901-3-e@80x24.org> <20190127020608.f6yu2j3w4mfcc75b@dcvr> <20190201090056.b5ait6ebaflz3tsq@dcvr> MIME-Version: 1.0 Content-Type: text/plain; charset=utf-8 Content-Disposition: inline In-Reply-To: <20190201090056.b5ait6ebaflz3tsq@dcvr> List-Id: Eric Wong wrote: > However, my screwing up solver hrefs in the Atom feeds got me > thinking this can even be a 404 handler at the top level > (similar to how PublicInbox::NewsWWW works). That would allow > it to be mapped to any path (or domain) via the PSGI builder > file... Or just use NewsWWW, because nntp:/// is valid. Going to think about it while I eat and do other things, but will very likely merge it to master, soon. --------8<----------- Subject: [PATCH] newswww: add /$MESSAGE_ID global redirector endpoint This is the fallback for the normal WWW endpoint. Adding this to the top-level seems to be alright, since lynx and w3m both understand nntp:/// anyways. If newsgroup and inbox names conflict, then consider it the fault of the original sender. 
Since NewsWWW is intended to support buggy linkifiers in mail clients, they can interpret nntp:// URLs as http:/// Inbox ordering from the config file is preserved since commit cfa8ff7c256e20f3240aed5f98d155c019788e3b ("config: each_inbox iteration preserves config order"), so admins can rely on that to configure how scanning works. Requested-by: Konstantin Ryabitsev cf. https://public-inbox.org/meta/20190107190719.GE9442@pure.paranoia.local/ nntp://news.public-inbox.org/20190107190719.GE9442@pure.paranoia.local --- MANIFEST | 1 + lib/PublicInbox/NewsWWW.pm | 50 ++++++++++++++++++++++----- t/psgi_scan_all.t | 69 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 9 deletions(-) create mode 100644 t/psgi_scan_all.t diff --git a/MANIFEST b/MANIFEST index c4a9349..6ff2bfe 100644 --- a/MANIFEST +++ b/MANIFEST @@ -208,6 +208,7 @@ t/psgi_attach.t t/psgi_bad_mids.t t/psgi_mount.t t/psgi_multipart_not.t +t/psgi_scan_all.t t/psgi_search.t t/psgi_text.t t/psgi_v2.t diff --git a/lib/PublicInbox/NewsWWW.pm b/lib/PublicInbox/NewsWWW.pm index 01e34d7..d7fcb0d 100644 --- a/lib/PublicInbox/NewsWWW.pm +++ b/lib/PublicInbox/NewsWWW.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2016-2018 all contributors +# Copyright (C) 2016-2019 all contributors # License: AGPL-3.0+ # # Plack app redirector for mapping /$NEWSGROUP requests to @@ -17,16 +17,34 @@ sub new { bless { pi_config => $pi_config }, $class; } +sub redirect ($$) { + my ($code, $url) = @_; + [ $code, + [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to $url\n" ] ] +} + +sub try_inbox ($$) { + my ($ibx, $mid) = @_; + # do not pass $env since HTTP_HOST may differ + my $url = $ibx->base_url or return; + + eval { $ibx->mm->num_for($mid) } or return; + + # 302 since the same message may show up on + # multiple inboxes and inboxes can be added/reordered + redirect(302, $url .= mid_escape($mid) . 
'/'); +} + sub call { my ($self, $env) = @_; - my $path = $env->{PATH_INFO}; - $path =~ s!\A/+!!; - $path =~ s!/+\z!!; # some links may have the article number in them: # /inbox.foo.bar/123456 - my ($ng, $article) = split(m!/+!, $path, 2); - if (my $inbox = $self->{pi_config}->lookup_newsgroup($ng)) { + my (undef, @parts) = split(m!/!, $env->{PATH_INFO}); + my ($ng, $article) = @parts; + my $pi_config = $self->{pi_config}; + if (my $inbox = $pi_config->lookup_newsgroup($ng)) { my $url = PublicInbox::Hval::prurl($env, $inbox->{url}); my $code = 301; if (defined $article && $article =~ /\A\d+\z/) { @@ -38,12 +56,26 @@ sub call { $url .= mid_escape($mid) . '/'; } } + return redirect($code, $url); + } - my $h = [ Location => $url, 'Content-Type' => 'text/plain' ]; + my $res; + my @try = (join('/', @parts)); + + # trailing slash is in the rest of our WWW, so maybe some users + # will assume it: + if ($parts[-1] eq '') { + pop @parts; + push @try, join('/', @parts); + } - return [ $code, $h, [ "Redirecting to $url\n" ] ] + foreach my $mid (@try) { + $pi_config->each_inbox(sub { + $res ||= try_inbox($_[0], $mid); + }); + last if defined $res; } - [ 404, [ 'Content-Type' => 'text/plain' ], [ "404 Not Found\n" ] ]; + $res || [ 404, [qw(Content-Type text/plain)], ["404 Not Found\n"] ]; } 1; diff --git a/t/psgi_scan_all.t b/t/psgi_scan_all.t new file mode 100644 index 0000000..e9c439e --- /dev/null +++ b/t/psgi_scan_all.t @@ -0,0 +1,69 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +use PublicInbox::Config; +my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Search::Xapian + DBD::SQLite); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for psgi_scan_all.t" if $@; +} +use_ok 'PublicInbox::V2Writable'; +foreach my $mod (@mods) { use_ok $mod; } +my $tmp = tempdir('pi-scan_all-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $cfg = {}; + 
+foreach my $i (1..2) { + my $cfgpfx = "publicinbox.test-$i"; + my $addr = $cfg->{"$cfgpfx.address"} = "test-$i\@example.com"; + my $mainrepo = $cfg->{"$cfgpfx.mainrepo"} = "$tmp/$i"; + $cfg->{"$cfgpfx.url"} = "http://example.com/$i"; + my $opt = { + mainrepo => $mainrepo, + name => "test-$i", + version => 2, + -primary_address => $addr, + }; + my $ibx = PublicInbox::Inbox->new($opt); + my $im = PublicInbox::V2Writable->new($ibx, 1); + $im->{parallel} = 0; + $im->init_inbox(0); + my $mime = PublicInbox::MIME->new(<<EOF); +Message-ID: <a-mid-$i\@b> +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +hello world +EOF + + ok($im->add($mime), "added message to $i"); + $im->done; +} +my $config = PublicInbox::Config->new($cfg); +use_ok 'PublicInbox::WWW'; +my $www = PublicInbox::WWW->new($config); + +test_psgi(sub { $www->call(@_) }, sub { + my ($cb) = @_; + foreach my $i (1..2) { + foreach my $end ('', '/') { + my $res = $cb->(GET("/a-mid-$i\@b$end")); + is($res->code, 302, 'got 302'); + is($res->header('Location'), + "http://example.com/$i/a-mid-$i\@b/", + "redirected OK to $i"); + } + } + foreach my $x (qw(inv@lid inv@lid/ i/v/a l/i/d/)) { + my $res = $cb->(GET("/$x")); + is($res->code, 404, "404 on $x"); + } +}); + +done_testing(); -- EW