From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4342C211B5; Wed, 9 Jan 2019 11:43:27 +0000 (UTC) From: Eric Wong To: Konstantin Ryabitsev Cc: meta@public-inbox.org Subject: [RFC 2/2] www: add /~/$MESSAGE_ID global redirector endpoint Date: Wed, 9 Jan 2019 11:43:27 +0000 Message-Id: <20190109114327.1901-3-e@80x24.org> In-Reply-To: <20190109114327.1901-1-e@80x24.org> References: <20190109114327.1901-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: The "/~/" is not finalized, yet. Initially I chose "/_/", but it could conflict with valid git remote names. Perhaps even "/.$MESSAGE_ID" or "/~$MESSAGE_ID" could work to save a byte. Requested-by: Konstantin Ryabitsev cf. https://public-inbox.org/meta/20190107190719.GE9442@pure.paranoia.local/ --- MANIFEST | 1 + lib/PublicInbox/WWW.pm | 44 +++++++++++++++++++++++++-- t/psgi_scan_all.t | 69 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 3 deletions(-) create mode 100644 t/psgi_scan_all.t diff --git a/MANIFEST b/MANIFEST index e4f3df8..73d1047 100644 --- a/MANIFEST +++ b/MANIFEST @@ -193,6 +193,7 @@ t/psgi_attach.t t/psgi_bad_mids.t t/psgi_mount.t t/psgi_multipart_not.t +t/psgi_scan_all.t t/psgi_search.t t/psgi_text.t t/psgi_v2.t diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 3562e46..9e0973f 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -360,11 +360,23 @@ sub legacy_redirects { r301($ctx, $1, $2, $3 eq 't' ? 't/#u' : $3); } elsif ($path_info =~ m!$INBOX_RE/(\S+/\S+)/f\z!o) { r301($ctx, $1, $2); + + # scan across all inboxes + # XXX '/~/$MESSAGE_ID' not finalized + } elsif ($path_info =~ m!\A/~/(\S+)\z!) 
{ + scan_all($ctx, $1); } else { $ctx->{www}->news_www->call($ctx->{env}); } } +sub redirect ($$) { + my ($code, $url) = @_; + [ $code, + [ Location => $url, 'Content-Type' => 'text/plain' ], + [ "Redirecting to $url\n" ] ] +} + sub r301 { my ($ctx, $inbox, $mid_ue, $suffix) = @_; my $obj = $ctx->{-inbox}; @@ -383,9 +395,7 @@ sub r301 { $url .= $suffix if (defined $suffix); $url .= "?$qs" if $qs ne ''; - [ 301, - [ Location => $url, 'Content-Type' => 'text/plain' ], - [ "Redirecting to $url\n" ] ] + redirect(301, $url); } sub msg_page { @@ -446,4 +456,32 @@ sub get_attach { PublicInbox::WwwAttach::get_attach($ctx, $idx, $fn); } +sub scan_all { + my ($ctx, $mid) = @_; # mid may have trailing slash + + # TODO: user-sortable + + my @found; + do { + $ctx->{www}->{pi_config}->each_inbox(sub { + my ($ibx) = @_; + # do not pass $env, since HTTP_HOST can be different + my $url = $ibx->base_url or next; + + my $n = eval { $ibx->mm->num_for($mid) } or return; + + # ambiguous, so 302 instead of 301: + push @found, redirect(302, $url .= "$mid/"); + }); + + # account for trailing slash, since the rest of our API uses it + } while (!@found && $mid =~ s!/+\z!!); + + # FIXME: It's possible for a message to have the same Message-ID but + # different content across multiple groups... + @found ? $found[0] : r404(); + + # n.b. 
we use trailing slash in most URLs to allow "wget -r" mirrors :) +} + 1; diff --git a/t/psgi_scan_all.t b/t/psgi_scan_all.t new file mode 100644 index 0000000..bf03f22 --- /dev/null +++ b/t/psgi_scan_all.t @@ -0,0 +1,69 @@ +# Copyright (C) 2019 all contributors +# License: AGPL-3.0+ +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +use PublicInbox::Config; +my @mods = qw(HTTP::Request::Common Plack::Test URI::Escape Search::Xapian + DBD::SQLite); +foreach my $mod (@mods) { + eval "require $mod"; + plan skip_all => "$mod missing for psgi_scan_all.t" if $@; +} +use_ok 'PublicInbox::V2Writable'; +foreach my $mod (@mods) { use_ok $mod; } +my $tmp = tempdir('pi-scan_all-XXXXXX', TMPDIR => 1, CLEANUP => 1); +my $cfg = {}; + +foreach my $i (1..2) { + my $cfgpfx = "publicinbox.test-$i"; + my $addr = $cfg->{"$cfgpfx.address"} = "test-$i\@example.com"; + my $mainrepo = $cfg->{"$cfgpfx.mainrepo"} = "$tmp/$i"; + $cfg->{"$cfgpfx.url"} = "http://example.com/$i"; + my $opt = { + mainrepo => $mainrepo, + name => "test-$i", + version => 2, + -primary_address => $addr, + }; + my $ibx = PublicInbox::Inbox->new($opt); + my $im = PublicInbox::V2Writable->new($ibx, 1); + $im->{parallel} = 0; + $im->init_inbox(0); + my $mime = PublicInbox::MIME->new(<<EOF); +From: a\@example.com +To: $addr +Subject: s$i +Message-ID: <a-mid-$i\@b> +Date: Fri, 02 Oct 1993 00:00:00 +0000 + +hello world +EOF + + ok($im->add($mime), "added message to $i"); + $im->done; +} +my $config = PublicInbox::Config->new($cfg); +use_ok 'PublicInbox::WWW'; +my $www = PublicInbox::WWW->new($config); + +test_psgi(sub { $www->call(@_) }, sub { + my ($cb) = @_; + foreach my $i (1..2) { + foreach my $end ('', '/') { + my $res = $cb->(GET("/~/a-mid-$i\@b$end")); + is($res->code, 302, 'got 302'); + is($res->header('Location'), + "http://example.com/$i/a-mid-$i\@b/", + "redirected OK to $i"); + } + } + foreach my $x (qw(inv@lid inv@lid/ i/v/a l/i/d/)) { + my $res = $cb->(GET("/~/$x")); + is($res->code, 404, "404 on $x"); + } +}); + +done_testing(); -- EW