From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 0FF0C1FAF0; Thu, 29 Mar 2018 10:28:32 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Cc: "Eric Wong (Contractor, The Linux Foundation)" Subject: [PATCH 13/14] www: cleanup expensive fallback for legacy URLs Date: Thu, 29 Mar 2018 10:28:18 +0000 Message-Id: <20180329102819.15234-14-e@80x24.org> In-Reply-To: <20180329102819.15234-1-e@80x24.org> References: <20180329102819.15234-1-e@80x24.org> List-Id: Back in the day, we compressed long Message-IDs to SHA-1 hexdigests for the URL. Legacy URLs of that form now get a 301 redirect to the real Message-ID URL, in the hope that we can remove these checks some day to reduce overhead. --- lib/PublicInbox/Inbox.pm | 11 ++++++++--- lib/PublicInbox/WWW.pm | 23 +++++++++-------------- t/plack.t | 18 ++++++++++++++++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 01aa500..265360d 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -293,13 +293,18 @@ sub path_check { git($self)->check('HEAD:'.$path); } +sub mid2num($$) { + my ($self, $mid) = @_; + my $mm = mm($self) or return; + $mm->num_for($mid); +} + sub smsg_by_mid ($$) { my ($self, $mid) = @_; my $srch = search($self) or return; # favor the Message-ID we used for the NNTP article number: - my $mm = mm($self) or return; - my $num = $mm->num_for($mid); - $srch->lookup_article($num); + my $num = mid2num($self, $mid); + defined $num ? 
$srch->lookup_article($num) : undef; } sub msg_by_mid ($$;$) { diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 7bd2973..24e24f1 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -169,14 +169,15 @@ sub invalid_inbox_mid { return $ret if $ret; $ctx->{mid} = $mid; - if ($mid =~ /\A[a-f0-9]{40}\z/) { - # this is horiffically wasteful for legacy URLs: - if ($mid = mid2blob($ctx)) { - require Email::Simple; - use PublicInbox::MID qw/mid_clean/; - my $s = Email::Simple->new($mid); - $ctx->{mid} = mid_clean($s->header('Message-ID')); - } + my $ibx = $ctx->{-inbox}; + if ($mid =~ m!\A([a-f0-9]{2})([a-f0-9]{38})\z!) { + my ($x2, $x38) = ($1, $2); + # this is horrifically wasteful for legacy URLs: + my $str = $ctx->{-inbox}->msg_by_path("$x2/$x38") or return; + require Email::Simple; + my $s = Email::Simple->new($str); + $mid = PublicInbox::MID::mid_clean($s->header('Message-ID')); + return r301($ctx, $inbox, $mid); } undef; } @@ -208,12 +209,6 @@ sub get_index { } } -# just returns a string ref for the blob in the current ctx -sub mid2blob { - my ($ctx) = @_; - $ctx->{-inbox}->msg_by_mid($ctx->{mid}); -} - # /$INBOX/$MESSAGE_ID/raw -> raw mbox sub get_mid_txt { my ($ctx) = @_; diff --git a/t/plack.t b/t/plack.t index 26b0366..7eb7d7f 100644 --- a/t/plack.t +++ b/t/plack.t @@ -18,6 +18,7 @@ foreach my $mod (@mods) { } use_ok 'PublicInbox::Import'; use_ok 'PublicInbox::Git'; +my @ls; foreach my $mod (@mods) { use_ok $mod; } { @@ -55,6 +56,8 @@ EOF $im->done; my $rev = `git --git-dir="$maindir" rev-list HEAD`; like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); + @ls = `git --git-dir="$maindir" ls-tree -r --name-only HEAD`; + chomp @ls; } my $app = eval { local $ENV{PI_CONFIG} = $pi_config; @@ -198,6 +201,21 @@ EOF "$sfx redirected to /mbox.gz"); }); } + test_psgi($app, sub { + my ($cb) = @_; + # for a while, we used to support /$INBOX/$X40/ + # when we "compressed" long Message-IDs to SHA-1 + # Now we're stuck supporting them forever 
:< + foreach my $path (@ls) { + $path =~ tr!/!!d; + my $from = "http://example.com/test/$path/"; + my $res = $cb->(GET($from)); + is(301, $res->code, 'is permanent redirect'); + like($res->header('Location'), + qr!/test/blah\@example\.com/!, + 'redirect from x40 MIDs works'); + } + }); } done_testing(); -- EW