From 144885c29120cd4e4e64d10f6c320f5efbf9fb7b Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Fri, 8 Dec 2017 20:54:09 +0000
Subject: search: force large mbox result downloads to POST

This should prevent crawlers (including most robots.txt ignoring
ones) from burning our CPU time without severely compromising
usability for humans.
---
 lib/PublicInbox/WWW.pm | 22 ++++++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

(limited to 'lib/PublicInbox/WWW.pm')

diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f3c702e9..3fd77d42 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -53,10 +53,14 @@ sub call {
 	my $path_info = $env->{PATH_INFO};
 	my $method = $env->{REQUEST_METHOD};
 
-	if ($method eq 'POST' &&
-			$path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
-		my $path = $2;
-		return invalid_inbox($ctx, $1) || serve_git($ctx, $path);
+	if ($method eq 'POST') {
+		if ($path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
+			my $path = $2;
+			return invalid_inbox($ctx, $1) ||
+				serve_git($ctx, $path);
+		} elsif ($path_info =~ m!$INBOX_RE/!o) {
+			return invalid_inbox($ctx, $1) || mbox_results($ctx);
+		}
 	} elsif ($method !~ /\AGET|HEAD\z/) {
 		return r(405, 'Method Not Allowed');
 	}
@@ -400,6 +404,16 @@ sub serve_git {
 	PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path);
 }
 
+sub mbox_results {
+	my ($ctx) = @_;
+	if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) {
+		searcher($ctx) or return need_search($ctx);
+		require PublicInbox::SearchView;
+		return PublicInbox::SearchView::mbox_results($ctx);
+	}
+	r404();
+}
+
 sub serve_mbox_range {
 	my ($ctx, $inbox, $range) = @_;
 	invalid_inbox($ctx, $inbox) || eval {
-- 
cgit v1.2.3-24-ge0c7