From 144885c29120cd4e4e64d10f6c320f5efbf9fb7b Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 8 Dec 2017 20:54:09 +0000 Subject: search: force large mbox result downloads to POST This should prevent crawlers (including most robots.txt ignoring ones) from burning our CPU time without severely compromising usability for humans. --- lib/PublicInbox/SearchView.pm | 19 ++++++++++++------- lib/PublicInbox/WWW.pm | 22 ++++++++++++++++++---- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm index 8e0c3cfc..13e9c179 100644 --- a/lib/PublicInbox/SearchView.pm +++ b/lib/PublicInbox/SearchView.pm @@ -18,12 +18,19 @@ our $LIM = 200; sub noop {} +sub mbox_results { + my ($ctx) = @_; + my $q = PublicInbox::SearchQuery->new($ctx->{qp}); + my $x = $q->{x}; + return PublicInbox::Mbox::mbox_all($ctx, $q->{'q'}) if $x eq 'm'; + sres_top_html($ctx); +} + sub sres_top_html { my ($ctx) = @_; my $q = PublicInbox::SearchQuery->new($ctx->{qp}); my $x = $q->{x}; my $query = $q->{'q'}; - return PublicInbox::Mbox::mbox_all($ctx, $query) if $x eq 'm'; my $code = 200; # double the limit for expanded views: @@ -60,7 +67,7 @@ retry: } else { return adump($_[0], $mset, $q, $ctx) if $x eq 'A'; - $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx) . "\n\n"; + $ctx->{-html_tip} = search_nav_top($mset, $q, $ctx); if ($x eq 't') { $cb = mset_thread($ctx, $mset, $q); } else { @@ -131,8 +138,8 @@ sub err_txt { sub search_nav_top { my ($mset, $q, $ctx) = @_; - - my $rv = '
';
+	my $m = $q->qs_html(x => 'm', r => undef);
+	my $rv = qq{
};
 	my $initial_q = $ctx->{-uxs_retried};
 	if (defined $initial_q) {
 		my $rewritten = $q->{'q'};
@@ -166,10 +173,8 @@ sub search_nav_top {
 	}
 	my $A = $q->qs_html(x => 'A', r => undef);
 	$rv .= qq{|Atom feed]};
-	my $m = $q->qs_html(x => 'm', r => undef);
-	warn "m: $m\n";
 	$rv .= qq{\n\t\t\t\t\t\tdownload: };
-	$rv .= qq{mbox.gz};
+	$rv .= qq{
};
 }
 
 sub search_nav_bot {
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index f3c702e9..3fd77d42 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -53,10 +53,14 @@ sub call {
 	my $path_info = $env->{PATH_INFO};
 	my $method = $env->{REQUEST_METHOD};
 
-	if ($method eq 'POST' &&
-		 $path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
-		my $path = $2;
-		return invalid_inbox($ctx, $1) || serve_git($ctx, $path);
+	if ($method eq 'POST') {
+		if ($path_info =~ m!$INBOX_RE/(git-upload-pack)\z!) {
+			my $path = $2;
+			return invalid_inbox($ctx, $1) ||
+				serve_git($ctx, $path);
+		} elsif ($path_info =~ m!$INBOX_RE/!o) {
+			return invalid_inbox($ctx, $1) || mbox_results($ctx);
+		}
 	}
 	elsif ($method !~ /\AGET|HEAD\z/) {
 		return r(405, 'Method Not Allowed');
@@ -400,6 +404,16 @@ sub serve_git {
 	PublicInbox::GitHTTPBackend::serve($ctx->{env}, $ctx->{git}, $path);
 }
 
+sub mbox_results { # dispatched from call() for POST requests under an inbox path
+	my ($ctx) = @_;
+	if ($ctx->{env}->{QUERY_STRING} =~ /(?:\A|[&;])q=/) { # a q= parameter starts the query string or follows & or ;
+		searcher($ctx) or return need_search($ctx); # false searcher() => hand back need_search()'s result
+		require PublicInbox::SearchView; # loaded at runtime, only on this path
+		return PublicInbox::SearchView::mbox_results($ctx); # SearchView decides between mbox output and the HTML results
+	}
+	r404(); # no q= parameter: r404()'s result becomes the return value
+}
+
 sub serve_mbox_range {
 	my ($ctx, $inbox, $range) = @_;
 	invalid_inbox($ctx, $inbox) || eval {
-- 
cgit v1.2.3-24-ge0c7