From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-2.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 1C286200F7 for ; Sat, 5 Sep 2015 09:01:12 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 6/6] extmsg: fall back to partial Message-ID matching Date: Sat, 5 Sep 2015 09:01:08 +0000 Message-Id: <1441443668-21092-7-git-send-email-e@80x24.org> In-Reply-To: <1441443668-21092-1-git-send-email-e@80x24.org> References: <1441443668-21092-1-git-send-email-e@80x24.org> List-Id: In case a URL gets truncated (as is common with long URLs), we can rely on Xapian for partial matches and bring the user to their destination. --- lib/PublicInbox/ExtMsg.pm | 45 +++++++++++++++++++++++++++++++++++++++------ lib/PublicInbox/Search.pm | 9 +++++++++ lib/PublicInbox/WWW.pm | 1 + 3 files changed, 49 insertions(+), 6 deletions(-) diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 7cf696d..243b6ba 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -23,7 +23,7 @@ sub ext_msg { eval { require PublicInbox::Search }; my $have_xap = $@ ? 
0 : 1; - my @nox; + my (@nox, @pfx); foreach my $k (keys %$pi_config) { $k =~ /\Apublicinbox\.([A-Z0-9a-z-]+)\.url\z/ or next; @@ -40,8 +40,9 @@ sub ext_msg { # try to find the URL with Xapian to avoid forking if ($have_xap) { + my $s; my $doc_id = eval { - my $s = PublicInbox::Search->new($git_dir); + $s = PublicInbox::Search->new($git_dir); $s->find_unique_doc_id('mid', $mid); }; if ($@) { @@ -53,6 +54,7 @@ sub ext_msg { # no point in trying the fork fallback if we # know Xapian is up-to-date but missing the # message in the current repo + push @pfx, { srch => $s, url => $url }; next; } } @@ -82,19 +84,50 @@ sub ext_msg { } } + # fall back to partial MID matching + my $n_partial = 0; + my @partial; + if ($have_xap) { + my $cgi = $ctx->{cgi}; + my $url = ref($cgi) eq 'CGI' ? $cgi->url(-base) . '/' + : $cgi->base->as_string; + $url .= $listname; + unshift @pfx, { srch => $ctx->{srch}, url => $url }; + foreach my $pfx (@pfx) { + my $srch = delete $pfx->{srch} or next; + if (my $res = $srch->mid_prefix($mid)) { + $n_partial += scalar(@$res); + $pfx->{res} = $res; + push @partial, $pfx; + } + } + } my $code = 404; my $h = PublicInbox::Hval->new_msgid($mid, 1); my $href = $h->as_href; my $html = $h->as_html; my $title = "Message-ID <$html> not found"; - - # Fall back to external repos if configured my $s = "$title" . - "
$title";
+		"
$title\n";
 
+	if ($n_partial) {
+		$code = 300;
+		$s.= "\nPartial matches found:\n\n";
+		foreach my $pfx (@partial) {
+			my $u = $pfx->{url};
+			foreach my $m (@{$pfx->{res}}) {
+				$h = PublicInbox::Hval->new($m);
+				$href = $h->as_href;
+				$html = $h->as_html;
+				$s .= qq{$u/$html/\n};
+			}
+		}
+	}
+
+	# Fall back to external repos if configured
 	if (@EXT_URL) {
 		$code = 300;
-		$s .= "\n\nPerhaps try an external site:\n\n";
+		$s .= "\nPerhaps try an external site:\n\n";
 		foreach my $u (@EXT_URL) {
 			my $r = sprintf($u, $href);
 			my $t = sprintf($u, $html);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 2065055..e7ea96c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -269,4 +269,13 @@ sub enquire {
 	$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
 }
 
+sub mid_prefix { # return an array ref of ->mid values for messages matching prefix $mpfx, or nothing
+	my ($self, $mpfx) = @_;
+	my $query = eval { $self->qp->parse_query("m:$mpfx", FLAG_PARTIAL) }; # eval traps a die from parse_query; FLAG_PARTIAL presumably enables Xapian partial matching -- confirm
+	return if $@; # query could not be parsed: bare return (nothing) to the caller
+	my $res = $self->do_enquire($query, { relevance => 1 }); # NOTE(review): not wrapped in eval, so a die here propagates to the caller
+	return unless $res->{total}; # no hits: bare return so callers can test the result for truth
+	[ map { $_->mid } @{$res->{msgs}} ]; # implicit return value: one ->mid per entry of $res->{msgs}
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2718854..8f15506 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -79,6 +79,7 @@ sub r404 {
 	my ($ctx) = @_;
 	if ($ctx && $ctx->{mid}) {
 		require PublicInbox::ExtMsg;
+		searcher($ctx);
 		return PublicInbox::ExtMsg::ext_msg($ctx);
 	}
 	r(404, 'Not Found');
-- 
EW