user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* Re: [PATCH 6/6] extmsg: fall back to partial Message-ID matching
  2015-09-05  9:01  5% ` [PATCH 6/6] extmsg: fall back to partial Message-ID matching Eric Wong
@ 2015-09-05  9:14  7%   ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2015-09-05  9:14 UTC (permalink / raw)
  To: meta

Eric Wong <e@80x24.org> wrote:
> In case a URL gets truncated (as is common with long URLs),
> we can rely on Xapian for partial matches and bring the user
> to their destination.

Note: this is a bit half-assed and does not work when a Message-ID
is broken into multiple terms (common).

Perhaps a prefix trie is necessary; but a good on-disk one does
not seem to readily exist in Debian (or anywhere) for Perl?
Oh well, this is a rare feature.

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 243b6ba..77537c2 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -95,6 +95,8 @@ sub ext_msg {
 		unshift @pfx, { srch => $ctx->{srch}, url => $url };
 		foreach my $pfx (@pfx) {
 			my $srch = delete $pfx->{srch} or next;
+
+			# FIXME we may need a proper prefix trie here...
 			if (my $res = $srch->mid_prefix($mid)) {
 				$n_partial += scalar(@$res);
 				$pfx->{res} = $res;

^ permalink raw reply related	[relevance 7%]

* [PATCH 6/6] extmsg: fall back to partial Message-ID matching
  @ 2015-09-05  9:01  5% ` Eric Wong
  2015-09-05  9:14  7%   ` Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2015-09-05  9:01 UTC (permalink / raw)
  To: meta

In case a URL gets truncated (as is common with long URLs),
we can rely on Xapian for partial matches and bring the user
to their destination.
---
 lib/PublicInbox/ExtMsg.pm | 45 +++++++++++++++++++++++++++++++++++++++------
 lib/PublicInbox/Search.pm |  9 +++++++++
 lib/PublicInbox/WWW.pm    |  1 +
 3 files changed, 49 insertions(+), 6 deletions(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 7cf696d..243b6ba 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -23,7 +23,7 @@ sub ext_msg {
 
 	eval { require PublicInbox::Search };
 	my $have_xap = $@ ? 0 : 1;
-	my @nox;
+	my (@nox, @pfx);
 
 	foreach my $k (keys %$pi_config) {
 		$k =~ /\Apublicinbox\.([A-Z0-9a-z-]+)\.url\z/ or next;
@@ -40,8 +40,9 @@ sub ext_msg {
 
 		# try to find the URL with Xapian to avoid forking
 		if ($have_xap) {
+			my $s;
 			my $doc_id = eval {
-				my $s = PublicInbox::Search->new($git_dir);
+				$s = PublicInbox::Search->new($git_dir);
 				$s->find_unique_doc_id('mid', $mid);
 			};
 			if ($@) {
@@ -53,6 +54,7 @@ sub ext_msg {
 				# no point in trying the fork fallback if we
 				# know Xapian is up-to-date but missing the
 				# message in the current repo
+				push @pfx, { srch => $s, url => $url };
 				next;
 			}
 		}
@@ -82,19 +84,50 @@ sub ext_msg {
 		}
 	}
 
+	# fall back to partial MID matching
+	my $n_partial = 0;
+	my @partial;
+	if ($have_xap) {
+		my $cgi = $ctx->{cgi};
+		my $url = ref($cgi) eq 'CGI' ? $cgi->url(-base) . '/'
+					: $cgi->base->as_string;
+		$url .= $listname;
+		unshift @pfx, { srch => $ctx->{srch}, url => $url };
+		foreach my $pfx (@pfx) {
+			my $srch = delete $pfx->{srch} or next;
+			if (my $res = $srch->mid_prefix($mid)) {
+				$n_partial += scalar(@$res);
+				$pfx->{res} = $res;
+				push @partial, $pfx;
+			}
+		}
+	}
 	my $code = 404;
 	my $h = PublicInbox::Hval->new_msgid($mid, 1);
 	my $href = $h->as_href;
 	my $html = $h->as_html;
 	my $title = "Message-ID &lt;$html&gt; not found";
-
-	# Fall back to external repos if configured
 	my $s = "<html><head><title>$title</title>" .
-		"</head><body><pre><b>$title</b>";
+		"</head><body><pre><b>$title</b>\n";
 
+	if ($n_partial) {
+		$code = 300;
+		$s.= "\nPartial matches found:\n\n";
+		foreach my $pfx (@partial) {
+			my $u = $pfx->{url};
+			foreach my $m (@{$pfx->{res}}) {
+				$h = PublicInbox::Hval->new($m);
+				$href = $h->as_href;
+				$html = $h->as_html;
+				$s .= qq{<a\nhref="$u/$href/">$u/$html/</a>\n};
+			}
+		}
+	}
+
+	# Fall back to external repos if configured
 	if (@EXT_URL) {
 		$code = 300;
-		$s .= "\n\nPerhaps try an external site:\n\n";
+		$s .= "\nPerhaps try an external site:\n\n";
 		foreach my $u (@EXT_URL) {
 			my $r = sprintf($u, $href);
 			my $t = sprintf($u, $html);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 2065055..e7ea96c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -269,4 +269,13 @@ sub enquire {
 	$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
 }
 
+sub mid_prefix {
+	my ($self, $mpfx) = @_;
+	my $query = eval { $self->qp->parse_query("m:$mpfx", FLAG_PARTIAL) };
+	return if $@;
+	my $res = $self->do_enquire($query, { relevance => 1 });
+	return unless $res->{total};
+	[ map { $_->mid } @{$res->{msgs}} ];
+}
+
 1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2718854..8f15506 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -79,6 +79,7 @@ sub r404 {
 	my ($ctx) = @_;
 	if ($ctx && $ctx->{mid}) {
 		require PublicInbox::ExtMsg;
+		searcher($ctx);
 		return PublicInbox::ExtMsg::ext_msg($ctx);
 	}
 	r(404, 'Not Found');
-- 
EW


^ permalink raw reply related	[relevance 5%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-09-05  9:01     [PATCH 0/6] another round of search updates Eric Wong
2015-09-05  9:01  5% ` [PATCH 6/6] extmsg: fall back to partial Message-ID matching Eric Wong
2015-09-05  9:14  7%   ` Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).