* Re: [PATCH 6/6] extmsg: fall back to partial Message-ID matching
2015-09-05 9:01 5% ` [PATCH 6/6] extmsg: fall back to partial Message-ID matching Eric Wong
@ 2015-09-05 9:14 7% ` Eric Wong
0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2015-09-05 9:14 UTC (permalink / raw)
To: meta
Eric Wong <e@80x24.org> wrote:
> In case a URL gets truncated (as is common with long URLs),
> we can rely on Xapian for partial matches and bring the user
> to their destination.
Note: this is a bit half-assed and does not work when a Message-ID
is broken into multiple terms (common).
Perhaps a prefix trie is necessary; but a good on-disk one does
not seem to readily exist in Debian (or anywhere) for Perl?
Oh well, this is a rare feature.
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 243b6ba..77537c2 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -95,6 +95,8 @@ sub ext_msg {
unshift @pfx, { srch => $ctx->{srch}, url => $url };
foreach my $pfx (@pfx) {
my $srch = delete $pfx->{srch} or next;
+
+ # FIXME we may need a proper prefix trie here...
if (my $res = $srch->mid_prefix($mid)) {
$n_partial += scalar(@$res);
$pfx->{res} = $res;
^ permalink raw reply related [relevance 7%]
* [PATCH 6/6] extmsg: fall back to partial Message-ID matching
@ 2015-09-05 9:01 5% ` Eric Wong
2015-09-05 9:14 7% ` Eric Wong
0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2015-09-05 9:01 UTC (permalink / raw)
To: meta
In case a URL gets truncated (as is common with long URLs),
we can rely on Xapian for partial matches and bring the user
to their destination.
---
lib/PublicInbox/ExtMsg.pm | 45 +++++++++++++++++++++++++++++++++++++++------
lib/PublicInbox/Search.pm | 9 +++++++++
lib/PublicInbox/WWW.pm | 1 +
3 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index 7cf696d..243b6ba 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -23,7 +23,7 @@ sub ext_msg {
eval { require PublicInbox::Search };
my $have_xap = $@ ? 0 : 1;
- my @nox;
+ my (@nox, @pfx);
foreach my $k (keys %$pi_config) {
$k =~ /\Apublicinbox\.([A-Z0-9a-z-]+)\.url\z/ or next;
@@ -40,8 +40,9 @@ sub ext_msg {
# try to find the URL with Xapian to avoid forking
if ($have_xap) {
+ my $s;
my $doc_id = eval {
- my $s = PublicInbox::Search->new($git_dir);
+ $s = PublicInbox::Search->new($git_dir);
$s->find_unique_doc_id('mid', $mid);
};
if ($@) {
@@ -53,6 +54,7 @@ sub ext_msg {
# no point in trying the fork fallback if we
# know Xapian is up-to-date but missing the
# message in the current repo
+ push @pfx, { srch => $s, url => $url };
next;
}
}
@@ -82,19 +84,50 @@ sub ext_msg {
}
}
+ # fall back to partial MID matching
+ my $n_partial = 0;
+ my @partial;
+ if ($have_xap) {
+ my $cgi = $ctx->{cgi};
+ my $url = ref($cgi) eq 'CGI' ? $cgi->url(-base) . '/'
+ : $cgi->base->as_string;
+ $url .= $listname;
+ unshift @pfx, { srch => $ctx->{srch}, url => $url };
+ foreach my $pfx (@pfx) {
+ my $srch = delete $pfx->{srch} or next;
+ if (my $res = $srch->mid_prefix($mid)) {
+ $n_partial += scalar(@$res);
+ $pfx->{res} = $res;
+ push @partial, $pfx;
+ }
+ }
+ }
my $code = 404;
my $h = PublicInbox::Hval->new_msgid($mid, 1);
my $href = $h->as_href;
my $html = $h->as_html;
my $title = "Message-ID <$html> not found";
-
- # Fall back to external repos if configured
my $s = "<html><head><title>$title</title>" .
- "</head><body><pre><b>$title</b>";
+ "</head><body><pre><b>$title</b>\n";
+ if ($n_partial) {
+ $code = 300;
+ $s.= "\nPartial matches found:\n\n";
+ foreach my $pfx (@partial) {
+ my $u = $pfx->{url};
+ foreach my $m (@{$pfx->{res}}) {
+ $h = PublicInbox::Hval->new($m);
+ $href = $h->as_href;
+ $html = $h->as_html;
+ $s .= qq{<a\nhref="$u/$href/">$u/$html/</a>\n};
+ }
+ }
+ }
+
+ # Fall back to external repos if configured
if (@EXT_URL) {
$code = 300;
- $s .= "\n\nPerhaps try an external site:\n\n";
+ $s .= "\nPerhaps try an external site:\n\n";
foreach my $u (@EXT_URL) {
my $r = sprintf($u, $href);
my $t = sprintf($u, $html);
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 2065055..e7ea96c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -269,4 +269,13 @@ sub enquire {
$self->{enquire} ||= Search::Xapian::Enquire->new($self->{xdb});
}
+sub mid_prefix {
+ my ($self, $mpfx) = @_;
+ my $query = eval { $self->qp->parse_query("m:$mpfx", FLAG_PARTIAL) };
+ return if $@;
+ my $res = $self->do_enquire($query, { relevance => 1 });
+ return unless $res->{total};
+ [ map { $_->mid } @{$res->{msgs}} ];
+}
+
1;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 2718854..8f15506 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -79,6 +79,7 @@ sub r404 {
my ($ctx) = @_;
if ($ctx && $ctx->{mid}) {
require PublicInbox::ExtMsg;
+ searcher($ctx);
return PublicInbox::ExtMsg::ext_msg($ctx);
}
r(404, 'Not Found');
--
EW
^ permalink raw reply related [relevance 5%]
Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2015-09-05 9:01 [PATCH 0/6] another round of search updates Eric Wong
2015-09-05 9:01 5% ` [PATCH 6/6] extmsg: fall back to partial Message-ID matching Eric Wong
2015-09-05 9:14 7% ` Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).