* [PATCH 02/12] extmsg: fix broken Xapian MID lookup
@ 2018-02-22 21:42 6% ` Eric Wong (Contractor, The Linux Foundation)
0 siblings, 0 replies; 4+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-02-22 21:42 UTC (permalink / raw)
To: meta
This likely has no real-world implications, though, as we
fall back to Msgmap lookups anyway.
Broken since commit 7eeadcb62729b0efbcb53cd9b7b181897c92cf9a
("search: remove unnecessary abstractions and functionality")
---
lib/PublicInbox/ExtMsg.pm | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index ab9591f..4e31ef0 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -46,7 +46,7 @@ sub ext_msg {
}
# try to find the URL with Xapian to avoid forking
- my $doc_id = eval { $s->find_unique_doc_id('mid', $mid) };
+ my $doc_id = eval { $s->find_unique_doc_id('XMID' . $mid) };
if ($@) {
# xapian not configured properly for this repo
push @nox, $other;
--
EW
^ permalink raw reply related [relevance 6%]
* [PATCH 4/3] searchidx: remove messages correctly from Xapian index
2017-06-14 0:14 6% ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
@ 2017-06-15 23:11 6% ` Eric Wong
0 siblings, 0 replies; 4+ results
From: Eric Wong @ 2017-06-15 23:11 UTC (permalink / raw)
To: meta
This fixes a bug introduced in
commit 7eeadcb62729b0efbcb53cd9b7b181897c92cf9a
("search: remove unnecessary abstractions and functionality")
---
lib/PublicInbox/SearchIdx.pm | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 9ba9437..69b7a6f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -366,8 +366,12 @@ sub remove_message {
$mid = mid_clean($mid);
eval {
- $doc_id = $self->find_unique_doc_id('mid', $mid);
- $db->delete_document($doc_id) if defined $doc_id;
+ $doc_id = $self->find_unique_doc_id('Q' . $mid);
+ if (defined $doc_id) {
+ $db->delete_document($doc_id);
+ } else {
+ warn "cannot remove non-existent <$mid>\n";
+ }
};
if ($@) {
--
EW
^ permalink raw reply related [relevance 6%]
* [PATCH 1/3] search: remove unnecessary abstractions and functionality
2017-06-14 0:14 7% [PATCH 0/3] search improvements Eric Wong
@ 2017-06-14 0:14 6% ` Eric Wong
2017-06-15 23:11 6% ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
0 siblings, 1 reply; 4+ results
From: Eric Wong @ 2017-06-14 0:14 UTC (permalink / raw)
To: meta
This simplifies the code a bit and reduces the translation
overhead for looking directly at data from tools shipped
with Xapian.
While we're at it, fix thread-all.t :)
---
lib/PublicInbox/Search.pm | 31 +++++++++----------------------
lib/PublicInbox/SearchIdx.pm | 20 +++++++++-----------
lib/PublicInbox/SearchMsg.pm | 2 +-
t/search.t | 9 +--------
4 files changed, 20 insertions(+), 42 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 82a6e54..67837f4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -56,8 +56,6 @@ my %bool_pfx_internal = (
);
my %bool_pfx_external = (
- # do we still need these? probably not..
- path => 'XPATH',
mid => 'Q', # uniQue id (Message-ID)
);
@@ -107,11 +105,7 @@ chomp @HELP;
# da (diff a/ removed lines)
# db (diff b/ added lines)
-my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
-
-sub xpfx { $all_pfx{$_[0]} }
-
-my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
+my $mail_query = Search::Xapian::Query->new('T' . 'mail');
sub xdir {
my (undef, $git_dir) = @_;
@@ -146,11 +140,11 @@ sub get_thread {
my $smsg = eval { $self->lookup_message($mid) };
return { total => 0, msgs => [] } unless $smsg;
- my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id);
+ my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
my $path = $smsg->path;
if (defined $path && $path ne '') {
my $path = id_compress($smsg->path);
- my $qsub = Search::Xapian::Query->new(xpfx('path').$path);
+ my $qsub = Search::Xapian::Query->new('XPATH' . $path);
$qtid = Search::Xapian::Query->new(OP_OR, $qtid, $qsub);
}
$opts ||= {};
@@ -279,7 +273,7 @@ sub lookup_message {
my ($self, $mid) = @_;
$mid = mid_clean($mid);
- my $doc_id = $self->find_unique_doc_id('mid', $mid);
+ my $doc_id = $self->find_unique_doc_id('Q' . $mid);
my $smsg;
if (defined $doc_id) {
# raises on error:
@@ -299,9 +293,9 @@ sub lookup_mail { # no ghosts!
}
sub find_unique_doc_id {
- my ($self, $term, $value) = @_;
+ my ($self, $termval) = @_;
- my ($begin, $end) = $self->find_doc_ids($term, $value);
+ my ($begin, $end) = $self->find_doc_ids($termval);
return undef if $begin->equal($end); # not found
@@ -309,23 +303,16 @@ sub find_unique_doc_id {
# sanity check
$begin->inc;
- $begin->equal($end) or die "Term '$term:$value' is not unique\n";
+ $begin->equal($end) or die "Term '$termval' is not unique\n";
$rv;
}
# returns begin and end PostingIterator
sub find_doc_ids {
- my ($self, $term, $value) = @_;
-
- $self->find_doc_ids_for_term(xpfx($term) . $value);
-}
-
-# returns begin and end PostingIterator
-sub find_doc_ids_for_term {
- my ($self, $term) = @_;
+ my ($self, $termval) = @_;
my $db = $self->{xdb};
- ($db->postlist_begin($term), $db->postlist_end($term));
+ ($db->postlist_begin($termval), $db->postlist_end($termval));
}
# normalize subjects so they are suitable as pathnames for URLs
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fd0d320..316111b 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -19,7 +19,6 @@ use PublicInbox::MsgIter;
use Carp qw(croak);
use POSIX qw(strftime);
require PublicInbox::Git;
-*xpfx = *PublicInbox::Search::xpfx;
use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
use constant {
@@ -160,12 +159,12 @@ sub add_message {
}
$smsg = PublicInbox::SearchMsg->new($mime);
my $doc = $smsg->{doc};
- $doc->add_term(xpfx('mid') . $mid);
+ $doc->add_term('Q' . $mid);
my $subj = $smsg->subject;
if ($subj ne '') {
my $path = $self->subject_path($subj);
- $doc->add_term(xpfx('path') . id_compress($path));
+ $doc->add_term('XPATH' . id_compress($path));
}
add_values($smsg, $bytes, $num);
@@ -332,7 +331,7 @@ sub link_message {
} else {
$tid = defined $old_tid ? $old_tid : $self->next_thread_id;
}
- $doc->add_term(xpfx('thread') . $tid);
+ $doc->add_term('G' . $tid);
}
sub index_blob {
@@ -542,9 +541,9 @@ sub create_ghost {
my $tid = $self->next_thread_id;
my $doc = Search::Xapian::Document->new;
- $doc->add_term(xpfx('mid') . $mid);
- $doc->add_term(xpfx('thread') . $tid);
- $doc->add_term(xpfx('type') . 'ghost');
+ $doc->add_term('Q' . $mid);
+ $doc->add_term('G' . $tid);
+ $doc->add_term('T' . 'ghost');
my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
$self->{xdb}->add_document($doc);
@@ -555,15 +554,14 @@ sub create_ghost {
sub merge_threads {
my ($self, $winner_tid, $loser_tid) = @_;
return if $winner_tid == $loser_tid;
- my ($head, $tail) = $self->find_doc_ids('thread', $loser_tid);
- my $thread_pfx = xpfx('thread');
+ my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid);
my $db = $self->{xdb};
for (; $head != $tail; $head->inc) {
my $docid = $head->get_docid;
my $doc = $db->get_document($docid);
- $doc->remove_term($thread_pfx . $loser_tid);
- $doc->add_term($thread_pfx . $winner_tid);
+ $doc->remove_term('G' . $loser_tid);
+ $doc->add_term('G' . $winner_tid);
$db->replace_document($docid, $doc);
}
}
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b8eee66..a19d45d 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -14,7 +14,7 @@ use PublicInbox::Address;
sub new {
my ($class, $mime) = @_;
my $doc = Search::Xapian::Document->new;
- $doc->add_term(PublicInbox::Search::xpfx('type') . 'mail');
+ $doc->add_term('T' . 'mail');
bless { type => 'mail', doc => $doc, mime => $mime }, $class;
}
diff --git a/t/search.t b/t/search.t
index c9c4e34..a75dc9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -95,15 +95,8 @@ sub filter_mids {
is($found->mid, 'root@s', 'mid set correctly');
ok(int($found->thread_id) > 0, 'thread_id is an integer');
+ my ($res, @res);
my @exp = sort qw(root@s last@s);
- my $res = $ro->query("path:hello_world");
- my @res = filter_mids($res);
- is_deeply(\@res, \@exp, 'got expected results for path: match');
-
- foreach my $p (qw(hello hello_ hello_world2 hello_world_)) {
- $res = $ro->query("path:$p");
- is($res->{total}, 0, "path variant `$p' does not match");
- }
$res = $ro->query('s:(Hello world)');
@res = filter_mids($res);
--
EW
^ permalink raw reply related [relevance 6%]
* [PATCH 0/3] search improvements
@ 2017-06-14 0:14 7% Eric Wong
2017-06-14 0:14 6% ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
0 siblings, 1 reply; 4+ results
From: Eric Wong @ 2017-06-14 0:14 UTC (permalink / raw)
To: meta
These have been sitting in the stalled "repobrowse" branch for
a bit. I think they can go into "master" first, since I'm
leaning towards splitting repobrowse into a separate project
at the moment.
Eric Wong (3):
search: remove unnecessary abstractions and functionality
searchidx: switch to accounting by message bytes
search: allow searching within mail diffs
lib/PublicInbox/Search.pm | 51 ++++++------
lib/PublicInbox/SearchIdx.pm | 180 +++++++++++++++++++++++++++++++++++++------
lib/PublicInbox/SearchMsg.pm | 2 +-
t/search.t | 9 +--
4 files changed, 183 insertions(+), 59 deletions(-)
^ permalink raw reply [relevance 7%]
Results 1-4 of 4 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2017-06-14 0:14 7% [PATCH 0/3] search improvements Eric Wong
2017-06-14 0:14 6% ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
2017-06-15 23:11 6% ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
2018-02-22 21:42 [WIP PATCH 0/12] v2: git repo rotation + parallel Xapian indexing Eric Wong (Contractor, The Linux Foundation)
2018-02-22 21:42 6% ` [PATCH 02/12] extmsg: fix broken Xapian MID lookup Eric Wong (Contractor, The Linux Foundation)
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).