user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 1/3] search: remove unnecessary abstractions and functionality
  2017-06-14  0:14  7% [PATCH 0/3] search improvements Eric Wong
@ 2017-06-14  0:14  6% ` Eric Wong
  2017-06-15 23:11  6%   ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
  0 siblings, 1 reply; 4+ results
From: Eric Wong @ 2017-06-14  0:14 UTC (permalink / raw)
  To: meta

This simplifies the code a bit and reduces the translation
overhead for looking directly at data from tools shipped
with Xapian.

While we're at it, fix thread-all.t :)
---
 lib/PublicInbox/Search.pm    | 31 +++++++++----------------------
 lib/PublicInbox/SearchIdx.pm | 20 +++++++++-----------
 lib/PublicInbox/SearchMsg.pm |  2 +-
 t/search.t                   |  9 +--------
 4 files changed, 20 insertions(+), 42 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 82a6e54..67837f4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -56,8 +56,6 @@ my %bool_pfx_internal = (
 );
 
 my %bool_pfx_external = (
-	# do we still need these? probably not..
-	path => 'XPATH',
 	mid => 'Q', # uniQue id (Message-ID)
 );
 
@@ -107,11 +105,7 @@ chomp @HELP;
 # da (diff a/ removed lines)
 # db (diff b/ added lines)
 
-my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
-
-sub xpfx { $all_pfx{$_[0]} }
-
-my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
+my $mail_query = Search::Xapian::Query->new('T' . 'mail');
 
 sub xdir {
 	my (undef, $git_dir) = @_;
@@ -146,11 +140,11 @@ sub get_thread {
 	my $smsg = eval { $self->lookup_message($mid) };
 
 	return { total => 0, msgs => [] } unless $smsg;
-	my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id);
+	my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
 	my $path = $smsg->path;
 	if (defined $path && $path ne '') {
 		my $path = id_compress($smsg->path);
-		my $qsub = Search::Xapian::Query->new(xpfx('path').$path);
+		my $qsub = Search::Xapian::Query->new('XPATH' . $path);
 		$qtid = Search::Xapian::Query->new(OP_OR, $qtid, $qsub);
 	}
 	$opts ||= {};
@@ -279,7 +273,7 @@ sub lookup_message {
 	my ($self, $mid) = @_;
 	$mid = mid_clean($mid);
 
-	my $doc_id = $self->find_unique_doc_id('mid', $mid);
+	my $doc_id = $self->find_unique_doc_id('Q' . $mid);
 	my $smsg;
 	if (defined $doc_id) {
 		# raises on error:
@@ -299,9 +293,9 @@ sub lookup_mail { # no ghosts!
 }
 
 sub find_unique_doc_id {
-	my ($self, $term, $value) = @_;
+	my ($self, $termval) = @_;
 
-	my ($begin, $end) = $self->find_doc_ids($term, $value);
+	my ($begin, $end) = $self->find_doc_ids($termval);
 
 	return undef if $begin->equal($end); # not found
 
@@ -309,23 +303,16 @@ sub find_unique_doc_id {
 
 	# sanity check
 	$begin->inc;
-	$begin->equal($end) or die "Term '$term:$value' is not unique\n";
+	$begin->equal($end) or die "Term '$termval' is not unique\n";
 	$rv;
 }
 
 # returns begin and end PostingIterator
 sub find_doc_ids {
-	my ($self, $term, $value) = @_;
-
-	$self->find_doc_ids_for_term(xpfx($term) . $value);
-}
-
-# returns begin and end PostingIterator
-sub find_doc_ids_for_term {
-	my ($self, $term) = @_;
+	my ($self, $termval) = @_;
 	my $db = $self->{xdb};
 
-	($db->postlist_begin($term), $db->postlist_end($term));
+	($db->postlist_begin($termval), $db->postlist_end($termval));
 }
 
 # normalize subjects so they are suitable as pathnames for URLs
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fd0d320..316111b 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -19,7 +19,6 @@ use PublicInbox::MsgIter;
 use Carp qw(croak);
 use POSIX qw(strftime);
 require PublicInbox::Git;
-*xpfx = *PublicInbox::Search::xpfx;
 
 use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
 use constant {
@@ -160,12 +159,12 @@ sub add_message {
 		}
 		$smsg = PublicInbox::SearchMsg->new($mime);
 		my $doc = $smsg->{doc};
-		$doc->add_term(xpfx('mid') . $mid);
+		$doc->add_term('Q' . $mid);
 
 		my $subj = $smsg->subject;
 		if ($subj ne '') {
 			my $path = $self->subject_path($subj);
-			$doc->add_term(xpfx('path') . id_compress($path));
+			$doc->add_term('XPATH' . id_compress($path));
 		}
 
 		add_values($smsg, $bytes, $num);
@@ -332,7 +331,7 @@ sub link_message {
 	} else {
 		$tid = defined $old_tid ? $old_tid : $self->next_thread_id;
 	}
-	$doc->add_term(xpfx('thread') . $tid);
+	$doc->add_term('G' . $tid);
 }
 
 sub index_blob {
@@ -542,9 +541,9 @@ sub create_ghost {
 
 	my $tid = $self->next_thread_id;
 	my $doc = Search::Xapian::Document->new;
-	$doc->add_term(xpfx('mid') . $mid);
-	$doc->add_term(xpfx('thread') . $tid);
-	$doc->add_term(xpfx('type') . 'ghost');
+	$doc->add_term('Q' . $mid);
+	$doc->add_term('G' . $tid);
+	$doc->add_term('T' . 'ghost');
 
 	my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
 	$self->{xdb}->add_document($doc);
@@ -555,15 +554,14 @@ sub create_ghost {
 sub merge_threads {
 	my ($self, $winner_tid, $loser_tid) = @_;
 	return if $winner_tid == $loser_tid;
-	my ($head, $tail) = $self->find_doc_ids('thread', $loser_tid);
-	my $thread_pfx = xpfx('thread');
+	my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid);
 	my $db = $self->{xdb};
 
 	for (; $head != $tail; $head->inc) {
 		my $docid = $head->get_docid;
 		my $doc = $db->get_document($docid);
-		$doc->remove_term($thread_pfx . $loser_tid);
-		$doc->add_term($thread_pfx . $winner_tid);
+		$doc->remove_term('G' . $loser_tid);
+		$doc->add_term('G' . $winner_tid);
 		$db->replace_document($docid, $doc);
 	}
 }
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b8eee66..a19d45d 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -14,7 +14,7 @@ use PublicInbox::Address;
 sub new {
 	my ($class, $mime) = @_;
 	my $doc = Search::Xapian::Document->new;
-	$doc->add_term(PublicInbox::Search::xpfx('type') . 'mail');
+	$doc->add_term('T' . 'mail');
 
 	bless { type => 'mail', doc => $doc, mime => $mime }, $class;
 }
diff --git a/t/search.t b/t/search.t
index c9c4e34..a75dc9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -95,15 +95,8 @@ sub filter_mids {
 	is($found->mid, 'root@s', 'mid set correctly');
 	ok(int($found->thread_id) > 0, 'thread_id is an integer');
 
+	my ($res, @res);
 	my @exp = sort qw(root@s last@s);
-	my $res = $ro->query("path:hello_world");
-	my @res = filter_mids($res);
-	is_deeply(\@res, \@exp, 'got expected results for path: match');
-
-	foreach my $p (qw(hello hello_ hello_world2 hello_world_)) {
-		$res = $ro->query("path:$p");
-		is($res->{total}, 0, "path variant `$p' does not match");
-	}
 
 	$res = $ro->query('s:(Hello world)');
 	@res = filter_mids($res);
-- 
EW


^ permalink raw reply related	[relevance 6%]

* [PATCH 4/3] searchidx: remove messages correctly from Xapian index
  2017-06-14  0:14  6% ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
@ 2017-06-15 23:11  6%   ` Eric Wong
  0 siblings, 0 replies; 4+ results
From: Eric Wong @ 2017-06-15 23:11 UTC (permalink / raw)
  To: meta

This fixes a bug introduced in
commit 7eeadcb62729b0efbcb53cd9b7b181897c92cf9a
("search: remove unnecessary abstractions and functionality")
---
 lib/PublicInbox/SearchIdx.pm | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 9ba9437..69b7a6f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -366,8 +366,12 @@ sub remove_message {
 	$mid = mid_clean($mid);
 
 	eval {
-		$doc_id = $self->find_unique_doc_id('mid', $mid);
-		$db->delete_document($doc_id) if defined $doc_id;
+		$doc_id = $self->find_unique_doc_id('Q' . $mid);
+		if (defined $doc_id) {
+			$db->delete_document($doc_id);
+		} else {
+			warn "cannot remove non-existent <$mid>\n";
+		}
 	};
 
 	if ($@) {
-- 
EW

^ permalink raw reply related	[relevance 6%]

* [PATCH 02/12] extmsg: fix broken Xapian MID lookup
  @ 2018-02-22 21:42  6% ` Eric Wong (Contractor, The Linux Foundation)
  0 siblings, 0 replies; 4+ results
From: Eric Wong (Contractor, The Linux Foundation) @ 2018-02-22 21:42 UTC (permalink / raw)
  To: meta

This likely has no real world implications, though, as we
fall back to Msgmap lookups anyways.

Broken since commit 7eeadcb62729b0efbcb53cd9b7b181897c92cf9a
("search: remove unnecessary abstractions and functionality")
---
 lib/PublicInbox/ExtMsg.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index ab9591f..4e31ef0 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -46,7 +46,7 @@ sub ext_msg {
 		}
 
 		# try to find the URL with Xapian to avoid forking
-		my $doc_id = eval { $s->find_unique_doc_id('mid', $mid) };
+		my $doc_id = eval { $s->find_unique_doc_id('Q' . $mid) };
 		if ($@) {
 			# xapian not configured properly for this repo
 			push @nox, $other;
-- 
EW


^ permalink raw reply related	[relevance 6%]

* [PATCH 0/3] search improvements
@ 2017-06-14  0:14  7% Eric Wong
  2017-06-14  0:14  6% ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
  0 siblings, 1 reply; 4+ results
From: Eric Wong @ 2017-06-14  0:14 UTC (permalink / raw)
  To: meta

These have been sitting in the stalled "repobrowse" branch for
a bit.  I think they can be merged into "master", first; since
I'm leaning towards splitting repobrowse into a separate project
at the moment.

Eric Wong (3):
      search: remove unnecessary abstractions and functionality
      searchidx: switch to accounting by message bytes
      search: allow searching within mail diffs

 lib/PublicInbox/Search.pm    |  51 ++++++------
 lib/PublicInbox/SearchIdx.pm | 180 +++++++++++++++++++++++++++++++++++++------
 lib/PublicInbox/SearchMsg.pm |   2 +-
 t/search.t                   |   9 +--
 4 files changed, 183 insertions(+), 59 deletions(-)


^ permalink raw reply	[relevance 7%]

Results 1-4 of 4 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2017-06-14  0:14  7% [PATCH 0/3] search improvements Eric Wong
2017-06-14  0:14  6% ` [PATCH 1/3] search: remove unnecessary abstractions and functionality Eric Wong
2017-06-15 23:11  6%   ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
2018-02-22 21:42     [WIP PATCH 0/12] v2: git repo rotation + parallel Xapian indexing Eric Wong (Contractor, The Linux Foundation)
2018-02-22 21:42  6% ` [PATCH 02/12] extmsg: fix broken Xapian MID lookup Eric Wong (Contractor, The Linux Foundation)

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).