user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/3] search: remove unnecessary abstractions and functionality
Date: Wed, 14 Jun 2017 00:14:46 +0000	[thread overview]
Message-ID: <20170614001448.27098-2-e@80x24.org> (raw)
In-Reply-To: <20170614001448.27098-1-e@80x24.org>

This simplifies the code a bit and reduces the translation
overhead when looking directly at the data with the tools
shipped with Xapian.

While we're at it, fix thread-all.t :)
---
 lib/PublicInbox/Search.pm    | 31 +++++++++----------------------
 lib/PublicInbox/SearchIdx.pm | 20 +++++++++-----------
 lib/PublicInbox/SearchMsg.pm |  2 +-
 t/search.t                   |  9 +--------
 4 files changed, 20 insertions(+), 42 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 82a6e54..67837f4 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -56,8 +56,6 @@ my %bool_pfx_internal = (
 );
 
 my %bool_pfx_external = (
-	# do we still need these? probably not..
-	path => 'XPATH',
 	mid => 'Q', # uniQue id (Message-ID)
 );
 
@@ -107,11 +105,7 @@ chomp @HELP;
 # da (diff a/ removed lines)
 # db (diff b/ added lines)
 
-my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
-
-sub xpfx { $all_pfx{$_[0]} }
-
-my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
+my $mail_query = Search::Xapian::Query->new('T' . 'mail');
 
 sub xdir {
 	my (undef, $git_dir) = @_;
@@ -146,11 +140,11 @@ sub get_thread {
 	my $smsg = eval { $self->lookup_message($mid) };
 
 	return { total => 0, msgs => [] } unless $smsg;
-	my $qtid = Search::Xapian::Query->new(xpfx('thread').$smsg->thread_id);
+	my $qtid = Search::Xapian::Query->new('G' . $smsg->thread_id);
 	my $path = $smsg->path;
 	if (defined $path && $path ne '') {
 		my $path = id_compress($smsg->path);
-		my $qsub = Search::Xapian::Query->new(xpfx('path').$path);
+		my $qsub = Search::Xapian::Query->new('XPATH' . $path);
 		$qtid = Search::Xapian::Query->new(OP_OR, $qtid, $qsub);
 	}
 	$opts ||= {};
@@ -279,7 +273,7 @@ sub lookup_message {
 	my ($self, $mid) = @_;
 	$mid = mid_clean($mid);
 
-	my $doc_id = $self->find_unique_doc_id('mid', $mid);
+	my $doc_id = $self->find_unique_doc_id('Q' . $mid);
 	my $smsg;
 	if (defined $doc_id) {
 		# raises on error:
@@ -299,9 +293,9 @@ sub lookup_mail { # no ghosts!
 }
 
 sub find_unique_doc_id {
-	my ($self, $term, $value) = @_;
+	my ($self, $termval) = @_;
 
-	my ($begin, $end) = $self->find_doc_ids($term, $value);
+	my ($begin, $end) = $self->find_doc_ids($termval);
 
 	return undef if $begin->equal($end); # not found
 
@@ -309,23 +303,16 @@ sub find_unique_doc_id {
 
 	# sanity check
 	$begin->inc;
-	$begin->equal($end) or die "Term '$term:$value' is not unique\n";
+	$begin->equal($end) or die "Term '$termval' is not unique\n";
 	$rv;
 }
 
 # returns begin and end PostingIterator
 sub find_doc_ids {
-	my ($self, $term, $value) = @_;
-
-	$self->find_doc_ids_for_term(xpfx($term) . $value);
-}
-
-# returns begin and end PostingIterator
-sub find_doc_ids_for_term {
-	my ($self, $term) = @_;
+	my ($self, $termval) = @_;
 	my $db = $self->{xdb};
 
-	($db->postlist_begin($term), $db->postlist_end($term));
+	($db->postlist_begin($termval), $db->postlist_end($termval));
 }
 
 # normalize subjects so they are suitable as pathnames for URLs
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index fd0d320..316111b 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -19,7 +19,6 @@ use PublicInbox::MsgIter;
 use Carp qw(croak);
 use POSIX qw(strftime);
 require PublicInbox::Git;
-*xpfx = *PublicInbox::Search::xpfx;
 
 use constant MAX_MID_SIZE => 244; # max term size - 1 in Xapian
 use constant {
@@ -160,12 +159,12 @@ sub add_message {
 		}
 		$smsg = PublicInbox::SearchMsg->new($mime);
 		my $doc = $smsg->{doc};
-		$doc->add_term(xpfx('mid') . $mid);
+		$doc->add_term('Q' . $mid);
 
 		my $subj = $smsg->subject;
 		if ($subj ne '') {
 			my $path = $self->subject_path($subj);
-			$doc->add_term(xpfx('path') . id_compress($path));
+			$doc->add_term('XPATH' . id_compress($path));
 		}
 
 		add_values($smsg, $bytes, $num);
@@ -332,7 +331,7 @@ sub link_message {
 	} else {
 		$tid = defined $old_tid ? $old_tid : $self->next_thread_id;
 	}
-	$doc->add_term(xpfx('thread') . $tid);
+	$doc->add_term('G' . $tid);
 }
 
 sub index_blob {
@@ -542,9 +541,9 @@ sub create_ghost {
 
 	my $tid = $self->next_thread_id;
 	my $doc = Search::Xapian::Document->new;
-	$doc->add_term(xpfx('mid') . $mid);
-	$doc->add_term(xpfx('thread') . $tid);
-	$doc->add_term(xpfx('type') . 'ghost');
+	$doc->add_term('Q' . $mid);
+	$doc->add_term('G' . $tid);
+	$doc->add_term('T' . 'ghost');
 
 	my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid);
 	$self->{xdb}->add_document($doc);
@@ -555,15 +554,14 @@ sub create_ghost {
 sub merge_threads {
 	my ($self, $winner_tid, $loser_tid) = @_;
 	return if $winner_tid == $loser_tid;
-	my ($head, $tail) = $self->find_doc_ids('thread', $loser_tid);
-	my $thread_pfx = xpfx('thread');
+	my ($head, $tail) = $self->find_doc_ids('G' . $loser_tid);
 	my $db = $self->{xdb};
 
 	for (; $head != $tail; $head->inc) {
 		my $docid = $head->get_docid;
 		my $doc = $db->get_document($docid);
-		$doc->remove_term($thread_pfx . $loser_tid);
-		$doc->add_term($thread_pfx . $winner_tid);
+		$doc->remove_term('G' . $loser_tid);
+		$doc->add_term('G' . $winner_tid);
 		$db->replace_document($docid, $doc);
 	}
 }
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index b8eee66..a19d45d 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -14,7 +14,7 @@ use PublicInbox::Address;
 sub new {
 	my ($class, $mime) = @_;
 	my $doc = Search::Xapian::Document->new;
-	$doc->add_term(PublicInbox::Search::xpfx('type') . 'mail');
+	$doc->add_term('T' . 'mail');
 
 	bless { type => 'mail', doc => $doc, mime => $mime }, $class;
 }
diff --git a/t/search.t b/t/search.t
index c9c4e34..a75dc9b 100644
--- a/t/search.t
+++ b/t/search.t
@@ -95,15 +95,8 @@ sub filter_mids {
 	is($found->mid, 'root@s', 'mid set correctly');
 	ok(int($found->thread_id) > 0, 'thread_id is an integer');
 
+	my ($res, @res);
 	my @exp = sort qw(root@s last@s);
-	my $res = $ro->query("path:hello_world");
-	my @res = filter_mids($res);
-	is_deeply(\@res, \@exp, 'got expected results for path: match');
-
-	foreach my $p (qw(hello hello_ hello_world2 hello_world_)) {
-		$res = $ro->query("path:$p");
-		is($res->{total}, 0, "path variant `$p' does not match");
-	}
 
 	$res = $ro->query('s:(Hello world)');
 	@res = filter_mids($res);
-- 
EW


  reply	other threads:[~2017-06-14  0:14 UTC|newest]

Thread overview: 5+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-06-14  0:14 [PATCH 0/3] search improvements Eric Wong
2017-06-14  0:14 ` Eric Wong [this message]
2017-06-15 23:11   ` [PATCH 4/3] searchidx: remove messages correctly from Xapian index Eric Wong
2017-06-14  0:14 ` [PATCH 2/3] searchidx: switch to accounting by message bytes Eric Wong
2017-06-14  0:14 ` [PATCH 3/3] search: allow searching within mail diffs Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170614001448.27098-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).