user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] search: do not index references and inreplyto terms
@ 2015-08-30  1:10 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2015-08-30  1:10 UTC (permalink / raw)
  To: meta

We no longer need the references and inreplyto terms, as we can
rely on index-time thread resolution and thread merging.  This
allows us to index less data and hopefully increase efficiency.
---
 lib/PublicInbox/Search.pm    | 17 ++---------------
 lib/PublicInbox/SearchIdx.pm |  5 -----
 t/search.t                   | 21 +++------------------
 3 files changed, 5 insertions(+), 38 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 4b3830e..d3faaeb 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -24,7 +24,8 @@ use constant {
 	# 4 - change "Re: " normalization, avoid circular Reference ghosts
 	# 5 - subject_path drops trailing '.'
 	# 6 - preserve References: order in document data
-	SCHEMA_VERSION => 6,
+	# 7 - remove references and inreplyto terms
+	SCHEMA_VERSION => 7,
 	QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
 };
 
@@ -37,8 +38,6 @@ my %bool_pfx_internal = (
 my %bool_pfx_external = (
 	path => 'XPATH',
 	thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
-	references => 'XREFS',
-	inreplyto => 'XIRT',
 );
 
 my %prob_prefix = (
@@ -87,18 +86,6 @@ sub get_subject_path {
 	$self->do_enquire($query, $opts);
 }
 
-# given a message ID, get followups to a message
-sub get_followups {
-	my ($self, $mid, $opts) = @_;
-	$mid = mid_clean($mid);
-	$mid = mid_compress($mid);
-	my $qp = $self->qp;
-	my $irt = $qp->parse_query("inreplyto:$mid", 0);
-	my $ref = $qp->parse_query("references:$mid", 0);
-	my $query = Search::Xapian::Query->new(OP_OR, $irt, $ref);
-	$self->do_enquire($query, $opts);
-}
-
 sub get_thread {
 	my ($self, $mid, $opts) = @_;
 	my $smsg = eval { $self->lookup_message($mid) };
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 2ac53a7..dec3333 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -238,20 +238,15 @@ sub link_message_to_parents {
 		}
 	}
 	if (@refs) {
-		$doc->add_term(xpfx('inreplyto') . $irt) if defined $irt;
 		$smsg->{references_sorted} = '<'.join('><', @refs).'>';
 
-		my $ref_pfx = xpfx('references');
-
 		# first ref *should* be the thread root,
 		# but we can never trust clients to do the right thing
 		my $ref = shift @refs;
-		$doc->add_term($ref_pfx . $ref);
 		$tid = $self->_resolve_mid_to_tid($ref);
 
 		# the rest of the refs should point to this tid:
 		foreach $ref (@refs) {
-			$doc->add_term($ref_pfx . $ref);
 			my $ptid = $self->_resolve_mid_to_tid($ref);
 			if ($tid ne $ptid) {
 				$self->merge_threads($tid, $ptid);
diff --git a/t/search.t b/t/search.t
index 65539f1..02189ac 100644
--- a/t/search.t
+++ b/t/search.t
@@ -135,15 +135,6 @@ sub filter_mids {
 	my $second = $res->{msgs}->[0];
 
 	isnt($first, $second, "offset returned different result from limit");
-
-	foreach my $f (qw(inreplyto references)) {
-		$res = $ro->query($f . ':root@s');
-		@res = filter_mids($res);
-		is_deeply(\@res, [ 'last@s' ],
-			  "got expected results for $f: match");
-		$res = $ro->query($f . ':root');
-		is($res->{total}, 0, "no partial mid match");
-	}
 }
 
 # ghost vivication
@@ -219,14 +210,8 @@ sub filter_mids {
 
 	$rw_commit->();
 	$ro->reopen;
-	my $res = $ro->query('references:root@s');
-	my @res = filter_mids($res);
-	is_deeply(\@res, [ sort('last@s', $long_midc) ],
-		  "got expected results for references: match");
-
-	my $followups = $ro->get_followups('root@s');
-	$followups = [ filter_mids($followups) ];
-	is_deeply($followups, [ filter_mids($res) ], "get_followups matches");
+	my $res;
+	my @res;
 
 	my $long_reply_mid = 'reply-to-long@1';
 	my $long_reply = Email::MIME->create(
@@ -301,7 +286,7 @@ sub filter_mids {
 	ok($doc_id > 0, "doc_id defined with circular reference");
 	my $smsg = $rw->lookup_message('circle@a');
 	$smsg->ensure_metadata;
-	is($smsg->{references}, undef, "no references created");
+	is($smsg->references_sorted, '', "no references created");
 }
 
 done_testing();
-- 
EW


^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2015-08-30  1:10 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-08-30  1:10 [PATCH] search: do not index references and inreplyto terms Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).