From 8ea217f4452e34776ba294b5090827c99656cada Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 30 Aug 2015 00:38:05 +0000
Subject: search: do not index references and inreplyto terms

We no longer need them, as we can rely on index-time thread
resolution and thread merging. This allows us to index less data
and hopefully increase efficiency.
---
 lib/PublicInbox/Search.pm    | 17 ++---------------
 lib/PublicInbox/SearchIdx.pm |  5 -----
 2 files changed, 2 insertions(+), 20 deletions(-)

(limited to 'lib/PublicInbox')

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 4b3830e2..d3faaebb 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -24,7 +24,8 @@ use constant {
 	# 4 - change "Re: " normalization, avoid circular Reference ghosts
 	# 5 - subject_path drops trailing '.'
 	# 6 - preserve References: order in document data
-	SCHEMA_VERSION => 6,
+	# 7 - remove references and inreplyto terms
+	SCHEMA_VERSION => 7,
 
 	QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
 };
@@ -37,8 +38,6 @@ my %bool_pfx_internal = (
 my %bool_pfx_external = (
 	path => 'XPATH',
 	thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
-	references => 'XREFS',
-	inreplyto => 'XIRT',
 );
 
 my %prob_prefix = (
@@ -87,18 +86,6 @@ sub get_subject_path {
 	$self->do_enquire($query, $opts);
 }
 
-# given a message ID, get followups to a message
-sub get_followups {
-	my ($self, $mid, $opts) = @_;
-	$mid = mid_clean($mid);
-	$mid = mid_compress($mid);
-	my $qp = $self->qp;
-	my $irt = $qp->parse_query("inreplyto:$mid", 0);
-	my $ref = $qp->parse_query("references:$mid", 0);
-	my $query = Search::Xapian::Query->new(OP_OR, $irt, $ref);
-	$self->do_enquire($query, $opts);
-}
-
 sub get_thread {
 	my ($self, $mid, $opts) = @_;
 	my $smsg = eval { $self->lookup_message($mid) };
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 2ac53a73..dec33334 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -238,20 +238,15 @@ sub link_message_to_parents {
 		}
 	}
 	if (@refs) {
-		$doc->add_term(xpfx('inreplyto') . $irt) if defined $irt;
 		$smsg->{references_sorted} = '<'.join('><', @refs).'>';
 
-		my $ref_pfx = xpfx('references');
-
 		# first ref *should* be the thread root,
 		# but we can never trust clients to do the right thing
 		my $ref = shift @refs;
-		$doc->add_term($ref_pfx . $ref);
 		$tid = $self->_resolve_mid_to_tid($ref);
 
 		# the rest of the refs should point to this tid:
 		foreach $ref (@refs) {
-			$doc->add_term($ref_pfx . $ref);
 			my $ptid = $self->_resolve_mid_to_tid($ref);
 			if ($tid ne $ptid) {
 				$self->merge_threads($tid, $ptid);
-- 
cgit v1.2.3-24-ge0c7