From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.5 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, RP_MATCHES_RCVD shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id A5F801FD1F for ; Sun, 30 Aug 2015 01:10:39 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH] search: do not index references and inreplyto terms Date: Sun, 30 Aug 2015 01:10:39 +0000 Message-Id: <1440897039-28935-1-git-send-email-e@80x24.org> List-Id: We no longer need them, as we can rely on index-time thread resolution and thread merging. This allows us to index less data and hopefully increase efficiency. --- lib/PublicInbox/Search.pm | 17 ++--------------- lib/PublicInbox/SearchIdx.pm | 5 ----- t/search.t | 21 +++------------------ 3 files changed, 5 insertions(+), 38 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 4b3830e..d3faaeb 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -24,7 +24,8 @@ use constant { # 4 - change "Re: " normalization, avoid circular Reference ghosts # 5 - subject_path drops trailing '.' # 6 - preserve References: order in document data - SCHEMA_VERSION => 6, + # 7 - remove references and inreplyto terms + SCHEMA_VERSION => 7, QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD, }; @@ -37,8 +38,6 @@ my %bool_pfx_internal = ( my %bool_pfx_external = ( path => 'XPATH', thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) - references => 'XREFS', - inreplyto => 'XIRT', ); my %prob_prefix = ( @@ -87,18 +86,6 @@ sub get_subject_path { $self->do_enquire($query, $opts); } -# given a message ID, get followups to a message -sub get_followups { - my ($self, $mid, $opts) = @_; - $mid = mid_clean($mid); - $mid = mid_compress($mid); - my $qp = $self->qp; - my $irt = $qp->parse_query("inreplyto:$mid", 0); - my $ref = $qp->parse_query("references:$mid", 0); - my $query = Search::Xapian::Query->new(OP_OR, $irt, $ref); - $self->do_enquire($query, $opts); -} - sub get_thread { my ($self, $mid, $opts) = @_; my $smsg = eval { $self->lookup_message($mid) }; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 2ac53a7..dec3333 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -238,20 +238,15 @@ sub link_message_to_parents { } } if (@refs) { - $doc->add_term(xpfx('inreplyto') . $irt) if defined $irt; $smsg->{references_sorted} = '<'.join('><', @refs).'>'; - my $ref_pfx = xpfx('references'); - # first ref *should* be the thread root, # but we can never trust clients to do the right thing my $ref = shift @refs; - $doc->add_term($ref_pfx . $ref); $tid = $self->_resolve_mid_to_tid($ref); # the rest of the refs should point to this tid: foreach $ref (@refs) { - $doc->add_term($ref_pfx . $ref); my $ptid = $self->_resolve_mid_to_tid($ref); if ($tid ne $ptid) { $self->merge_threads($tid, $ptid); diff --git a/t/search.t b/t/search.t index 65539f1..02189ac 100644 --- a/t/search.t +++ b/t/search.t @@ -135,15 +135,6 @@ sub filter_mids { my $second = $res->{msgs}->[0]; isnt($first, $second, "offset returned different result from limit"); - - foreach my $f (qw(inreplyto references)) { - $res = $ro->query($f . ':root@s'); - @res = filter_mids($res); - is_deeply(\@res, [ 'last@s' ], - "got expected results for $f: match"); - $res = $ro->query($f . ':root'); - is($res->{total}, 0, "no partial mid match"); - } } # ghost vivication @@ -219,14 +210,8 @@ sub filter_mids { $rw_commit->(); $ro->reopen; - my $res = $ro->query('references:root@s'); - my @res = filter_mids($res); - is_deeply(\@res, [ sort('last@s', $long_midc) ], - "got expected results for references: match"); - - my $followups = $ro->get_followups('root@s'); - $followups = [ filter_mids($followups) ]; - is_deeply($followups, [ filter_mids($res) ], "get_followups matches"); + my $res; + my @res; my $long_reply_mid = 'reply-to-long@1'; my $long_reply = Email::MIME->create( @@ -301,7 +286,7 @@ sub filter_mids { ok($doc_id > 0, "doc_id defined with circular reference"); my $smsg = $rw->lookup_message('circle@a'); $smsg->ensure_metadata; - is($smsg->{references}, undef, "no references created"); + is($smsg->references_sorted, '', "no references created"); } done_testing(); -- EW