From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 854E01FAF6 for ; Tue, 6 Mar 2018 08:42:43 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 19/34] searchidx: use add_boolean_term for internal terms Date: Tue, 6 Mar 2018 08:42:27 +0000 Message-Id: <20180306084242.19988-20-e@80x24.org> In-Reply-To: <20180306084242.19988-1-e@80x24.org> References: <20180306084242.19988-1-e@80x24.org> List-Id: Aside from the Message-Id ('Q'), these terms do not appear in content and thus have no business contributing to the Xapian document length. Thanks-to Olly Betts for the tip on xapian-discuss <20180228004400.GU12724@survex.com> --- lib/PublicInbox/SearchIdx.pm | 15 ++++++++------- lib/PublicInbox/SearchIdxSkeleton.pm | 2 +- lib/PublicInbox/SearchMsg.pm | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1bca3a6..f63e072 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -322,7 +322,7 @@ sub add_message { if ($subj ne '') { $xpath = $self->subject_path($subj); $xpath = id_compress($xpath); - $doc->add_term('XPATH' . $xpath); + $doc->add_boolean_term('XPATH' . $xpath); } my $lines = $mime->body_raw =~ tr!\n!\n!; @@ -385,10 +385,11 @@ sub add_message { $doc->set_data($data); if (my $altid = $self->{-altid}) { foreach my $alt (@$altid) { + my $pfx = $alt->{xprefix}; foreach my $mid (@$mids) { my $id = $alt->mid2alt($mid); next unless defined $id; - $doc->add_term($alt->{xprefix} . $id); + $doc->add_boolean_term($pfx . $id); } } } @@ -498,7 +499,7 @@ sub link_doc { } else { $tid = defined $old_tid ? $old_tid : $self->next_thread_id; } - $doc->add_term('G' . $tid); + $doc->add_boolean_term('G' . $tid); $tid; } @@ -779,9 +780,9 @@ sub create_ghost { my $tid = $self->next_thread_id; my $doc = Search::Xapian::Document->new; - $doc->add_term('Q' . $mid); - $doc->add_term('G' . $tid); - $doc->add_term('T' . 'ghost'); + $doc->add_boolean_term('Q' . $mid); + $doc->add_boolean_term('G' . $tid); + $doc->add_boolean_term('T' . 'ghost'); my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); $self->{xdb}->add_document($doc); @@ -805,7 +806,7 @@ sub merge_threads { foreach my $docid (@ids) { my $doc = $db->get_document($docid); $doc->remove_term('G' . $loser_tid); - $doc->add_term('G' . $winner_tid); + $doc->add_boolean_term('G' . $winner_tid); $db->replace_document($docid, $doc); } } diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm index 506e566..3fe6a4a 100644 --- a/lib/PublicInbox/SearchIdxSkeleton.pm +++ b/lib/PublicInbox/SearchIdxSkeleton.pm @@ -98,7 +98,7 @@ sub index_skeleton_real ($$) { my $ts = $values->[PublicInbox::Search::TS]; my $smsg = PublicInbox::SearchMsg->new(undef); my $doc = $smsg->{doc}; - $doc->add_term('XPATH' . $xpath) if defined $xpath; + $doc->add_boolean_term('XPATH' . $xpath) if defined $xpath; foreach my $mid (@$mids) { $doc->add_term('Q' . $mid); } diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a556534..93e6fd8 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -14,7 +14,7 @@ use PublicInbox::Address; sub new { my ($class, $mime) = @_; my $doc = Search::Xapian::Document->new; - $doc->add_term('T' . 'mail'); + $doc->add_boolean_term('T' . 'mail'); bless { type => 'mail', doc => $doc, mime => $mime }, $class; } -- EW