From 43fd7e7bda1b8eeb32cf43f6fd89568a938aedf5 Mon Sep 17 00:00:00 2001 From: "Eric Wong (Contractor, The Linux Foundation)" Date: Sat, 3 Mar 2018 17:26:16 +0000 Subject: searchidx: use add_boolean_term for internal terms Aside from the Message-Id ('Q'), these terms do not appear in content and thus have no business contributing to the Xapian document length. Thanks-to Olly Betts for the tip on xapian-discuss <20180228004400.GU12724@survex.com> --- lib/PublicInbox/SearchIdx.pm | 15 ++++++++------- lib/PublicInbox/SearchIdxSkeleton.pm | 2 +- lib/PublicInbox/SearchMsg.pm | 2 +- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 1bca3a64..f63e0720 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -322,7 +322,7 @@ sub add_message { if ($subj ne '') { $xpath = $self->subject_path($subj); $xpath = id_compress($xpath); - $doc->add_term('XPATH' . $xpath); + $doc->add_boolean_term('XPATH' . $xpath); } my $lines = $mime->body_raw =~ tr!\n!\n!; @@ -385,10 +385,11 @@ sub add_message { $doc->set_data($data); if (my $altid = $self->{-altid}) { foreach my $alt (@$altid) { + my $pfx = $alt->{xprefix}; foreach my $mid (@$mids) { my $id = $alt->mid2alt($mid); next unless defined $id; - $doc->add_term($alt->{xprefix} . $id); + $doc->add_boolean_term($pfx . $id); } } } @@ -498,7 +499,7 @@ sub link_doc { } else { $tid = defined $old_tid ? $old_tid : $self->next_thread_id; } - $doc->add_term('G' . $tid); + $doc->add_boolean_term('G' . $tid); $tid; } @@ -779,9 +780,9 @@ sub create_ghost { my $tid = $self->next_thread_id; my $doc = Search::Xapian::Document->new; - $doc->add_term('Q' . $mid); - $doc->add_term('G' . $tid); - $doc->add_term('T' . 'ghost'); + $doc->add_boolean_term('Q' . $mid); + $doc->add_boolean_term('G' . $tid); + $doc->add_boolean_term('T' . 'ghost'); my $smsg = PublicInbox::SearchMsg->wrap($doc, $mid); $self->{xdb}->add_document($doc); @@ -805,7 +806,7 @@ sub merge_threads { foreach my $docid (@ids) { my $doc = $db->get_document($docid); $doc->remove_term('G' . $loser_tid); - $doc->add_term('G' . $winner_tid); + $doc->add_boolean_term('G' . $winner_tid); $db->replace_document($docid, $doc); } } diff --git a/lib/PublicInbox/SearchIdxSkeleton.pm b/lib/PublicInbox/SearchIdxSkeleton.pm index 506e566f..3fe6a4ad 100644 --- a/lib/PublicInbox/SearchIdxSkeleton.pm +++ b/lib/PublicInbox/SearchIdxSkeleton.pm @@ -98,7 +98,7 @@ sub index_skeleton_real ($$) { my $ts = $values->[PublicInbox::Search::TS]; my $smsg = PublicInbox::SearchMsg->new(undef); my $doc = $smsg->{doc}; - $doc->add_term('XPATH' . $xpath) if defined $xpath; + $doc->add_boolean_term('XPATH' . $xpath) if defined $xpath; foreach my $mid (@$mids) { $doc->add_term('Q' . $mid); } diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index a5565345..93e6fd8b 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -14,7 +14,7 @@ use PublicInbox::Address; sub new { my ($class, $mime) = @_; my $doc = Search::Xapian::Document->new; - $doc->add_term('T' . 'mail'); + $doc->add_boolean_term('T' . 'mail'); bless { type => 'mail', doc => $doc, mime => $mime }, $class; } -- cgit v1.2.3-24-ge0c7