From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 8B31920712 for ; Fri, 9 Sep 2016 00:01:34 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 04/10] search: fix space regressions from recent changes Date: Fri, 9 Sep 2016 00:01:25 +0000 Message-Id: <20160909000131.18584-5-e@80x24.org> In-Reply-To: <20160909000131.18584-1-e@80x24.org> References: <20160909000131.18584-1-e@80x24.org> List-Id: As of Xapian 1.0.4 (from 2007) it is possible to use Search::Xapian::QueryParser::add_prefix multiple times with the same user field name but different term prefixes. This brings my current git@vger mirror from 6.5GB to 2.1GB (both sizes are after xapian-compact). --- lib/PublicInbox/Search.pm | 15 +++++++++------ lib/PublicInbox/SearchIdx.pm | 25 ++++--------------------- 2 files changed, 13 insertions(+), 27 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index f74129d..c8e297f 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -60,20 +60,23 @@ my %bool_pfx_external = ( my %prob_prefix = ( # for mairix compatibility s => 'S', - m => 'Q', # 'mid' is exact, 'm' can do partial + m => 'XMID', # 'mid:' (bool) is exact, 'm:' (prob) can do partial f => 'A', t => 'XTO', - tc => 'XTC', + tc => 'XTO XCC', c => 'XCC', - tcf => 'XTCF', - b => 'XBODY', - bs => 'XBS', + tcf => 'XTO XCC A', + b => 'XNQ XQUOT', + bs => 'XNQ XQUOT S', # n.b.: leaving out "a:" alias for "tcf:" even though # mairix supports it. It is only mentioned in passing in mairix(1) # and the extra two letters are not significantly longer. 
q => 'XQUOT', nq => 'XNQ', + + # default: + '' => 'XMID S A XNQ XQUOT', ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian @@ -241,7 +244,7 @@ EOF } while (my ($name, $prefix) = each %prob_prefix) { - $qp->add_prefix($name, $prefix); + $qp->add_prefix($name, $_) foreach split(/ /, $prefix); } $self->{query_parser} = $qp; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index cd27a29..ae89060 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -129,15 +129,9 @@ sub index_users ($$) { $tg->index_text($from, 1, 'A'); # A - author $tg->increase_termpos; - $tg->index_text($to, 1, 'XTO') if $to ne ''; + $tg->increase_termpos; $tg->index_text($cc, 1, 'XCC') if $cc ne ''; - my $tc = join("\t", $to, $cc); - $tg->index_text($tc, 1, 'XTC') if $tc ne ''; - my $tcf = join("\t", $tc, $from); - $tg->index_text($tcf, 1, 'XTCF') if $tcf ne ''; - - $tg->index_text($from); $tg->increase_termpos; } @@ -173,12 +167,7 @@ sub add_message { my $tg = $self->term_generator; $tg->set_document($doc); - if ($subj) { - $tg->index_text($subj, 1, 'S'); - $tg->index_text($subj, 1, 'XBS'); - } - $tg->increase_termpos; - $tg->index_text($subj) if $subj; + $tg->index_text($subj, 1, 'S') if $subj; $tg->increase_termpos; index_users($tg, $smsg); @@ -204,25 +193,19 @@ sub add_message { if (@quot) { my $s = join("\n", @quot); @quot = (); - $tg->index_text($s, 1, 'XQUOT'); - $tg->index_text($s, 0, 'XBS'); - $tg->index_text($s, 0, 'XBODY'); - $tg->index_text($s, 0); + $tg->index_text($s, 0, 'XQUOT'); $tg->increase_termpos; } if (@orig) { my $s = join("\n", @orig); @orig = (); $tg->index_text($s, 1, 'XNQ'); - $tg->index_text($s, 1, 'XBS'); - $tg->index_text($s, 1, 'XBODY'); - $tg->index_text($s); $tg->increase_termpos; } }); link_message($self, $smsg, $old_tid); - $tg->index_text($mid, 1); + $tg->index_text($mid, 1, 'XMID'); $doc->set_data($smsg->to_doc_data($blob)); if (my $altid = $self->{-altid}) { -- EW