From 21ab8f3cc530d9483091f32c0865ba1ce867cef8 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Sep 2016 00:01:25 +0000 Subject: search: fix space regressions from recent changes As of Xapian 1.0.4 (from 2007) it is possible to use Search::Xapian::QueryParser::add_prefix multiple times with the same user field name but different term prefixes. This brings my current git@vger mirror from 6.5GB to 2.1GB (both sizes are after xapian-compact). --- lib/PublicInbox/Search.pm | 15 +++++++++------ lib/PublicInbox/SearchIdx.pm | 25 ++++--------------------- 2 files changed, 13 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index f74129d5..c8e297f4 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -60,20 +60,23 @@ my %bool_pfx_external = ( my %prob_prefix = ( # for mairix compatibility s => 'S', - m => 'Q', # 'mid' is exact, 'm' can do partial + m => 'XMID', # 'mid:' (bool) is exact, 'm:' (prob) can do partial f => 'A', t => 'XTO', - tc => 'XTC', + tc => 'XTO XCC', c => 'XCC', - tcf => 'XTCF', - b => 'XBODY', - bs => 'XBS', + tcf => 'XTO XCC A', + b => 'XNQ XQUOT', + bs => 'XNQ XQUOT S', # n.b.: leaving out "a:" alias for "tcf:" even though # mairix supports it. It is only mentioned in passing in mairix(1) # and the extra two letters are not significantly longer. 
q => 'XQUOT', nq => 'XNQ', + + # default: + '' => 'XMID S A XNQ XQUOT', ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian @@ -241,7 +244,7 @@ EOF } while (my ($name, $prefix) = each %prob_prefix) { - $qp->add_prefix($name, $prefix); + $qp->add_prefix($name, $_) foreach split(/ /, $prefix); } $self->{query_parser} = $qp; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index cd27a294..ae890605 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -129,15 +129,9 @@ sub index_users ($$) { $tg->index_text($from, 1, 'A'); # A - author $tg->increase_termpos; - $tg->index_text($to, 1, 'XTO') if $to ne ''; + $tg->increase_termpos; $tg->index_text($cc, 1, 'XCC') if $cc ne ''; - my $tc = join("\t", $to, $cc); - $tg->index_text($tc, 1, 'XTC') if $tc ne ''; - my $tcf = join("\t", $tc, $from); - $tg->index_text($tcf, 1, 'XTCF') if $tcf ne ''; - - $tg->index_text($from); $tg->increase_termpos; } @@ -173,12 +167,7 @@ sub add_message { my $tg = $self->term_generator; $tg->set_document($doc); - if ($subj) { - $tg->index_text($subj, 1, 'S'); - $tg->index_text($subj, 1, 'XBS'); - } - $tg->increase_termpos; - $tg->index_text($subj) if $subj; + $tg->index_text($subj, 1, 'S') if $subj; $tg->increase_termpos; index_users($tg, $smsg); @@ -204,25 +193,19 @@ sub add_message { if (@quot) { my $s = join("\n", @quot); @quot = (); - $tg->index_text($s, 1, 'XQUOT'); - $tg->index_text($s, 0, 'XBS'); - $tg->index_text($s, 0, 'XBODY'); - $tg->index_text($s, 0); + $tg->index_text($s, 0, 'XQUOT'); $tg->increase_termpos; } if (@orig) { my $s = join("\n", @orig); @orig = (); $tg->index_text($s, 1, 'XNQ'); - $tg->index_text($s, 1, 'XBS'); - $tg->index_text($s, 1, 'XBODY'); - $tg->index_text($s); $tg->increase_termpos; } }); link_message($self, $smsg, $old_tid); - $tg->index_text($mid, 1); + $tg->index_text($mid, 1, 'XMID'); $doc->set_data($smsg->to_doc_data($blob)); if (my $altid = $self->{-altid}) { -- cgit v1.2.3-24-ge0c7