From c617254e00ae43414236603cf9bbcdc8cbc2b139 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Sep 2016 00:01:22 +0000 Subject: search: allow searching user fields (To/Cc/From) Sometimes it can be useful to search based on who the message was sent to, sent by, or Cc:-ed. Of course, headers can be faked, but they usually are not... Anyways this mostly matches the behavior of mairix(1). --- lib/PublicInbox/Search.pm | 10 +++++++- lib/PublicInbox/SearchIdx.pm | 59 +++++++++++++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 445c2d8a..aec459b3 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -51,8 +51,8 @@ my %bool_pfx_internal = ( thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) ); -# do we still need these? probably not.. my %bool_pfx_external = ( + # do we still need these? probably not.. path => 'XPATH', mid => 'Q', # uniQue id (Message-ID) ); @@ -61,6 +61,14 @@ my %prob_prefix = ( subject => 'S', s => 'S', # for mairix compatibility m => 'Q', # 'mid' is exact, 'm' can do partial + f => 'A', # for mairix compatibility + t => 'XTO', # for mairix compatibility + tc => 'XTC', # for mairix compatibility + c => 'XCC', # for mairix compatibility + tcf => 'XTCF', # for mairix compatibility + # n.b.: leaving out "a:" alias for "tcf:" even though + # mairix supports it. It is only mentioned in passing in mairix(1) + # and the extra two letters are not significantly longer. ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f54f5f2f..37fefbea 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -96,12 +96,51 @@ sub _lock_release { close $lockfh or die "close failed: $!\n"; } -sub add_val { +sub add_val ($$$) { my ($doc, $col, $num) = @_; $num = Search::Xapian::sortable_serialise($num); $doc->add_value($col, $num); } +sub add_values ($$$) { + my ($smsg, $bytes, $num) = @_; + + my $ts = $smsg->ts; + my $doc = $smsg->{doc}; + add_val($doc, &PublicInbox::Search::TS, $ts); + + defined($num) and add_val($doc, &PublicInbox::Search::NUM, $num); + + defined($bytes) and add_val($doc, &PublicInbox::Search::BYTES, $bytes); + + add_val($doc, &PublicInbox::Search::LINES, + $smsg->{mime}->body_raw =~ tr!\n!\n!); + + my $yyyymmdd = strftime('%Y%m%d', gmtime($ts)); + $doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd); +} + +sub index_users ($$) { + my ($tg, $smsg) = @_; + + my $from = $smsg->from; + my $to = $smsg->to; + my $cc = $smsg->cc; + + $tg->index_text($from, 1, 'A'); # A - author + $tg->increase_termpos; + + $tg->index_text($to, 1, 'XTO') if $to ne ''; + $tg->index_text($cc, 1, 'XCC') if $cc ne ''; + my $tc = join("\t", $to, $cc); + $tg->index_text($tc, 1, 'XTC') if $tc ne ''; + my $tcf = join("\t", $tc, $from); + $tg->index_text($tcf, 1, 'XTCF') if $tcf ne ''; + + $tg->index_text($from); + $tg->increase_termpos; +} + sub add_message { my ($self, $mime, $bytes, $num, $blob) = @_; # mime = Email::MIME object my $db = $self->{xdb}; @@ -129,20 +168,7 @@ sub add_message { $doc->add_term(xpfx('path') . id_compress($path)); } - my $ts = $smsg->ts; - add_val($doc, &PublicInbox::Search::TS, $ts); - - defined($num) and - add_val($doc, &PublicInbox::Search::NUM, $num); - - defined($bytes) and - add_val($doc, &PublicInbox::Search::BYTES, $bytes); - - add_val($doc, &PublicInbox::Search::LINES, - $mime->body_raw =~ tr!\n!\n!); - - my $yyyymmdd = strftime('%Y%m%d', gmtime($ts)); - $doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd); + add_values($smsg, $bytes, $num); my $tg = $self->term_generator; @@ -152,8 +178,7 @@ sub add_message { $tg->index_text($subj) if $subj; $tg->increase_termpos; - $tg->index_text($smsg->from); - $tg->increase_termpos; + index_users($tg, $smsg); msg_iter($mime, sub { my ($part, $depth, @idx) = @{$_[0]}; -- cgit v1.2.3-24-ge0c7