From c617254e00ae43414236603cf9bbcdc8cbc2b139 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 9 Sep 2016 00:01:22 +0000 Subject: search: allow searching user fields (To/Cc/From) Sometimes it can be useful to search based on who the message was sent to, sent by, or Cc:-ed. Of course, headers can be faked, but they usually are not... Anyways this mostly matches the behavior of mairix(1). --- lib/PublicInbox/Search.pm | 10 +++++++- lib/PublicInbox/SearchIdx.pm | 59 +++++++++++++++++++++++++++++++------------- t/search.t | 37 +++++++++++++++++++++++++++ 3 files changed, 88 insertions(+), 18 deletions(-) diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index 445c2d8a..aec459b3 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -51,8 +51,8 @@ my %bool_pfx_internal = ( thread => 'G', # newsGroup (or similar entity - e.g. a web forum name) ); -# do we still need these? probably not.. my %bool_pfx_external = ( + # do we still need these? probably not.. path => 'XPATH', mid => 'Q', # uniQue id (Message-ID) ); @@ -61,6 +61,14 @@ my %prob_prefix = ( subject => 'S', s => 'S', # for mairix compatibility m => 'Q', # 'mid' is exact, 'm' can do partial + f => 'A', # for mairix compatibility + t => 'XTO', # for mairix compatibility + tc => 'XTC', # for mairix compatibility + c => 'XCC', # for mairix compatibility + tcf => 'XTCF', # for mairix compatibility + # n.b.: leaving out "a:" alias for "tcf:" even though + # mairix supports it. It is only mentioned in passing in mairix(1) + # and the extra two letters are not significantly longer. ); # not documenting m: and mid: for now, the using the URLs works w/o Xapian diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index f54f5f2f..37fefbea 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -96,12 +96,51 @@ sub _lock_release { close $lockfh or die "close failed: $!\n"; } -sub add_val { +sub add_val ($$$) { my ($doc, $col, $num) = @_; $num = Search::Xapian::sortable_serialise($num); $doc->add_value($col, $num); } +sub add_values ($$$) { + my ($smsg, $bytes, $num) = @_; + + my $ts = $smsg->ts; + my $doc = $smsg->{doc}; + add_val($doc, &PublicInbox::Search::TS, $ts); + + defined($num) and add_val($doc, &PublicInbox::Search::NUM, $num); + + defined($bytes) and add_val($doc, &PublicInbox::Search::BYTES, $bytes); + + add_val($doc, &PublicInbox::Search::LINES, + $smsg->{mime}->body_raw =~ tr!\n!\n!); + + my $yyyymmdd = strftime('%Y%m%d', gmtime($ts)); + $doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd); +} + +sub index_users ($$) { + my ($tg, $smsg) = @_; + + my $from = $smsg->from; + my $to = $smsg->to; + my $cc = $smsg->cc; + + $tg->index_text($from, 1, 'A'); # A - author + $tg->increase_termpos; + + $tg->index_text($to, 1, 'XTO') if $to ne ''; + $tg->index_text($cc, 1, 'XCC') if $cc ne ''; + my $tc = join("\t", $to, $cc); + $tg->index_text($tc, 1, 'XTC') if $tc ne ''; + my $tcf = join("\t", $tc, $from); + $tg->index_text($tcf, 1, 'XTCF') if $tcf ne ''; + + $tg->index_text($from); + $tg->increase_termpos; +} + sub add_message { my ($self, $mime, $bytes, $num, $blob) = @_; # mime = Email::MIME object my $db = $self->{xdb}; @@ -129,20 +168,7 @@ sub add_message { $doc->add_term(xpfx('path') . id_compress($path)); } - my $ts = $smsg->ts; - add_val($doc, &PublicInbox::Search::TS, $ts); - - defined($num) and - add_val($doc, &PublicInbox::Search::NUM, $num); - - defined($bytes) and - add_val($doc, &PublicInbox::Search::BYTES, $bytes); - - add_val($doc, &PublicInbox::Search::LINES, - $mime->body_raw =~ tr!\n!\n!); - - my $yyyymmdd = strftime('%Y%m%d', gmtime($ts)); - $doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd); + add_values($smsg, $bytes, $num); my $tg = $self->term_generator; @@ -152,8 +178,7 @@ sub add_message { $tg->index_text($subj) if $subj; $tg->increase_termpos; - $tg->index_text($smsg->from); - $tg->increase_termpos; + index_users($tg, $smsg); msg_iter($mime, sub { my ($part, $depth, @idx) = @{$_[0]}; diff --git a/t/search.t b/t/search.t index db94c0a3..bb0861a1 100644 --- a/t/search.t +++ b/t/search.t @@ -86,6 +86,7 @@ my $rw_commit = sub { 'Message-ID' => '', From => 'John Smith ', To => 'list@example.com', + Cc => 'foo@example.com', ], body => "goodbye forever :<\n"); @@ -324,6 +325,42 @@ sub filter_mids { is(scalar @{$res->{msgs}}, 0, 'nothing before 19931001'); } +# names and addresses +{ + my $res = $ro->query('t:list@example.com'); + is(scalar @{$res->{msgs}}, 6, 'searched To: successfully'); + foreach my $smsg (@{$res->{msgs}}) { + like($smsg->to, qr/\blist\@example\.com\b/, 'to appears'); + } + + $res = $ro->query('tc:list@example.com'); + is(scalar @{$res->{msgs}}, 6, 'searched To+Cc: successfully'); + foreach my $smsg (@{$res->{msgs}}) { + my $tocc = join("\n", $smsg->to, $smsg->cc); + like($tocc, qr/\blist\@example\.com\b/, 'tocc appears'); + } + + foreach my $pfx ('tcf:', 'c:') { + $res = $ro->query($pfx . 'foo@example.com'); + is(scalar @{$res->{msgs}}, 1, + "searched $pfx successfully for Cc:"); + foreach my $smsg (@{$res->{msgs}}) { + like($smsg->cc, qr/\bfoo\@example\.com\b/, + 'cc appears'); + } + } + + foreach my $pfx ('', 'tcf:', 'f:') { + $res = $ro->query($pfx . 'Laggy'); + is(scalar @{$res->{msgs}}, 1, + "searched $pfx successfully for From:"); + foreach my $smsg (@{$res->{msgs}}) { + like($smsg->from, qr/Laggy Sender/, + "From appears with $pfx"); + } + } +} + done_testing(); 1; -- cgit v1.2.3-24-ge0c7