user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 01/10] search: allow searching user fields (To/Cc/From)
Date: Fri,  9 Sep 2016 00:01:22 +0000	[thread overview]
Message-ID: <20160909000131.18584-2-e@80x24.org> (raw)
In-Reply-To: <20160909000131.18584-1-e@80x24.org>

Sometimes it can be useful to search based on who the
message was sent to, sent by, or Cc:-ed.  Of course,
headers can be faked, but they usually are not...

Anyway, this mostly matches the behavior of mairix(1).
---
 lib/PublicInbox/Search.pm    | 10 +++++++-
 lib/PublicInbox/SearchIdx.pm | 59 +++++++++++++++++++++++++++++++-------------
 t/search.t                   | 37 +++++++++++++++++++++++++++
 3 files changed, 88 insertions(+), 18 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 445c2d8..aec459b 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -51,8 +51,8 @@ my %bool_pfx_internal = (
 	thread => 'G', # newsGroup (or similar entity - e.g. a web forum name)
 );
 
-# do we still need these? probably not..
 my %bool_pfx_external = (
+	# do we still need these? probably not..
 	path => 'XPATH',
 	mid => 'Q', # uniQue id (Message-ID)
 );
@@ -61,6 +61,14 @@ my %prob_prefix = (
 	subject => 'S',
 	s => 'S', # for mairix compatibility
 	m => 'Q', # 'mid' is exact, 'm' can do partial
+	f => 'A', # for mairix compatibility
+	t => 'XTO', # for mairix compatibility
+	tc => 'XTC', # for mairix compatibility
+	c => 'XCC', # for mairix compatibility
+	tcf => 'XTCF', # for mairix compatibility
+	# n.b.: leaving out "a:" alias for "tcf:" even though
+	# mairix supports it.  It is only mentioned in passing in mairix(1)
+	# and the extra two letters are not significantly longer.
 );
 
 # not documenting m: and mid: for now, the using the URLs works w/o Xapian
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index f54f5f2..37fefbe 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -96,12 +96,51 @@ sub _lock_release {
 	close $lockfh or die "close failed: $!\n";
 }
 
-sub add_val {
+sub add_val ($$$) {
 	my ($doc, $col, $num) = @_;
 	$num = Search::Xapian::sortable_serialise($num);
 	$doc->add_value($col, $num);
 }
 
+sub add_values ($$$) {
+	my ($smsg, $bytes, $num) = @_;
+
+	my $ts = $smsg->ts;
+	my $doc = $smsg->{doc};
+	add_val($doc, &PublicInbox::Search::TS, $ts);
+
+	defined($num) and add_val($doc, &PublicInbox::Search::NUM, $num);
+
+	defined($bytes) and add_val($doc, &PublicInbox::Search::BYTES, $bytes);
+
+	add_val($doc, &PublicInbox::Search::LINES,
+			$smsg->{mime}->body_raw =~ tr!\n!\n!);
+
+	my $yyyymmdd = strftime('%Y%m%d', gmtime($ts));
+	$doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd);
+}
+
+sub index_users ($$) {
+	my ($tg, $smsg) = @_;
+
+	my $from = $smsg->from;
+	my $to = $smsg->to;
+	my $cc = $smsg->cc;
+
+	$tg->index_text($from, 1, 'A'); # A - author
+	$tg->increase_termpos;
+
+	$tg->index_text($to, 1, 'XTO') if $to ne '';
+	$tg->index_text($cc, 1, 'XCC') if $cc ne '';
+	my $tc = join("\t", $to, $cc);
+	$tg->index_text($tc, 1, 'XTC') if $tc ne '';
+	my $tcf = join("\t", $tc, $from);
+	$tg->index_text($tcf, 1, 'XTCF') if $tcf ne '';
+
+	$tg->index_text($from);
+	$tg->increase_termpos;
+}
+
 sub add_message {
 	my ($self, $mime, $bytes, $num, $blob) = @_; # mime = Email::MIME object
 	my $db = $self->{xdb};
@@ -129,20 +168,7 @@ sub add_message {
 			$doc->add_term(xpfx('path') . id_compress($path));
 		}
 
-		my $ts = $smsg->ts;
-		add_val($doc, &PublicInbox::Search::TS, $ts);
-
-		defined($num) and
-			add_val($doc, &PublicInbox::Search::NUM, $num);
-
-		defined($bytes) and
-			add_val($doc, &PublicInbox::Search::BYTES, $bytes);
-
-		add_val($doc, &PublicInbox::Search::LINES,
-				$mime->body_raw =~ tr!\n!\n!);
-
-		my $yyyymmdd = strftime('%Y%m%d', gmtime($ts));
-		$doc->add_value(&PublicInbox::Search::YYYYMMDD, $yyyymmdd);
+		add_values($smsg, $bytes, $num);
 
 		my $tg = $self->term_generator;
 
@@ -152,8 +178,7 @@ sub add_message {
 		$tg->index_text($subj) if $subj;
 		$tg->increase_termpos;
 
-		$tg->index_text($smsg->from);
-		$tg->increase_termpos;
+		index_users($tg, $smsg);
 
 		msg_iter($mime, sub {
 			my ($part, $depth, @idx) = @{$_[0]};
diff --git a/t/search.t b/t/search.t
index db94c0a..bb0861a 100644
--- a/t/search.t
+++ b/t/search.t
@@ -86,6 +86,7 @@ my $rw_commit = sub {
 			'Message-ID' => '<last@s>',
 			From => 'John Smith <js@example.com>',
 			To => 'list@example.com',
+			Cc => 'foo@example.com',
 		],
 		body => "goodbye forever :<\n");
 
@@ -324,6 +325,42 @@ sub filter_mids {
 	is(scalar @{$res->{msgs}}, 0, 'nothing before 19931001');
 }
 
+# names and addresses
+{
+	my $res = $ro->query('t:list@example.com');
+	is(scalar @{$res->{msgs}}, 6, 'searched To: successfully');
+	foreach my $smsg (@{$res->{msgs}}) {
+		like($smsg->to, qr/\blist\@example\.com\b/, 'to appears');
+	}
+
+	$res = $ro->query('tc:list@example.com');
+	is(scalar @{$res->{msgs}}, 6, 'searched To+Cc: successfully');
+	foreach my $smsg (@{$res->{msgs}}) {
+		my $tocc = join("\n", $smsg->to, $smsg->cc);
+		like($tocc, qr/\blist\@example\.com\b/, 'tocc appears');
+	}
+
+	foreach my $pfx ('tcf:', 'c:') {
+		$res = $ro->query($pfx . 'foo@example.com');
+		is(scalar @{$res->{msgs}}, 1,
+			"searched $pfx successfully for Cc:");
+		foreach my $smsg (@{$res->{msgs}}) {
+			like($smsg->cc, qr/\bfoo\@example\.com\b/,
+				'cc appears');
+		}
+	}
+
+	foreach my $pfx ('', 'tcf:', 'f:') {
+		$res = $ro->query($pfx . 'Laggy');
+		is(scalar @{$res->{msgs}}, 1,
+			"searched $pfx successfully for From:");
+		foreach my $smsg (@{$res->{msgs}}) {
+			like($smsg->from, qr/Laggy Sender/,
+				"From appears with $pfx");
+		}
+	}
+}
+
 done_testing();
 
 1;
-- 
EW


  reply	other threads:[~2016-09-09  0:01 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-09-09  0:01 [PATCH 0/10] search: more mairix prefix compatibility Eric Wong
2016-09-09  0:01 ` Eric Wong [this message]
2016-09-09  0:01 ` [PATCH 02/10] search: drop longer subject: prefix for search Eric Wong
2016-09-09  0:01 ` [PATCH 03/10] search: more granular message body searching Eric Wong
2016-09-09  0:01 ` [PATCH 04/10] search: fix space regressions from recent changes Eric Wong
2016-09-09  0:01 ` [PATCH 05/10] search: match quote detection behavior of view Eric Wong
2016-09-09  0:01 ` [PATCH 06/10] search: increase term positions for each quoted hunk Eric Wong
2016-09-09  0:01 ` [PATCH 07/10] search: fix compatibility with Debian wheezy Eric Wong
2016-09-09  0:01 ` [PATCH 08/10] search: avoid mindlessly calling body_set Eric Wong
2016-09-09  0:01 ` [PATCH 09/10] search: match the behavior of WWW for indexing text Eric Wong
2016-09-09  0:01 ` [PATCH 10/10] search: index attachment filenames Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20160909000131.18584-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).