user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Cc: Eric Wong <e@80x24.org>
Subject: [RFC 03/11] search: make search results more OO
Date: Sun, 16 Aug 2015 08:37:51 +0000	[thread overview]
Message-ID: <1439714279-21923-4-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1439714279-21923-1-git-send-email-e@80x24.org>

This will relieve callers of the need to decode the data
we store internally in Xapian.
---
 lib/PublicInbox/Search.pm    | 34 ++++++-------------
 lib/PublicInbox/SearchMsg.pm | 81 +++++++++++++++++++++++++++++++++++++++++---
 t/search.t                   | 11 +++---
 3 files changed, 92 insertions(+), 34 deletions(-)

diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e88bfb1..c9c12c0 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -8,8 +8,6 @@ use PublicInbox::SearchMsg;
 use base qw/Exporter/;
 use Search::Xapian qw/:standard/;
 require PublicInbox::View;
-use Date::Parse qw/str2time/;
-use POSIX qw//;
 use Email::MIME;
 use PublicInbox::MID qw/mid_clean mid_compressed/;
 
@@ -109,8 +107,7 @@ sub add_message {
 			$doc->add_term(xpfx('mid') . $mid);
 		}
 
-		my $subj = $mime->header('Subject');
-		$subj = '' unless defined $subj;
+		my $subj = $smsg->subject;
 
 		if (length $subj) {
 			$doc->add_term(xpfx('subject') . $subj);
@@ -119,23 +116,12 @@ sub add_message {
 			$doc->add_term(xpfx('path') . $path);
 		}
 
-		my $from = $mime->header('From') || '';
-		my @from;
-
-		if ($from) {
-			@from = Email::Address->parse($from);
-			$from = $from[0]->name;
-		}
-
-		my $ts = eval { str2time($mime->header('Date')) } || 0;
-		my $date = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
-		$ts = Search::Xapian::sortable_serialise($ts);
+		my $from = $smsg->from_name;
+		my $date = $smsg->date;
+		my $ts = Search::Xapian::sortable_serialise($smsg->ts);
 		$doc->add_value(PublicInbox::Search::TS, $ts);
 
-		# this is what we show in index results:
-		$subj =~ tr/\n/ /;
-		$from =~ tr/\n/ /;
-		$doc->set_data("$mid\n$subj\n$from\n$date");
+		$doc->set_data($smsg->to_doc_data);
 
 		my $tg = $self->term_generator;
 
@@ -145,10 +131,8 @@ sub add_message {
 		$tg->index_text($subj) if $subj;
 		$tg->increase_termpos;
 
-		if (@from) {
-			$tg->index_text($from[0]->format);
-			$tg->increase_termpos;
-		}
+		$tg->index_text($smsg->from->format);
+		$tg->increase_termpos;
 
 		$mime->walk_parts(sub {
 			my ($part) = @_;
@@ -265,7 +249,9 @@ sub do_enquire {
 	my $offset = $opts->{offset} || 0;
 	my $limit = $opts->{limit} || 50;
 	my $mset = $enquire->get_mset($offset, $limit);
-	my @msgs = map { $_->get_document->get_data } $mset->items;
+	my @msgs = map {
+		PublicInbox::SearchMsg->load_doc($_->get_document);
+	} $mset->items;
 
 	{ count => $mset->get_matches_estimated, msgs => \@msgs }
 }
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 920804a..619a19d 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -5,6 +5,10 @@ package PublicInbox::SearchMsg;
 use strict;
 use warnings;
 use Search::Xapian;
+use Email::Address qw//;
+use POSIX qw//;
+use Date::Parse qw/str2time/;
+use PublicInbox::MID qw/mid_clean mid_compressed/;
 our $PFX2TERM_RE = undef;
 
 sub new {
@@ -20,6 +24,78 @@ sub wrap {
 	bless { doc => $doc, mime => undef, mid => $mid }, $class;
 }
 
+sub load_doc {
+	my ($class, $doc) = @_;
+	my ($mid, $subj, $from, $date) = split(/\n/, $doc->get_data);
+	bless {
+		doc => $doc,
+		mid => $mid,
+		subject => $subj,
+		date => $date,
+		from_name => $from,
+	}, $class;
+}
+
+sub subject {
+	my ($self) = @_;
+	my $subj = $self->{subject};
+	return $subj if defined $subj;
+	$subj = $self->{mime}->header('Subject');
+	$subj = '' unless defined $subj;
+	$subj =~ tr/\n/ /;
+	$self->{subject} = $subj;
+}
+
+sub from {
+	my ($self) = @_;
+	my $from = $self->mime->header('From') || '';
+	my @from;
+
+	if ($from) {
+		@from = Email::Address->parse($from);
+		$self->{from} = $from[0];
+		$from = $from[0]->name;
+	}
+	$from =~ tr/\n/ /;
+	$self->{from_name} = $from;
+	$self->{from};
+}
+
+sub from_name {
+	my ($self) = @_;
+	my $from_name = $self->{from_name};
+	return $from_name if defined $from_name;
+	$self->from;
+	$self->{from_name};
+}
+
+sub ts {
+	my ($self) = @_;
+	my $ts = $self->{ts};
+	return $ts if $ts;
+	$self->{date} = undef;
+	$self->date;
+	$self->{ts};
+}
+
+sub date {
+	my ($self) = @_;
+	my $date = $self->{date};
+	return $date if $date;
+	my $ts = eval { str2time($self->mime->header('Date')) } || 0;
+	$self->{ts} = $ts;
+	$self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+}
+
+sub to_doc_data {
+	my ($self) = @_;
+
+	$self->mid . "\n" .
+	$self->subject . "\n" .
+	$self->from_name . "\n".
+	$self->date;
+}
+
 sub ensure_metadata {
 	my ($self) = @_;
 	my $doc = $self->{doc};
@@ -60,10 +136,7 @@ sub _extract_mid {
 	my ($self) = @_;
 
 	my $mid = $self->mime->header('Message-ID');
-	if ($mid && $mid =~ /<([^>]+)>/) {
-		return $1;
-	}
-	return $mid;
+	$mid ? mid_compressed(mid_clean($mid)) : $mid;
 }
 
 sub mime {
diff --git a/t/search.t b/t/search.t
index 201578d..9de6d28 100644
--- a/t/search.t
+++ b/t/search.t
@@ -48,7 +48,7 @@ my $ro = PublicInbox::Search->new($git_dir);
 
 sub filter_mids {
 	my ($res) = @_;
-	sort(map { (split(/\n/, $_))[0] } @{$res->{msgs}});
+	sort(map { $_->mid } @{$res->{msgs}});
 }
 
 {
@@ -144,8 +144,7 @@ sub filter_mids {
 
 	# body
 	$res = $ro->query('goodbye');
-	is((split(/\n/, $res->{msgs}->[0]))[0], 'last@s',
-	   'got goodbye message body');
+	is($res->{msgs}->[0]->mid, 'last@s', 'got goodbye message body');
 }
 
 # long message-id
@@ -226,12 +225,12 @@ sub filter_mids {
 		body => "theatre\nfade\n"));
 	my $res = $rw->query("theatre");
 	is($res->{count}, 2, "got both matches");
-	like($res->{msgs}->[0], qr/\Anquote\@a/, "non-quoted scores higher");
-	like($res->{msgs}->[1], qr/\Aquote\@a/, "quoted result still returned");
+	is($res->{msgs}->[0]->mid, 'nquote@a', "non-quoted scores higher");
+	is($res->{msgs}->[1]->mid, 'quote@a', "quoted result still returned");
 
 	$res = $rw->query("illusions");
 	is($res->{count}, 1, "got a match for quoted text");
-	like($res->{msgs}->[0], qr/\Aquote\@a/,
+	is($res->{msgs}->[0]->mid, 'quote@a',
 		"quoted result returned if nothing else");
 }
 
-- 
EW


  parent reply	other threads:[~2015-08-16  8:38 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-16  8:37 [RFC 0/11] work-in-progress search branch updated Eric Wong
2015-08-16  8:37 ` [RFC 01/11] search: implement index_sync to fixup indexer Eric Wong
2015-08-16  8:37 ` [RFC 02/11] extract redundant Message-ID handling code Eric Wong
2015-08-16  8:37 ` Eric Wong [this message]
2015-08-16  8:37 ` [RFC 04/11] view: display replies in per-message view Eric Wong
2015-08-16  8:37 ` [RFC 05/11] thread: common sorting code Eric Wong
2015-08-16  8:37 ` [RFC 06/11] view: reply threading adjustment Eric Wong
2015-08-16  8:37 ` [RFC 07/11] view: hoist out index_walk function Eric Wong
2015-08-16  9:23   ` Eric Wong
2015-08-16  8:37 ` [RFC 08/11] www: /t/$MESSAGE_ID.html for threads Eric Wong
2015-08-16  8:37 ` [RFC 09/11] search: remove unnecessary xpfx export Eric Wong
2015-08-16  8:37 ` [RFC 10/11] implement /s/$SUBJECT_PATH.html lookups Eric Wong
2015-08-16  8:37 ` [RFC 11/11] SearchMsg: ensure metadata for ghost messages mid Eric Wong
2015-08-16  8:55 ` [RFC 12/11] view: deduplicate common code for loading search results Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1439714279-21923-4-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).