From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Cc: Eric Wong <e@80x24.org>
Subject: [RFC 03/11] search: make search results more OO
Date: Sun, 16 Aug 2015 08:37:51 +0000 [thread overview]
Message-ID: <1439714279-21923-4-git-send-email-e@80x24.org> (raw)
In-Reply-To: <1439714279-21923-1-git-send-email-e@80x24.org>
This will relieve callers of the need to decode the data
we store internally in Xapian.
---
lib/PublicInbox/Search.pm | 34 ++++++-------------
lib/PublicInbox/SearchMsg.pm | 81 +++++++++++++++++++++++++++++++++++++++++---
t/search.t | 11 +++---
3 files changed, 92 insertions(+), 34 deletions(-)
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e88bfb1..c9c12c0 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -8,8 +8,6 @@ use PublicInbox::SearchMsg;
use base qw/Exporter/;
use Search::Xapian qw/:standard/;
require PublicInbox::View;
-use Date::Parse qw/str2time/;
-use POSIX qw//;
use Email::MIME;
use PublicInbox::MID qw/mid_clean mid_compressed/;
@@ -109,8 +107,7 @@ sub add_message {
$doc->add_term(xpfx('mid') . $mid);
}
- my $subj = $mime->header('Subject');
- $subj = '' unless defined $subj;
+ my $subj = $smsg->subject;
if (length $subj) {
$doc->add_term(xpfx('subject') . $subj);
@@ -119,23 +116,12 @@ sub add_message {
$doc->add_term(xpfx('path') . $path);
}
- my $from = $mime->header('From') || '';
- my @from;
-
- if ($from) {
- @from = Email::Address->parse($from);
- $from = $from[0]->name;
- }
-
- my $ts = eval { str2time($mime->header('Date')) } || 0;
- my $date = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
- $ts = Search::Xapian::sortable_serialise($ts);
+ my $from = $smsg->from_name;
+ my $date = $smsg->date;
+ my $ts = Search::Xapian::sortable_serialise($smsg->ts);
$doc->add_value(PublicInbox::Search::TS, $ts);
- # this is what we show in index results:
- $subj =~ tr/\n/ /;
- $from =~ tr/\n/ /;
- $doc->set_data("$mid\n$subj\n$from\n$date");
+ $doc->set_data($smsg->to_doc_data);
my $tg = $self->term_generator;
@@ -145,10 +131,8 @@ sub add_message {
$tg->index_text($subj) if $subj;
$tg->increase_termpos;
- if (@from) {
- $tg->index_text($from[0]->format);
- $tg->increase_termpos;
- }
+ $tg->index_text($smsg->from->format);
+ $tg->increase_termpos;
$mime->walk_parts(sub {
my ($part) = @_;
@@ -265,7 +249,9 @@ sub do_enquire {
my $offset = $opts->{offset} || 0;
my $limit = $opts->{limit} || 50;
my $mset = $enquire->get_mset($offset, $limit);
- my @msgs = map { $_->get_document->get_data } $mset->items;
+ my @msgs = map {
+ PublicInbox::SearchMsg->load_doc($_->get_document);
+ } $mset->items;
{ count => $mset->get_matches_estimated, msgs => \@msgs }
}
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 920804a..619a19d 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -5,6 +5,10 @@ package PublicInbox::SearchMsg;
use strict;
use warnings;
use Search::Xapian;
+use Email::Address qw//;
+use POSIX qw//;
+use Date::Parse qw/str2time/;
+use PublicInbox::MID qw/mid_clean mid_compressed/;
our $PFX2TERM_RE = undef;
sub new {
@@ -20,6 +24,78 @@ sub wrap {
bless { doc => $doc, mime => undef, mid => $mid }, $class;
}
+sub load_doc {
+ my ($class, $doc) = @_;
+ my ($mid, $subj, $from, $date) = split(/\n/, $doc->get_data);
+ bless {
+ doc => $doc,
+ mid => $mid,
+ subject => $subj,
+ date => $date,
+ from_name => $from,
+ }, $class;
+}
+
+sub subject {
+ my ($self) = @_;
+ my $subj = $self->{subject};
+ return $subj if defined $subj;
+ $subj = $self->{mime}->header('Subject');
+ $subj = '' unless defined $subj;
+ $subj =~ tr/\n/ /;
+ $self->{subject} = $subj;
+}
+
+sub from {
+ my ($self) = @_;
+ my $from = $self->mime->header('From') || '';
+ my @from;
+
+ if ($from) {
+ @from = Email::Address->parse($from);
+ $self->{from} = $from[0];
+ $from = $from[0]->name;
+ }
+ $from =~ tr/\n/ /;
+ $self->{from_name} = $from;
+ $self->{from};
+}
+
+sub from_name {
+ my ($self) = @_;
+ my $from_name = $self->{from_name};
+ return $from_name if defined $from_name;
+ $self->from;
+ $self->{from_name};
+}
+
+sub ts {
+ my ($self) = @_;
+ my $ts = $self->{ts};
+ return $ts if $ts;
+ $self->{date} = undef;
+ $self->date;
+ $self->{ts};
+}
+
+sub date {
+ my ($self) = @_;
+ my $date = $self->{date};
+ return $date if $date;
+ my $ts = eval { str2time($self->mime->header('Date')) } || 0;
+ $self->{ts} = $ts;
+ $self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+}
+
+sub to_doc_data {
+ my ($self) = @_;
+
+ $self->mid . "\n" .
+ $self->subject . "\n" .
+ $self->from_name . "\n".
+ $self->date;
+}
+
sub ensure_metadata {
my ($self) = @_;
my $doc = $self->{doc};
@@ -60,10 +136,7 @@ sub _extract_mid {
my ($self) = @_;
my $mid = $self->mime->header('Message-ID');
- if ($mid && $mid =~ /<([^>]+)>/) {
- return $1;
- }
- return $mid;
+ $mid ? mid_compressed(mid_clean($mid)) : $mid;
}
sub mime {
diff --git a/t/search.t b/t/search.t
index 201578d..9de6d28 100644
--- a/t/search.t
+++ b/t/search.t
@@ -48,7 +48,7 @@ my $ro = PublicInbox::Search->new($git_dir);
sub filter_mids {
my ($res) = @_;
- sort(map { (split(/\n/, $_))[0] } @{$res->{msgs}});
+ sort(map { $_->mid } @{$res->{msgs}});
}
{
@@ -144,8 +144,7 @@ sub filter_mids {
# body
$res = $ro->query('goodbye');
- is((split(/\n/, $res->{msgs}->[0]))[0], 'last@s',
- 'got goodbye message body');
+ is($res->{msgs}->[0]->mid, 'last@s', 'got goodbye message body');
}
# long message-id
@@ -226,12 +225,12 @@ sub filter_mids {
body => "theatre\nfade\n"));
my $res = $rw->query("theatre");
is($res->{count}, 2, "got both matches");
- like($res->{msgs}->[0], qr/\Anquote\@a/, "non-quoted scores higher");
- like($res->{msgs}->[1], qr/\Aquote\@a/, "quoted result still returned");
+ is($res->{msgs}->[0]->mid, 'nquote@a', "non-quoted scores higher");
+ is($res->{msgs}->[1]->mid, 'quote@a', "quoted result still returned");
$res = $rw->query("illusions");
is($res->{count}, 1, "got a match for quoted text");
- like($res->{msgs}->[0], qr/\Aquote\@a/,
+ is($res->{msgs}->[0]->mid, 'quote@a',
"quoted result returned if nothing else");
}
--
EW
next prev parent reply other threads:[~2015-08-16 8:38 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-16 8:37 [RFC 0/11] work-in-progress search branch updated Eric Wong
2015-08-16 8:37 ` [RFC 01/11] search: implement index_sync to fixup indexer Eric Wong
2015-08-16 8:37 ` [RFC 02/11] extract redundant Message-ID handling code Eric Wong
2015-08-16 8:37 ` Eric Wong [this message]
2015-08-16 8:37 ` [RFC 04/11] view: display replies in per-message view Eric Wong
2015-08-16 8:37 ` [RFC 05/11] thread: common sorting code Eric Wong
2015-08-16 8:37 ` [RFC 06/11] view: reply threading adjustment Eric Wong
2015-08-16 8:37 ` [RFC 07/11] view: hoist out index_walk function Eric Wong
2015-08-16 9:23 ` Eric Wong
2015-08-16 8:37 ` [RFC 08/11] www: /t/$MESSAGE_ID.html for threads Eric Wong
2015-08-16 8:37 ` [RFC 09/11] search: remove unnecessary xpfx export Eric Wong
2015-08-16 8:37 ` [RFC 10/11] implement /s/$SUBJECT_PATH.html lookups Eric Wong
2015-08-16 8:37 ` [RFC 11/11] SearchMsg: ensure metadata for ghost messages mid Eric Wong
2015-08-16 8:55 ` [RFC 12/11] view: deduplicate common code for loading search results Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1439714279-21923-4-git-send-email-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).