From 7edf30e5349ab5566815e5050e9ba0f53e1d0bb9 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 15 Aug 2015 09:28:33 +0000 Subject: search: make search results more OO This will relieve callers of the need to decode the data we store internally in Xapian --- lib/PublicInbox/Search.pm | 34 ++++++------------- lib/PublicInbox/SearchMsg.pm | 81 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 87 insertions(+), 28 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index e88bfb16..c9c12c0b 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -8,8 +8,6 @@ use PublicInbox::SearchMsg; use base qw/Exporter/; use Search::Xapian qw/:standard/; require PublicInbox::View; -use Date::Parse qw/str2time/; -use POSIX qw//; use Email::MIME; use PublicInbox::MID qw/mid_clean mid_compressed/; @@ -109,8 +107,7 @@ sub add_message { $doc->add_term(xpfx('mid') . $mid); } - my $subj = $mime->header('Subject'); - $subj = '' unless defined $subj; + my $subj = $smsg->subject; if (length $subj) { $doc->add_term(xpfx('subject') . $subj); @@ -119,23 +116,12 @@ sub add_message { $doc->add_term(xpfx('path') . $path); } - my $from = $mime->header('From') || ''; - my @from; - - if ($from) { - @from = Email::Address->parse($from); - $from = $from[0]->name; - } - - my $ts = eval { str2time($mime->header('Date')) } || 0; - my $date = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts)); - $ts = Search::Xapian::sortable_serialise($ts); + my $from = $smsg->from_name; + my $date = $smsg->date; + my $ts = Search::Xapian::sortable_serialise($smsg->ts); $doc->add_value(PublicInbox::Search::TS, $ts); - # this is what we show in index results: - $subj =~ tr/\n/ /; - $from =~ tr/\n/ /; - $doc->set_data("$mid\n$subj\n$from\n$date"); + $doc->set_data($smsg->to_doc_data); my $tg = $self->term_generator; @@ -145,10 +131,8 @@ sub add_message { $tg->index_text($subj) if $subj; $tg->increase_termpos; - if (@from) { - $tg->index_text($from[0]->format); - $tg->increase_termpos; - } + $tg->index_text($smsg->from->format); + $tg->increase_termpos; $mime->walk_parts(sub { my ($part) = @_; @@ -265,7 +249,9 @@ sub do_enquire { my $offset = $opts->{offset} || 0; my $limit = $opts->{limit} || 50; my $mset = $enquire->get_mset($offset, $limit); - my @msgs = map { $_->get_document->get_data } $mset->items; + my @msgs = map { + PublicInbox::SearchMsg->load_doc($_->get_document); + } $mset->items; { count => $mset->get_matches_estimated, msgs => \@msgs } } diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index 920804ac..619a19d4 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -5,6 +5,10 @@ package PublicInbox::SearchMsg; use strict; use warnings; use Search::Xapian; +use Email::Address qw//; +use POSIX qw//; +use Date::Parse qw/str2time/; +use PublicInbox::MID qw/mid_clean mid_compressed/; our $PFX2TERM_RE = undef; sub new { @@ -20,6 +24,78 @@ sub wrap { bless { doc => $doc, mime => undef, mid => $mid }, $class; } +sub load_doc { + my ($class, $doc) = @_; + my ($mid, $subj, $from, $date) = split(/\n/, $doc->get_data); + bless { + doc => $doc, + mid => $mid, + subject => $subj, + date => $date, + from_name => $from, + }, $class; +} + +sub subject { + my ($self) = @_; + my $subj = $self->{subject}; + return $subj if defined $subj; + $subj = $self->{mime}->header('Subject'); + $subj = '' unless defined $subj; + $subj =~ tr/\n/ /; + $self->{subject} = $subj; +} + +sub from { + my ($self) = @_; + my $from = $self->mime->header('From') || ''; + my @from; + + if ($from) { + @from = Email::Address->parse($from); + $self->{from} = $from[0]; + $from = $from[0]->name; + } + $from =~ tr/\n/ /; + $self->{from_name} = $from; + $self->{from}; +} + +sub from_name { + my ($self) = @_; + my $from_name = $self->{from_name}; + return $from_name if defined $from_name; + $self->from; + $self->{from_name}; +} + +sub ts { + my ($self) = @_; + my $ts = $self->{ts}; + return $ts if $ts; + $self->{date} = undef; + $self->date; + $self->{ts}; +} + +sub date { + my ($self) = @_; + my $date = $self->{date}; + return $date if $date; + my $ts = eval { str2time($self->mime->header('Date')) } || 0; + $self->{ts} = $ts; + $self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts)); +} + +sub to_doc_data { + my ($self) = @_; + + $self->mid . "\n" . + $self->subject . "\n" . + $self->from_name . "\n". + $self->date; +} + sub ensure_metadata { my ($self) = @_; my $doc = $self->{doc}; @@ -60,10 +136,7 @@ sub _extract_mid { my ($self) = @_; my $mid = $self->mime->header('Message-ID'); - if ($mid && $mid =~ /<([^>]+)>/) { - return $1; - } - return $mid; + $mid ? mid_compressed(mid_clean($mid)) : $mid; } sub mime { -- cgit v1.2.3-24-ge0c7