about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2015-08-15 09:28:33 +0000
committer Eric Wong <e@80x24.org> 2015-08-15 19:15:40 +0000
commit 7edf30e5349ab5566815e5050e9ba0f53e1d0bb9 (patch)
tree aa46cb0560f187e197614a4530efbe23969f27d1
parent d7fcdec712accc212bcfa35e50ade1233eb9beb3 (diff)
download public-inbox-7edf30e5349ab5566815e5050e9ba0f53e1d0bb9.tar.gz
This will relieve callers of the need to decode the data
we store internally in Xapian
-rw-r--r-- lib/PublicInbox/Search.pm 34
-rw-r--r-- lib/PublicInbox/SearchMsg.pm 81
-rw-r--r-- t/search.t 11
3 files changed, 92 insertions, 34 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e88bfb16..c9c12c0b 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -8,8 +8,6 @@ use PublicInbox::SearchMsg;
 use base qw/Exporter/;
 use Search::Xapian qw/:standard/;
 require PublicInbox::View;
-use Date::Parse qw/str2time/;
-use POSIX qw//;
 use Email::MIME;
 use PublicInbox::MID qw/mid_clean mid_compressed/;
 
@@ -109,8 +107,7 @@ sub add_message {
                         $doc->add_term(xpfx('mid') . $mid);
                 }
 
-                my $subj = $mime->header('Subject');
-                $subj = '' unless defined $subj;
+                my $subj = $smsg->subject;
 
                 if (length $subj) {
                         $doc->add_term(xpfx('subject') . $subj);
@@ -119,23 +116,12 @@ sub add_message {
                         $doc->add_term(xpfx('path') . $path);
                 }
 
-                my $from = $mime->header('From') || '';
-                my @from;
-
-                if ($from) {
-                        @from = Email::Address->parse($from);
-                        $from = $from[0]->name;
-                }
-
-                my $ts = eval { str2time($mime->header('Date')) } || 0;
-                my $date = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
-                $ts = Search::Xapian::sortable_serialise($ts);
+                my $from = $smsg->from_name;
+                my $date = $smsg->date;
+                my $ts = Search::Xapian::sortable_serialise($smsg->ts);
                 $doc->add_value(PublicInbox::Search::TS, $ts);
 
-                # this is what we show in index results:
-                $subj =~ tr/\n/ /;
-                $from =~ tr/\n/ /;
-                $doc->set_data("$mid\n$subj\n$from\n$date");
+                $doc->set_data($smsg->to_doc_data);
 
                 my $tg = $self->term_generator;
 
@@ -145,10 +131,8 @@ sub add_message {
                 $tg->index_text($subj) if $subj;
                 $tg->increase_termpos;
 
-                if (@from) {
-                        $tg->index_text($from[0]->format);
-                        $tg->increase_termpos;
-                }
+                $tg->index_text($smsg->from->format);
+                $tg->increase_termpos;
 
                 $mime->walk_parts(sub {
                         my ($part) = @_;
@@ -265,7 +249,9 @@ sub do_enquire {
         my $offset = $opts->{offset} || 0;
         my $limit = $opts->{limit} || 50;
         my $mset = $enquire->get_mset($offset, $limit);
-        my @msgs = map { $_->get_document->get_data } $mset->items;
+        my @msgs = map {
+                PublicInbox::SearchMsg->load_doc($_->get_document);
+        } $mset->items;
 
         { count => $mset->get_matches_estimated, msgs => \@msgs }
 }
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 920804ac..619a19d4 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -5,6 +5,10 @@ package PublicInbox::SearchMsg;
 use strict;
 use warnings;
 use Search::Xapian;
+use Email::Address qw//;
+use POSIX qw//;
+use Date::Parse qw/str2time/;
+use PublicInbox::MID qw/mid_clean mid_compressed/;
 our $PFX2TERM_RE = undef;
 
 sub new {
@@ -20,6 +24,78 @@ sub wrap {
         bless { doc => $doc, mime => undef, mid => $mid }, $class;
 }
 
+sub load_doc {
+        my ($class, $doc) = @_;
+        my ($mid, $subj, $from, $date) = split(/\n/, $doc->get_data);
+        bless {
+                doc => $doc,
+                mid => $mid,
+                subject => $subj,
+                date => $date,
+                from_name => $from,
+        }, $class;
+}
+
+sub subject {
+        my ($self) = @_;
+        my $subj = $self->{subject};
+        return $subj if defined $subj;
+        $subj = $self->{mime}->header('Subject');
+        $subj = '' unless defined $subj;
+        $subj =~ tr/\n/ /;
+        $self->{subject} = $subj;
+}
+
+sub from {
+        my ($self) = @_;
+        my $from = $self->mime->header('From') || '';
+        my @from;
+
+        if ($from) {
+                @from = Email::Address->parse($from);
+                $self->{from} = $from[0];
+                $from = $from[0]->name;
+        }
+        $from =~ tr/\n/ /;
+        $self->{from_name} = $from;
+        $self->{from};
+}
+
+sub from_name {
+        my ($self) = @_;
+        my $from_name = $self->{from_name};
+        return $from_name if defined $from_name;
+        $self->from;
+        $self->{from_name};
+}
+
+sub ts {
+        my ($self) = @_;
+        my $ts = $self->{ts};
+        return $ts if $ts;
+        $self->{date} = undef;
+        $self->date;
+        $self->{ts};
+}
+
+sub date {
+        my ($self) = @_;
+        my $date = $self->{date};
+        return $date if $date;
+        my $ts = eval { str2time($self->mime->header('Date')) } || 0;
+        $self->{ts} = $ts;
+        $self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
+}
+
+sub to_doc_data {
+        my ($self) = @_;
+
+        $self->mid . "\n" .
+        $self->subject . "\n" .
+        $self->from_name . "\n".
+        $self->date;
+}
+
 sub ensure_metadata {
         my ($self) = @_;
         my $doc = $self->{doc};
@@ -60,10 +136,7 @@ sub _extract_mid {
         my ($self) = @_;
 
         my $mid = $self->mime->header('Message-ID');
-        if ($mid && $mid =~ /<([^>]+)>/) {
-                return $1;
-        }
-        return $mid;
+        $mid ? mid_compressed(mid_clean($mid)) : $mid;
 }
 
 sub mime {
diff --git a/t/search.t b/t/search.t
index 201578d4..9de6d286 100644
--- a/t/search.t
+++ b/t/search.t
@@ -48,7 +48,7 @@ my $ro = PublicInbox::Search->new($git_dir);
 
 sub filter_mids {
         my ($res) = @_;
-        sort(map { (split(/\n/, $_))[0] } @{$res->{msgs}});
+        sort(map { $_->mid } @{$res->{msgs}});
 }
 
 {
@@ -144,8 +144,7 @@ sub filter_mids {
 
         # body
         $res = $ro->query('goodbye');
-        is((split(/\n/, $res->{msgs}->[0]))[0], 'last@s',
-           'got goodbye message body');
+        is($res->{msgs}->[0]->mid, 'last@s', 'got goodbye message body');
 }
 
 # long message-id
@@ -226,12 +225,12 @@ sub filter_mids {
                 body => "theatre\nfade\n"));
         my $res = $rw->query("theatre");
         is($res->{count}, 2, "got both matches");
-        like($res->{msgs}->[0], qr/\Anquote\@a/, "non-quoted scores higher");
-        like($res->{msgs}->[1], qr/\Aquote\@a/, "quoted result still returned");
+        is($res->{msgs}->[0]->mid, 'nquote@a', "non-quoted scores higher");
+        is($res->{msgs}->[1]->mid, 'quote@a', "quoted result still returned");
 
         $res = $rw->query("illusions");
         is($res->{count}, 1, "got a match for quoted text");
-        like($res->{msgs}->[0], qr/\Aquote\@a/,
+        is($res->{msgs}->[0]->mid, 'quote@a',
                 "quoted result returned if nothing else");
 }