about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r-- lib/PublicInbox/Search.pm 23
-rw-r--r-- lib/PublicInbox/SearchMsg.pm 39
2 files changed, 30 insertions, 32 deletions
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 2c66e557..f0040500 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -23,7 +23,8 @@ use constant {
         # 3 - message-ID is compressed if it includes '%' (hack!)
         # 4 - change "Re: " normalization, avoid circular Reference ghosts
         # 5 - subject_path drops trailing '.'
-        SCHEMA_VERSION => 5,
+        # 6 - preserve References: order in document data
+        SCHEMA_VERSION => 6,
         QP_FLAGS => FLAG_PHRASE|FLAG_BOOLEAN|FLAG_LOVEHATE|FLAG_WILDCARD,
 };
 
@@ -49,9 +50,9 @@ my %all_pfx = (%bool_pfx_internal, %bool_pfx_external, %prob_prefix);
 sub xpfx { $all_pfx{$_[0]} }
 
 our %PFX2TERM_RMAP;
+my %meta_pfx = (mid => 1, thread => 1, path => 1, type => 1);
 while (my ($k, $v) = each %all_pfx) {
-        next if $prob_prefix{$k};
-        $PFX2TERM_RMAP{$v} = $k;
+        $PFX2TERM_RMAP{$v} = $k if $meta_pfx{$k};
 }
 
 my $mail_query = Search::Xapian::Query->new(xpfx('type') . 'mail');
@@ -129,8 +130,6 @@ sub add_message {
                 my $ts = Search::Xapian::sortable_serialise($smsg->ts);
                 $doc->add_value(PublicInbox::Search::TS, $ts);
 
-                $doc->set_data($smsg->to_doc_data);
-
                 my $tg = $self->term_generator;
 
                 $tg->set_document($doc);
@@ -176,9 +175,11 @@ sub add_message {
                 if ($was_ghost) {
                         $doc_id = $smsg->doc_id;
                         $self->link_message($smsg, 0);
+                        $doc->set_data($smsg->to_doc_data);
                         $db->replace_document($doc_id, $doc);
                 } else {
                         $self->link_message($smsg, 0);
+                        $doc->set_data($smsg->to_doc_data);
                         $doc_id = $db->add_document($doc);
                 }
         };
@@ -352,14 +353,14 @@ sub link_message_to_parents {
         my @refs = $refs ? ($refs =~ /<([^>]+)>/g) : ();
         my $irt = $mime->header_obj->header('In-Reply-To');
         if ($irt) {
-                if ($irt =~ /<([^>]+)>/) {
-                        $irt = $1;
-                }
+                $irt = mid_compressed(mid_clean($irt));
 
                 # maybe some crazies will try to make a circular reference:
                 if ($irt eq $mid) {
                         $irt = undef;
                 } else {
+                        # last References should be $irt
+                        # we will de-dupe later
                         push @refs, $irt;
                 }
         }
@@ -376,12 +377,10 @@ sub link_message_to_parents {
                         $uniq{$ref} = 1;
                         push @refs, $ref;
                 }
-                $irt = undef if (defined $irt && !$uniq{$irt});
         }
         if (@refs) {
-                if (defined $irt) {
-                        $doc->add_term(xpfx('inreplyto') . $irt);
-                }
+                $doc->add_term(xpfx('inreplyto') . $irt) if defined $irt;
+                $smsg->{references_sorted} = '<'.join('><', @refs).'>';
 
                 my $ref_pfx = xpfx('references');
 
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index 14a62eb6..03df7ab4 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -31,13 +31,14 @@ sub load_doc {
         my ($class, $doc) = @_;
         my $data = $doc->get_data;
         $data = $enc_utf8->decode($data);
-        my ($mid, $subj, $from, $date) = split(/\n/, $data);
+        my ($mid, $subj, $from, $date, $refs) = split(/\n/, $data);
         bless {
                 doc => $doc,
                 mid => $mid,
                 subject => $subj,
                 date => $date,
                 from_name => $from,
+                references_sorted => $refs,
         }, $class;
 }
 
@@ -78,17 +79,16 @@ sub ts {
         my ($self) = @_;
         my $ts = $self->{ts};
         return $ts if $ts;
-        $self->{date} = undef;
-        $self->date;
-        $self->{ts};
+        $self->{ts} = eval {
+                str2time($self->date || $self->mime->header('Date'))
+        } || 0;
 }
 
 sub date {
         my ($self) = @_;
         my $date = $self->{date};
         return $date if $date;
-        my $ts = eval { str2time($self->mime->header('Date')) } || 0;
-        $self->{ts} = $ts;
+        my $ts = eval { str2time($self->mime->header('Date')) };
         $self->{date} = POSIX::strftime('%Y-%m-%d %H:%M', gmtime($ts));
 }
 
@@ -98,7 +98,14 @@ sub to_doc_data {
         $self->mid . "\n" .
         $self->subject . "\n" .
         $self->from_name . "\n".
-        $self->date;
+        $self->date . "\n" .
+        $self->references_sorted;
+}
+
+sub references_sorted {
+        my ($self) = @_;
+        my $x = $self->{references_sorted};
+        defined $x ? $x : '';
 }
 
 sub ensure_metadata {
@@ -117,12 +124,7 @@ sub ensure_metadata {
 
                 if ($val =~ s/$PFX2TERM_RE//o) {
                         my $field = $PublicInbox::Search::PFX2TERM_RMAP{$1};
-                        if ($field eq 'references') {
-                                my $refs = $self->{references} ||= [];
-                                push @$refs, $val;
-                        } else {
-                                $self->{$field} = $val;
-                        }
+                        $self->{$field} = $val;
                 }
         }
 }
@@ -138,14 +140,11 @@ sub mini_mime {
                 'X-PI-TS' => $self->ts,
                 'Message-ID' => "<$self->{mid}>",
         );
-        if (my $refs = $self->{references}) {
-                push @h, References => '<' . join('> <', @$refs) . '>';
-        }
-        if (my $irt = $self->{inreplyto}) {
-                push @h, 'In-Reply-To' => "<$irt>";
-        }
 
-        Email::MIME->create(header_str => \@h);
+        my $refs = $self->{references_sorted};
+        my $mime = Email::MIME->create(header_str => \@h);
+        $mime->header_set('References', $refs) if (defined $refs);
+        $mime;
 }
 
 sub mid {