about summary refs log tree commit homepage
path: root/lib/PublicInbox/SearchIdx.pm
diff options
context:
space:
mode:
author Eric Wong <e@yhbt.net> 2020-05-16 10:03:22 +0000
committer Eric Wong <e@yhbt.net> 2020-05-17 06:56:40 +0000
commit e60231148eb604a379033c69e8c4494eb1753783 (patch)
tree c49950605f50bc46082e20ee7fe679c6cf76989e /lib/PublicInbox/SearchIdx.pm
parent 77aa1a9eae83fa60eb8208710a714aa4f39d9b34 (diff)
download public-inbox-e60231148eb604a379033c69e8c4494eb1753783.tar.gz
Email::MIME never supported this properly, but there are real
instances of forwarded messages as message/rfc822 attachments.
message/news is a legacy type which we'll see in archives, and
message/global appears to be the new thing.

gmime also supports message/rfc2822, so we'll support it anyway
despite lacking other evidence of its existence.

Existing attachments remain downloadable as a whole message,
but individual attachments of subparts are now downloadable
and can be displayed in HTML, too.

Furthermore, ensure Xapian can now search for common headers
inside those messages as well as the message bodies.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- lib/PublicInbox/SearchIdx.pm | 47
1 files changed, 29 insertions, 18 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index 4bdd69f5..5f5ae895 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -284,6 +284,13 @@ sub index_xapian { # msg_iter callback
         if (defined $fn && $fn ne '') {
                 index_text($self, $fn, 1, 'XFN');
         }
+        if ($part->{is_submsg}) {
+                my $mids = mids_for_index($part);
+                index_ids($self, $doc, $part, $mids);
+                my $smsg = PublicInbox::Smsg->new($part);
+                index_users($self, $smsg);
+                index_text($self, $smsg->subject, 1, 'S') if $smsg->subject;
+        }
 
         my ($s, undef) = msg_part_text($part, $ct);
         defined $s or return;
@@ -307,6 +314,27 @@ sub index_xapian { # msg_iter callback
         }
 }
 
+sub index_ids ($$$$) {
+        my ($self, $doc, $hdr, $mids) = @_;
+        for my $mid (@$mids) {
+                index_text($self, $mid, 1, 'XM');
+
+                # because too many Message-IDs are prefixed with
+                # "Pine.LNX."...
+                if ($mid =~ /\w{12,}/) {
+                        my @long = ($mid =~ /(\w{3,}+)/g);
+                        index_text($self, join(' ', @long), 1, 'XM');
+                }
+        }
+        $doc->add_boolean_term('Q' . $_) for @$mids;
+        for my $l ($hdr->header_raw('List-Id')) {
+                $l =~ /<([^>]+)>/ or next;
+                my $lid = $1;
+                $doc->add_boolean_term('G' . $lid);
+                index_text($self, $lid, 1, 'XL'); # probabilistic
+        }
+}
+
 sub add_xapian ($$$$) {
         my ($self, $mime, $smsg, $mids) = @_;
         $smsg->{mime} = $mime; # XXX dangerous
@@ -321,22 +349,12 @@ sub add_xapian ($$$$) {
         add_val($doc, PublicInbox::Search::DT(), $dt);
 
         my $tg = term_generator($self);
-
         $tg->set_document($doc);
         index_text($self, $subj, 1, 'S') if $subj;
         index_users($self, $smsg);
 
         msg_iter($mime, \&index_xapian, [ $self, $doc ]);
-        foreach my $mid (@$mids) {
-                index_text($self, $mid, 1, 'XM');
-
-                # because too many Message-IDs are prefixed with
-                # "Pine.LNX."...
-                if ($mid =~ /\w{12,}/) {
-                        my @long = ($mid =~ /(\w{3,}+)/g);
-                        index_text($self, join(' ', @long), 1, 'XM');
-                }
-        }
+        index_ids($self, $doc, $hdr, $mids);
         $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP
         PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids);
         my $data = $smsg->to_doc_data;
@@ -351,13 +369,6 @@ sub add_xapian ($$$$) {
                         }
                 }
         }
-        $doc->add_boolean_term('Q' . $_) foreach @$mids;
-        for my $l ($hdr->header_raw('List-Id')) {
-                $l =~ /<([^>]+)>/ or next;
-                my $lid = $1;
-                $doc->add_boolean_term('G' . $lid);
-                index_text($self, $lid, 1, 'XL'); # probabilistic
-        }
         $self->{xdb}->replace_document($smsg->{num}, $doc);
 }