diff options
author | Eric Wong <e@yhbt.net> | 2020-05-16 10:03:22 +0000 |
---|---|---|
committer | Eric Wong <e@yhbt.net> | 2020-05-17 06:56:40 +0000 |
commit | e60231148eb604a379033c69e8c4494eb1753783 (patch) | |
tree | c49950605f50bc46082e20ee7fe679c6cf76989e /lib/PublicInbox/SearchIdx.pm | |
parent | 77aa1a9eae83fa60eb8208710a714aa4f39d9b34 (diff) | |
download | public-inbox-e60231148eb604a379033c69e8c4494eb1753783.tar.gz |
Email::MIME never supported this properly, but there are real instances of forwarded messages as message/rfc822 attachments. message/news is a legacy thing which we'll see in archives, and message/global appears to be the new thing. gmime also supports message/rfc2822, so we'll support it anyway despite lacking other evidence of its existence. Existing attachments remain downloadable as a whole message, but individual attachments of subparts are now downloadable and can be displayed in HTML, too. Furthermore, ensure Xapian can now search for common headers inside those messages as well as the message bodies.
Diffstat (limited to 'lib/PublicInbox/SearchIdx.pm')
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 47 |
1 files changed, 29 insertions, 18 deletions
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 4bdd69f5..5f5ae895 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -284,6 +284,13 @@ sub index_xapian { # msg_iter callback if (defined $fn && $fn ne '') { index_text($self, $fn, 1, 'XFN'); } + if ($part->{is_submsg}) { + my $mids = mids_for_index($part); + index_ids($self, $doc, $part, $mids); + my $smsg = PublicInbox::Smsg->new($part); + index_users($self, $smsg); + index_text($self, $smsg->subject, 1, 'S') if $smsg->subject; + } my ($s, undef) = msg_part_text($part, $ct); defined $s or return; @@ -307,6 +314,27 @@ sub index_xapian { # msg_iter callback } } +sub index_ids ($$$$) { + my ($self, $doc, $hdr, $mids) = @_; + for my $mid (@$mids) { + index_text($self, $mid, 1, 'XM'); + + # because too many Message-IDs are prefixed with + # "Pine.LNX."... + if ($mid =~ /\w{12,}/) { + my @long = ($mid =~ /(\w{3,}+)/g); + index_text($self, join(' ', @long), 1, 'XM'); + } + } + $doc->add_boolean_term('Q' . $_) for @$mids; + for my $l ($hdr->header_raw('List-Id')) { + $l =~ /<([^>]+)>/ or next; + my $lid = $1; + $doc->add_boolean_term('G' . $lid); + index_text($self, $lid, 1, 'XL'); # probabilistic + } +} + sub add_xapian ($$$$) { my ($self, $mime, $smsg, $mids) = @_; $smsg->{mime} = $mime; # XXX dangerous @@ -321,22 +349,12 @@ sub add_xapian ($$$$) { add_val($doc, PublicInbox::Search::DT(), $dt); my $tg = term_generator($self); - $tg->set_document($doc); index_text($self, $subj, 1, 'S') if $subj; index_users($self, $smsg); msg_iter($mime, \&index_xapian, [ $self, $doc ]); - foreach my $mid (@$mids) { - index_text($self, $mid, 1, 'XM'); - - # because too many Message-IDs are prefixed with - # "Pine.LNX."... 
- if ($mid =~ /\w{12,}/) { - my @long = ($mid =~ /(\w{3,}+)/g); - index_text($self, join(' ', @long), 1, 'XM'); - } - } + index_ids($self, $doc, $hdr, $mids); $smsg->{to} = $smsg->{cc} = ''; # WWW doesn't need these, only NNTP PublicInbox::OverIdx::parse_references($smsg, $hdr, $mids); my $data = $smsg->to_doc_data; @@ -351,13 +369,6 @@ sub add_xapian ($$$$) { } } } - $doc->add_boolean_term('Q' . $_) foreach @$mids; - for my $l ($hdr->header_raw('List-Id')) { - $l =~ /<([^>]+)>/ or next; - my $lid = $1; - $doc->add_boolean_term('G' . $lid); - index_text($self, $lid, 1, 'XL'); # probabilistic - } $self->{xdb}->replace_document($smsg->{num}, $doc); } |