about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r-- lib/PublicInbox/Linkify.pm   |  31
-rw-r--r-- lib/PublicInbox/MID.pm       |  27
-rw-r--r-- lib/PublicInbox/OverIdx.pm   |   4
-rw-r--r-- lib/PublicInbox/SearchIdx.pm |   4
-rw-r--r-- lib/PublicInbox/SearchMsg.pm |   4
-rw-r--r-- lib/PublicInbox/View.pm      | 106
6 files changed, 118 insertions, 58 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index 175f8d72..af9be3ff 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -89,4 +89,35 @@ sub linkify_2 {
         $_[1];
 }
 
+# single pass linkification of <Message-ID@example.com> within $str
+# with $pfx being the URL prefix
+sub linkify_mids {
+        my ($self, $pfx, $str, $raw) = @_;
+        $$str =~ s!<([^>]+)>!
+                my $msgid = PublicInbox::Hval->new_msgid($1);
+                my $html = $msgid->as_html;
+                my $href = $msgid->{href};
+                $href = ascii_html($href); # for IDN
+
+                # salt this, as this could be exploited to show
+                # links in the HTML which don't show up in the raw mail.
+                my $key = sha1_hex($html . $SALT);
+                my $repl = qq(&lt;<a\nhref="$pfx/$href/">$html</a>&gt;);
+                $repl .= qq{ (<a\nhref="$pfx/$href/raw">raw</a>)} if $raw;
+                $self->{$key} = $repl;
+                'PI-LINK-'. $key;
+                !ge;
+        $$str = ascii_html($$str);
+        $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b!
+                my $key = $1;
+                my $repl = $_[0]->{$key};
+                if (defined $repl) {
+                        $repl;
+                } else {
+                        # false positive or somebody tried to mess with us
+                        $key;
+                }
+        !ge;
+}
+
 1;
diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm
index 14089f91..d7a42c38 100644
--- a/lib/PublicInbox/MID.pm
+++ b/lib/PublicInbox/MID.pm
@@ -7,7 +7,7 @@ use strict;
 use warnings;
 use base qw/Exporter/;
 our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC
-        mids references/;
+        mids references mids_for_index/;
 use URI::Escape qw(uri_escape_utf8);
 use Digest::SHA qw/sha1_hex/;
 require PublicInbox::Address;
@@ -54,11 +54,10 @@ sub mid2path {
 # Only for v1 code paths:
 sub mid_mime ($) { mids($_[0]->header_obj)->[0] }
 
-sub mids ($) {
-        my ($hdr) = @_;
+# only intended for Message-ID and X-Alt-Message-ID
+sub extract_mids {
         my @mids;
-        my @v = $hdr->header_raw('Message-Id');
-        foreach my $v (@v) {
+        for my $v (@_) {
                 my @cur = ($v =~ /<([^>]+)>/sg);
                 if (@cur) {
                         push(@mids, @cur);
@@ -66,7 +65,23 @@ sub mids ($) {
                         push(@mids, $v);
                 }
         }
-        uniq_mids(\@mids);
+        \@mids;
+}
+
+sub mids ($) {
+        my ($hdr) = @_;
+        my @mids = $hdr->header_raw('Message-Id');
+        uniq_mids(extract_mids(@mids));
+}
+
+# we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them
+# to placate some clients, and we want to ensure NNTP-only clients can
+# import and index without relying on HTTP endpoints
+sub mids_for_index ($) {
+        my ($hdr) = @_;
+        my @mids = $hdr->header_raw('Message-Id');
+        my @alts = $hdr->header_raw('X-Alt-Message-ID');
+        uniq_mids(extract_mids(@mids, @alts));
 }
 
 # last References should be IRT, but some mail clients do things
diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm
index 01ca6f11..189bd21d 100644
--- a/lib/PublicInbox/OverIdx.pm
+++ b/lib/PublicInbox/OverIdx.pm
@@ -13,7 +13,7 @@ use warnings;
 use base qw(PublicInbox::Over);
 use IO::Handle;
 use DBI qw(:sql_types); # SQL_BLOB
-use PublicInbox::MID qw/id_compress mids references/;
+use PublicInbox::MID qw/id_compress mids_for_index references/;
 use PublicInbox::SearchMsg qw(subject_normalized);
 use Compress::Zlib qw(compress);
 use PublicInbox::Search;
@@ -256,7 +256,7 @@ sub add_overview {
                 lines => $lines,
                 blob => $oid,
         }, 'PublicInbox::SearchMsg';
-        my $mids = mids($mime->header_obj);
+        my $mids = mids_for_index($mime->header_obj);
         my $refs = parse_references($smsg, $mid0, $mids);
         my $subj = $smsg->subject;
         my $xpath;
diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm
index aed3875a..b2d71a1f 100644
--- a/lib/PublicInbox/SearchIdx.pm
+++ b/lib/PublicInbox/SearchIdx.pm
@@ -12,7 +12,7 @@ use warnings;
 use base qw(PublicInbox::Search PublicInbox::Lock);
 use PublicInbox::MIME;
 use PublicInbox::InboxWritable;
-use PublicInbox::MID qw/mid_clean id_compress mid_mime mids/;
+use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/;
 use PublicInbox::MsgIter;
 use Carp qw(croak);
 use POSIX qw(strftime);
@@ -344,7 +344,7 @@ sub add_xapian ($$$$$) {
 sub add_message {
         # mime = Email::MIME object
         my ($self, $mime, $bytes, $num, $oid, $mid0) = @_;
-        my $mids = mids($mime->header_obj);
+        my $mids = mids_for_index($mime->header_obj);
         $mid0 = $mids->[0] unless defined $mid0; # v1 compatibility
         unless (defined $num) { # v1
                 $self->_msgmap_init;
diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm
index adadf92e..7561e7f2 100644
--- a/lib/PublicInbox/SearchMsg.pm
+++ b/lib/PublicInbox/SearchMsg.pm
@@ -107,8 +107,8 @@ sub __hdr ($$) {
         return $val if defined $val;
 
         my $mime = $self->{mime} or return;
-        $val = $mime->header($field);
-        $val = '' unless defined $val;
+        my @raw = $mime->header($field);
+        $val = join(', ', @raw);
         $val =~ tr/\t\n/  /;
         $val =~ tr/\r//d;
         $self->{$field} = $val;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index aeb32fc8..39b04174 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -10,7 +10,7 @@ use bytes (); # only for bytes::length
 use PublicInbox::MsgTime qw(msg_datestamp);
 use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
 use PublicInbox::Linkify;
-use PublicInbox::MID qw/id_compress mid_escape mids references/;
+use PublicInbox::MID qw/id_compress mid_escape mids mids_for_index references/;
 use PublicInbox::MsgIter;
 use PublicInbox::Address;
 use PublicInbox::WwwStream;
@@ -190,8 +190,8 @@ sub fold_addresses ($) {
 
 sub _hdr_names_html ($$) {
         my ($hdr, $field) = @_;
-        my $val = $hdr->header($field) or return '';
-        ascii_html(join(', ', PublicInbox::Address::names($val)));
+        my @vals = $hdr->header($field) or return '';
+        ascii_html(join(', ', PublicInbox::Address::names(join(',', @vals))));
 }
 
 sub nr_to_s ($$$) {
@@ -629,12 +629,11 @@ sub _msg_html_prepare {
         my $over = $ctx->{-inbox}->over;
         my $obfs_ibx = $ctx->{-obfs_ibx};
         my $rv = '';
-        my $mids = mids($hdr);
-        my $multiple = scalar(@$mids) > 1; # zero, one, infinity
+        my $mids = mids_for_index($hdr);
         if ($nr == 0) {
                 if ($more) {
                         $rv .=
-"<pre>WARNING: multiple messages refer to this Message-ID\n</pre>";
+"<pre>WARNING: multiple messages have this Message-ID\n</pre>";
                 }
                 $rv .= "<pre\nid=b>"; # anchor for body start
         } else {
@@ -643,12 +642,11 @@ sub _msg_html_prepare {
         if ($over) {
                 $ctx->{-upfx} = '../';
         }
-        my @title;
-        my $v;
-        if (defined($v = $hdr->header('From'))) {
+        my @title; # (Subject[0], From[0])
+        for my $v ($hdr->header('From')) {
                 $v = PublicInbox::Hval->new($v);
                 my @n = PublicInbox::Address::names($v->raw);
-                $title[1] = ascii_html(join(', ', @n));
+                $title[1] //= ascii_html(join(', ', @n));
                 $v = $v->as_html;
                 if ($obfs_ibx) {
                         obfuscate_addrs($obfs_ibx, $v);
@@ -657,44 +655,51 @@ sub _msg_html_prepare {
                 $rv .= "From: $v\n" if $v ne '';
         }
         foreach my $h (qw(To Cc)) {
-                defined($v = $hdr->header($h)) or next;
-                fold_addresses($v);
-                $v = ascii_html($v);
-                obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-                $rv .= "$h: $v\n" if $v ne '';
+                for my $v ($hdr->header($h)) {
+                        fold_addresses($v);
+                        $v = ascii_html($v);
+                        obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+                        $rv .= "$h: $v\n" if $v ne '';
+                }
         }
-        if (defined($v = $hdr->header('Subject')) && ($v ne '')) {
-                $v = ascii_html($v);
-                obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
-                if ($over) {
-                        $rv .= qq(Subject: <a\nhref="#r"\nid=t>$v</a>\n);
-                } else {
-                        $rv .= "Subject: $v\n";
+        my @subj = $hdr->header('Subject');
+        if (@subj) {
+                for my $v (@subj) {
+                        $v = ascii_html($v);
+                        obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx;
+                        $rv .= 'Subject: ';
+                        if ($over) {
+                                $rv .= qq(<a\nhref="#r"\nid=t>$v</a>\n);
+                        } else {
+                                $rv .= "$v\n";
+                        }
+                        $title[0] //= $v;
                 }
-                $title[0] = $v;
         } else { # dummy anchor for thread skeleton at bottom of page
                 $rv .= qq(<a\nhref="#r"\nid=t></a>) if $over;
                 $title[0] = '(no subject)';
         }
-        if (defined($v = $hdr->header('Date'))) {
+        for my $v ($hdr->header('Date')) {
                 $v = ascii_html($v);
                 obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P
                 $rv .= "Date: $v\n";
         }
         $ctx->{-title_html} = join(' - ', @title);
-        foreach (@$mids) {
-                my $mid = PublicInbox::Hval->new_msgid($_) ;
+        if (scalar(@$mids) == 1) { # common case
+                my $mid = PublicInbox::Hval->new_msgid($mids->[0]);
                 my $mhtml = $mid->as_html;
-                if ($multiple) {
-                        my $href = $mid->{href};
-                        $rv .= "Message-ID: ";
-                        $rv .= "<a\nhref=\"../$href/\">";
-                        $rv .= "&lt;$mhtml&gt;</a> ";
-                        $rv .= "(<a\nhref=\"../$href/raw\">raw</a>)\n";
-                } else {
-                        $rv .= "Message-ID: &lt;$mhtml&gt; ";
-                        $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
+                $rv .= "Message-ID: &lt;$mhtml&gt; ";
+                $rv .= "(<a\nhref=\"raw\">raw</a>)\n";
+        } else {
+                # X-Alt-Message-ID can happen if a message is injected from
+                # public-inbox-nntpd because of multiple Message-ID headers.
+                my $lnk = PublicInbox::Linkify->new;
+                my $s = '';
+                for my $h (qw(Message-ID X-Alt-Message-ID)) {
+                        $s .= "$h: $_\n" for ($hdr->header_raw($h));
                 }
+                $lnk->linkify_mids('..', \$s, 1);
+                $rv .= $s;
         }
         $rv .= _parent_headers($hdr, $over);
         $rv .= "\n";
@@ -727,8 +732,9 @@ sub thread_skel {
         $$dst .= "$nr+ messages / $expand";
         $$dst .= qq!  <a\nhref="#b">top</a>\n!;
 
-        my $subj = $hdr->header('Subject');
-        defined $subj or $subj = '';
+        # nb: mutt only shows the first Subject in the index pane
+        # when multiple Subject: headers are present, so we follow suit:
+        my $subj = $hdr->header('Subject') // '';
         $subj = '(no subject)' if $subj eq '';
         $ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ];
         $ctx->{cur} = $mid;
@@ -746,21 +752,29 @@ sub thread_skel {
 sub _parent_headers {
         my ($hdr, $over) = @_;
         my $rv = '';
-
-        my $refs = references($hdr);
-        my $irt = pop @$refs;
-        if (defined $irt) {
-                my $v = PublicInbox::Hval->new_msgid($irt);
-                my $html = $v->as_html;
-                my $href = $v->{href};
-                $rv .= "In-Reply-To: &lt;";
-                $rv .= "<a\nhref=\"../$href/\">$html</a>&gt;\n";
+        my @irt = $hdr->header_raw('In-Reply-To');
+        my $refs;
+        if (@irt) {
+                my $lnk = PublicInbox::Linkify->new;
+                $rv .= "In-Reply-To: $_\n" for @irt;
+                $lnk->linkify_mids('..', \$rv);
+        } else {
+                $refs = references($hdr);
+                my $irt = pop @$refs;
+                if (defined $irt) {
+                        my $v = PublicInbox::Hval->new_msgid($irt);
+                        my $html = $v->as_html;
+                        my $href = $v->{href};
+                        $rv .= "In-Reply-To: &lt;";
+                        $rv .= "<a\nhref=\"../$href/\">$html</a>&gt;\n";
+                }
         }
 
         # do not display References: if search is present,
         # we show the thread skeleton at the bottom, instead.
         return $rv if $over;
 
+        $refs //= references($hdr);
         if (@$refs) {
                 @$refs = map { linkify_ref_no_over($_) } @$refs;
                 $rv .= 'References: '. join("\n\t", @$refs) . "\n";