diff options
author | Eric Wong <e@80x24.org> | 2019-10-30 08:47:23 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2019-10-30 08:47:23 +0000 |
commit | e177c05afa9bdc5ca27bc2bd9f946b70290ca3f4 (patch) | |
tree | 6751ee10800d32671bcf35bf9876fd24f928a23b /lib | |
parent | e0c7955ead5251f969c742e3aab8d912f7d510f3 (diff) | |
parent | 24794840fbd2363a51e4c02de1474987b605a56c (diff) | |
download | public-inbox-e177c05afa9bdc5ca27bc2bd9f946b70290ca3f4.tar.gz |
* origin/multi-mid:
  view: show X-Alt-Message-ID in permalink view, too
  index: allow search/lookups on X-Alt-Message-ID
  linkify: support adding "(raw)" link for Message-IDs
  view: improve warning for multiple Message-IDs
  view: move '<' and '>' outside <a>
  view: display redundant headers in permalink
  search: support multiple From/To/Cc/Subject headers
Diffstat (limited to 'lib')
-rw-r--r-- | lib/PublicInbox/Linkify.pm | 31 | ||||
-rw-r--r-- | lib/PublicInbox/MID.pm | 27 | ||||
-rw-r--r-- | lib/PublicInbox/OverIdx.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/SearchIdx.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/SearchMsg.pm | 4 | ||||
-rw-r--r-- | lib/PublicInbox/View.pm | 106 |
6 files changed, 118 insertions, 58 deletions
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm index 175f8d72..af9be3ff 100644 --- a/lib/PublicInbox/Linkify.pm +++ b/lib/PublicInbox/Linkify.pm @@ -89,4 +89,35 @@ sub linkify_2 { $_[1]; } +# single pass linkification of <Message-ID@example.com> within $str +# with $pfx being the URL prefix +sub linkify_mids { + my ($self, $pfx, $str, $raw) = @_; + $$str =~ s!<([^>]+)>! + my $msgid = PublicInbox::Hval->new_msgid($1); + my $html = $msgid->as_html; + my $href = $msgid->{href}; + $href = ascii_html($href); # for IDN + + # salt this, as this could be exploited to show + # links in the HTML which don't show up in the raw mail. + my $key = sha1_hex($html . $SALT); + my $repl = qq(<<a\nhref="$pfx/$href/">$html</a>>); + $repl .= qq{ (<a\nhref="$pfx/$href/raw">raw</a>)} if $raw; + $self->{$key} = $repl; + 'PI-LINK-'. $key; + !ge; + $$str = ascii_html($$str); + $$str =~ s!\bPI-LINK-([a-f0-9]{40})\b! + my $key = $1; + my $repl = $_[0]->{$key}; + if (defined $repl) { + $repl; + } else { + # false positive or somebody tried to mess with us + $key; + } + !ge; +} + 1; diff --git a/lib/PublicInbox/MID.pm b/lib/PublicInbox/MID.pm index 14089f91..d7a42c38 100644 --- a/lib/PublicInbox/MID.pm +++ b/lib/PublicInbox/MID.pm @@ -7,7 +7,7 @@ use strict; use warnings; use base qw/Exporter/; our @EXPORT_OK = qw/mid_clean id_compress mid2path mid_mime mid_escape MID_ESC - mids references/; + mids references mids_for_index/; use URI::Escape qw(uri_escape_utf8); use Digest::SHA qw/sha1_hex/; require PublicInbox::Address; @@ -54,11 +54,10 @@ sub mid2path { # Only for v1 code paths: sub mid_mime ($) { mids($_[0]->header_obj)->[0] } -sub mids ($) { - my ($hdr) = @_; +# only intended for Message-ID and X-Alt-Message-ID +sub extract_mids { my @mids; - my @v = $hdr->header_raw('Message-Id'); - foreach my $v (@v) { + for my $v (@_) { my @cur = ($v =~ /<([^>]+)>/sg); if (@cur) { push(@mids, @cur); @@ -66,7 +65,23 @@ sub mids ($) { push(@mids, $v); } } - uniq_mids(\@mids); + 
\@mids; +} + +sub mids ($) { + my ($hdr) = @_; + my @mids = $hdr->header_raw('Message-Id'); + uniq_mids(extract_mids(@mids)); +} + +# we allow searching on X-Alt-Message-ID since PublicInbox::NNTP uses them +# to placate some clients, and we want to ensure NNTP-only clients can +# import and index without relying on HTTP endpoints +sub mids_for_index ($) { + my ($hdr) = @_; + my @mids = $hdr->header_raw('Message-Id'); + my @alts = $hdr->header_raw('X-Alt-Message-ID'); + uniq_mids(extract_mids(@mids, @alts)); } # last References should be IRT, but some mail clients do things diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index 01ca6f11..189bd21d 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -13,7 +13,7 @@ use warnings; use base qw(PublicInbox::Over); use IO::Handle; use DBI qw(:sql_types); # SQL_BLOB -use PublicInbox::MID qw/id_compress mids references/; +use PublicInbox::MID qw/id_compress mids_for_index references/; use PublicInbox::SearchMsg qw(subject_normalized); use Compress::Zlib qw(compress); use PublicInbox::Search; @@ -256,7 +256,7 @@ sub add_overview { lines => $lines, blob => $oid, }, 'PublicInbox::SearchMsg'; - my $mids = mids($mime->header_obj); + my $mids = mids_for_index($mime->header_obj); my $refs = parse_references($smsg, $mid0, $mids); my $subj = $smsg->subject; my $xpath; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index aed3875a..b2d71a1f 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -12,7 +12,7 @@ use warnings; use base qw(PublicInbox::Search PublicInbox::Lock); use PublicInbox::MIME; use PublicInbox::InboxWritable; -use PublicInbox::MID qw/mid_clean id_compress mid_mime mids/; +use PublicInbox::MID qw/mid_clean id_compress mid_mime mids_for_index/; use PublicInbox::MsgIter; use Carp qw(croak); use POSIX qw(strftime); @@ -344,7 +344,7 @@ sub add_xapian ($$$$$) { sub add_message { # mime = Email::MIME object my ($self, $mime, 
$bytes, $num, $oid, $mid0) = @_; - my $mids = mids($mime->header_obj); + my $mids = mids_for_index($mime->header_obj); $mid0 = $mids->[0] unless defined $mid0; # v1 compatibility unless (defined $num) { # v1 $self->_msgmap_init; diff --git a/lib/PublicInbox/SearchMsg.pm b/lib/PublicInbox/SearchMsg.pm index adadf92e..7561e7f2 100644 --- a/lib/PublicInbox/SearchMsg.pm +++ b/lib/PublicInbox/SearchMsg.pm @@ -107,8 +107,8 @@ sub __hdr ($$) { return $val if defined $val; my $mime = $self->{mime} or return; - $val = $mime->header($field); - $val = '' unless defined $val; + my @raw = $mime->header($field); + $val = join(', ', @raw); $val =~ tr/\t\n/ /; $val =~ tr/\r//d; $self->{$field} = $val; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index aeb32fc8..39b04174 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -10,7 +10,7 @@ use bytes (); # only for bytes::length use PublicInbox::MsgTime qw(msg_datestamp); use PublicInbox::Hval qw/ascii_html obfuscate_addrs/; use PublicInbox::Linkify; -use PublicInbox::MID qw/id_compress mid_escape mids references/; +use PublicInbox::MID qw/id_compress mid_escape mids mids_for_index references/; use PublicInbox::MsgIter; use PublicInbox::Address; use PublicInbox::WwwStream; @@ -190,8 +190,8 @@ sub fold_addresses ($) { sub _hdr_names_html ($$) { my ($hdr, $field) = @_; - my $val = $hdr->header($field) or return ''; - ascii_html(join(', ', PublicInbox::Address::names($val))); + my @vals = $hdr->header($field) or return ''; + ascii_html(join(', ', PublicInbox::Address::names(join(',', @vals)))); } sub nr_to_s ($$$) { @@ -629,12 +629,11 @@ sub _msg_html_prepare { my $over = $ctx->{-inbox}->over; my $obfs_ibx = $ctx->{-obfs_ibx}; my $rv = ''; - my $mids = mids($hdr); - my $multiple = scalar(@$mids) > 1; # zero, one, infinity + my $mids = mids_for_index($hdr); if ($nr == 0) { if ($more) { $rv .= -"<pre>WARNING: multiple messages refer to this Message-ID\n</pre>"; +"<pre>WARNING: multiple messages have this 
Message-ID\n</pre>"; } $rv .= "<pre\nid=b>"; # anchor for body start } else { @@ -643,12 +642,11 @@ sub _msg_html_prepare { if ($over) { $ctx->{-upfx} = '../'; } - my @title; - my $v; - if (defined($v = $hdr->header('From'))) { + my @title; # (Subject[0], From[0]) + for my $v ($hdr->header('From')) { $v = PublicInbox::Hval->new($v); my @n = PublicInbox::Address::names($v->raw); - $title[1] = ascii_html(join(', ', @n)); + $title[1] //= ascii_html(join(', ', @n)); $v = $v->as_html; if ($obfs_ibx) { obfuscate_addrs($obfs_ibx, $v); @@ -657,44 +655,51 @@ sub _msg_html_prepare { $rv .= "From: $v\n" if $v ne ''; } foreach my $h (qw(To Cc)) { - defined($v = $hdr->header($h)) or next; - fold_addresses($v); - $v = ascii_html($v); - obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; - $rv .= "$h: $v\n" if $v ne ''; + for my $v ($hdr->header($h)) { + fold_addresses($v); + $v = ascii_html($v); + obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; + $rv .= "$h: $v\n" if $v ne ''; + } } - if (defined($v = $hdr->header('Subject')) && ($v ne '')) { - $v = ascii_html($v); - obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; - if ($over) { - $rv .= qq(Subject: <a\nhref="#r"\nid=t>$v</a>\n); - } else { - $rv .= "Subject: $v\n"; + my @subj = $hdr->header('Subject'); + if (@subj) { + for my $v (@subj) { + $v = ascii_html($v); + obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; + $rv .= 'Subject: '; + if ($over) { + $rv .= qq(<a\nhref="#r"\nid=t>$v</a>\n); + } else { + $rv .= "$v\n"; + } + $title[0] //= $v; } - $title[0] = $v; } else { # dummy anchor for thread skeleton at bottom of page $rv .= qq(<a\nhref="#r"\nid=t></a>) if $over; $title[0] = '(no subject)'; } - if (defined($v = $hdr->header('Date'))) { + for my $v ($hdr->header('Date')) { $v = ascii_html($v); obfuscate_addrs($obfs_ibx, $v) if $obfs_ibx; # possible :P $rv .= "Date: $v\n"; } $ctx->{-title_html} = join(' - ', @title); - foreach (@$mids) { - my $mid = PublicInbox::Hval->new_msgid($_) ; + if (scalar(@$mids) == 1) { # common case + my $mid = 
PublicInbox::Hval->new_msgid($mids->[0]); my $mhtml = $mid->as_html; - if ($multiple) { - my $href = $mid->{href}; - $rv .= "Message-ID: "; - $rv .= "<a\nhref=\"../$href/\">"; - $rv .= "<$mhtml></a> "; - $rv .= "(<a\nhref=\"../$href/raw\">raw</a>)\n"; - } else { - $rv .= "Message-ID: <$mhtml> "; - $rv .= "(<a\nhref=\"raw\">raw</a>)\n"; + $rv .= "Message-ID: <$mhtml> "; + $rv .= "(<a\nhref=\"raw\">raw</a>)\n"; + } else { + # X-Alt-Message-ID can happen if a message is injected from + # public-inbox-nntpd because of multiple Message-ID headers. + my $lnk = PublicInbox::Linkify->new; + my $s = ''; + for my $h (qw(Message-ID X-Alt-Message-ID)) { + $s .= "$h: $_\n" for ($hdr->header_raw($h)); } + $lnk->linkify_mids('..', \$s, 1); + $rv .= $s; } $rv .= _parent_headers($hdr, $over); $rv .= "\n"; @@ -727,8 +732,9 @@ sub thread_skel { $$dst .= "$nr+ messages / $expand"; $$dst .= qq! <a\nhref="#b">top</a>\n!; - my $subj = $hdr->header('Subject'); - defined $subj or $subj = ''; + # nb: mutt only shows the first Subject in the index pane + # when multiple Subject: headers are present, so we follow suit: + my $subj = $hdr->header('Subject') // ''; $subj = '(no subject)' if $subj eq ''; $ctx->{prev_subj} = [ split(/ /, subject_normalized($subj)) ]; $ctx->{cur} = $mid; @@ -746,21 +752,29 @@ sub thread_skel { sub _parent_headers { my ($hdr, $over) = @_; my $rv = ''; - - my $refs = references($hdr); - my $irt = pop @$refs; - if (defined $irt) { - my $v = PublicInbox::Hval->new_msgid($irt); - my $html = $v->as_html; - my $href = $v->{href}; - $rv .= "In-Reply-To: <"; - $rv .= "<a\nhref=\"../$href/\">$html</a>>\n"; + my @irt = $hdr->header_raw('In-Reply-To'); + my $refs; + if (@irt) { + my $lnk = PublicInbox::Linkify->new; + $rv .= "In-Reply-To: $_\n" for @irt; + $lnk->linkify_mids('..', \$rv); + } else { + $refs = references($hdr); + my $irt = pop @$refs; + if (defined $irt) { + my $v = PublicInbox::Hval->new_msgid($irt); + my $html = $v->as_html; + my $href = $v->{href}; + $rv .= 
"In-Reply-To: <"; + $rv .= "<a\nhref=\"../$href/\">$html</a>>\n"; + } } # do not display References: if search is present, # we show the thread skeleton at the bottom, instead. return $rv if $over; + $refs //= references($hdr); if (@$refs) { @$refs = map { linkify_ref_no_over($_) } @$refs; $rv .= 'References: '. join("\n\t", @$refs) . "\n"; |