From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 047CE1F491 for ; Sat, 15 Feb 2020 09:46:42 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 7/8] view: escape ampersand in Message-IDs Date: Sat, 15 Feb 2020 09:46:39 +0000 Message-Id: <20200215094640.19425-8-e@yhbt.net> In-Reply-To: <20200215094640.19425-1-e@yhbt.net> References: <20200215094640.19425-1-e@yhbt.net> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We need to escape ampersands (and some other characters for href attributes), so introduce a `mid_href' sub to do just that. '<', '>' and '"' were always escaped, so there's no risk of tag or attribute injection, but creative Message-IDs could cause confusion for some parsers and generate invalid URLs. Start getting rid of the bloated, over-engineered OO Hval API while we're at it, I only noticed this bug because I started killing off Hval->new* callers. --- lib/PublicInbox/ExtMsg.pm | 19 +++++------- lib/PublicInbox/Hval.pm | 7 ++--- lib/PublicInbox/Linkify.pm | 9 +++--- lib/PublicInbox/Reply.pm | 6 ++-- lib/PublicInbox/SearchView.pm | 4 +-- lib/PublicInbox/View.pm | 51 ++++++++++++++------------------ lib/PublicInbox/WwwAtomStream.pm | 9 +++--- t/psgi_bad_mids.t | 9 ++++-- 8 files changed, 53 insertions(+), 61 deletions(-) diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index c48c2db4..44884ad2 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -8,7 +8,7 @@ package PublicInbox::ExtMsg; use strict; use warnings; -use PublicInbox::Hval qw(ascii_html prurl); +use PublicInbox::Hval qw(ascii_html prurl mid_href); use PublicInbox::WwwStream; our $MIN_PARTIAL_LEN = 16; @@ -129,9 +129,8 @@ sub ext_msg { } my $code = 404; - my $h = PublicInbox::Hval->new_msgid($mid); - my $href = $h->{href}; - my $html = $h->as_html; + my $href = mid_href($mid); + my $html = ascii_html($mid); my $title = "<$html> not found"; my $s = "
Message-ID <$html>\nnot found\n";
 	if ($n_partial) {
@@ -145,10 +144,9 @@ sub ext_msg {
 			my $env = $ctx->{env} if $ibx->{name} eq $cur_name;
 			my $u = $ibx->base_url($env) or next;
 			foreach my $m (@$res) {
-				my $p = PublicInbox::Hval->new_msgid($m);
-				my $r = $p->{href};
-				my $t = $p->as_html;
-				$s .= qq{$u$t/\n};
+				my $href = mid_href($m);
+				my $html = ascii_html($m);
+				$s .= qq{$u$html/\n};
 			}
 		}
 	}
@@ -183,9 +181,8 @@ sub ext_urls {
 
 sub exact {
 	my ($ctx, $found, $mid) = @_;
-	my $h = PublicInbox::Hval->new_msgid($mid);
-	my $href = $h->{href};
-	my $html = $h->as_html;
+	my $href = mid_href($mid);
+	my $html = ascii_html($mid);
 	my $title = "<$html> found in ";
 	my $end = @$found == 1 ? 'another inbox' : 'other inboxes';
 	$ctx->{-title_html} = $title . $end;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index 2e883f81..40c97da4 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -10,7 +10,7 @@ use Encode qw(find_encoding);
 use PublicInbox::MID qw/mid_clean mid_escape/;
 use base qw/Exporter/;
 our @EXPORT_OK = qw/ascii_html obfuscate_addrs to_filename src_escape
-		to_attr prurl/;
+		to_attr prurl mid_href/;
 my $enc_ascii = find_encoding('us-ascii');
 
 # safe-ish acceptable filename pattern for portability
@@ -27,10 +27,7 @@ sub new {
 	}, $class;
 }
 
-sub new_msgid {
-	my ($class, $msgid) = @_;
-	$class->new($msgid, mid_escape($msgid));
-}
+sub mid_href { ascii_html(mid_escape($_[0])) }
 
 # some of these overrides are standard C escapes so they're
 # easy-to-understand when rendered.
diff --git a/lib/PublicInbox/Linkify.pm b/lib/PublicInbox/Linkify.pm
index d176a7cc..2bd8f64a 100644
--- a/lib/PublicInbox/Linkify.pm
+++ b/lib/PublicInbox/Linkify.pm
@@ -13,7 +13,7 @@ package PublicInbox::Linkify;
 use strict;
 use warnings;
 use Digest::SHA qw/sha1_hex/;
-use PublicInbox::Hval qw(ascii_html);
+use PublicInbox::Hval qw(ascii_html mid_href);
 
 my $SALT = rand;
 my $LINK_RE = qr{([\('!])?\b((?:ftps?|https?|nntps?|gopher)://
@@ -94,10 +94,9 @@ sub linkify_2 {
 sub linkify_mids {
 	my ($self, $pfx, $str, $raw) = @_;
 	$$str =~ s!<([^>]+)>!
-		my $msgid = PublicInbox::Hval->new_msgid($1);
-		my $html = $msgid->as_html;
-		my $href = $msgid->{href};
-		$href = ascii_html($href); # for IDN
+		my $mid = $1;
+		my $html = ascii_html($mid);
+		my $href = mid_href($mid);
 
 		# salt this, as this could be exploited to show
 		# links in the HTML which don't show up in the raw mail.
diff --git a/lib/PublicInbox/Reply.pm b/lib/PublicInbox/Reply.pm
index edeb1ac2..5058ff8c 100644
--- a/lib/PublicInbox/Reply.pm
+++ b/lib/PublicInbox/Reply.pm
@@ -6,9 +6,9 @@ package PublicInbox::Reply;
 use strict;
 use warnings;
 use URI::Escape qw/uri_escape_utf8/;
-use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
+use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href);
 use PublicInbox::Address;
-use PublicInbox::MID qw/mid_clean mid_escape/;
+use PublicInbox::MID qw(mid_clean);
 
 sub squote_maybe ($) {
 	my ($val) = @_;
@@ -76,7 +76,7 @@ sub mailto_arg_link {
 	$subj = "Re: $subj" unless $subj =~ /\bRe:/i;
 	my $mid = $hdr->header_raw('Message-ID');
 	push @arg, '--in-reply-to='.squote_maybe(mid_clean($mid));
-	my $irt = mid_escape($mid);
+	my $irt = mid_href($mid);
 	delete $cc->{$to};
 	if ($obfs) {
 		my $arg_to = $to;
diff --git a/lib/PublicInbox/SearchView.pm b/lib/PublicInbox/SearchView.pm
index 7e508bb7..9b67b045 100644
--- a/lib/PublicInbox/SearchView.pm
+++ b/lib/PublicInbox/SearchView.pm
@@ -7,7 +7,7 @@ use strict;
 use warnings;
 use URI::Escape qw(uri_unescape uri_escape);
 use PublicInbox::SearchMsg;
-use PublicInbox::Hval qw/ascii_html obfuscate_addrs/;
+use PublicInbox::Hval qw(ascii_html obfuscate_addrs mid_href);
 use PublicInbox::View;
 use PublicInbox::WwwAtomStream;
 use PublicInbox::SearchThread;
@@ -115,7 +115,7 @@ sub mset_summary {
 			obfuscate_addrs($obfs_ibx, $f);
 		}
 		my $date = PublicInbox::View::fmt_ts($smsg->{ds});
-		my $mid = PublicInbox::Hval->new_msgid($smsg->{mid})->{href};
+		my $mid = mid_href($smsg->{mid});
 		$s = '(no subject)' if $s eq '';
 		$$res .= qq{$rank. }.
 			$s . "\n";
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index d4bfa62d..14b7d81d 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -8,9 +8,9 @@ use strict;
 use warnings;
 use bytes (); # only for bytes::length
 use PublicInbox::MsgTime qw(msg_datestamp);
-use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl);
+use PublicInbox::Hval qw(ascii_html obfuscate_addrs prurl mid_href);
 use PublicInbox::Linkify;
-use PublicInbox::MID qw/id_compress mid_escape mids mids_for_index references/;
+use PublicInbox::MID qw/id_compress mids mids_for_index references/;
 use PublicInbox::MsgIter;
 use PublicInbox::Address;
 use PublicInbox::WwwStream;
@@ -29,7 +29,7 @@ sub msg_page_i {
 	my $more = $ctx->{more};
 	if ($nr == 1) {
 		# $more cannot be true w/o $smsg being defined:
-		$ctx->{mhref} = $more ? '../'.mid_escape($ctx->{smsg}->mid).'/'
+		$ctx->{mhref} = $more ? '../'.mid_href($ctx->{smsg}->{mid}).'/'
 				      : '';
 		multipart_text_as_html(delete $ctx->{mime}, $ctx);
 		${delete $ctx->{obuf}} .= '

'; @@ -84,7 +84,7 @@ sub msg_page_more { my $next = $ibx->over->next_by_mid($ctx->{mid}, \$id, \$prev); $ctx->{more} = $next ? [ $id, $prev, $next ] : undef; return '' unless $smsg; - $ctx->{mhref} = '../' . mid_escape($smsg->{mid}) . '/'; + $ctx->{mhref} = '../' . mid_href($smsg->{mid}) . '/'; my $mime = delete $smsg->{mime}; _msg_page_prepare_obuf($mime->header_obj, $ctx, $nr); multipart_text_as_html($mime, $ctx); @@ -220,7 +220,7 @@ sub index_entry { obfuscate_addrs($obfs_ibx, $from) if $obfs_ibx; $rv .= "From: $from @ ".fmt_ts($ds)." UTC"; my $upfx = $ctx->{-upfx}; - my $mhref = $upfx . mid_escape($mid_raw) . '/'; + my $mhref = $upfx . mid_href($mid_raw) . '/'; $rv .= qq{ (permalink / }; $rv .= qq{raw)\n}; my $to = fold_addresses(_hdr_names_html($hdr, 'To')); @@ -244,9 +244,8 @@ sub index_entry { my $mapping = $ctx->{mapping}; if (!$mapping && (defined($irt) || defined($irt = in_reply_to($hdr)))) { - my $mirt = PublicInbox::Hval->new_msgid($irt); - my $href = $upfx . $mirt->{href}. '/'; - my $html = $mirt->as_html; + my $href = $upfx . mid_href($irt) . '/'; + my $html = ascii_html($irt); $rv .= qq(In-Reply-To: <$html>\n) } $rv .= "\n"; @@ -672,8 +671,7 @@ sub _msg_page_prepare_obuf { } $ctx->{-title_html} = join(' - ', @title); if (scalar(@$mids) == 1) { # common case - my $mid = PublicInbox::Hval->new_msgid($mids->[0]); - my $mhtml = $mid->as_html; + my $mhtml = ascii_html($mids->[0]); $rv .= "Message-ID: <$mhtml> "; $rv .= "(raw)\n"; } else { @@ -751,9 +749,8 @@ sub _parent_headers { $refs = references($hdr); my $irt = pop @$refs; if (defined $irt) { - my $v = PublicInbox::Hval->new_msgid($irt); - my $html = $v->as_html; - my $href = $v->{href}; + my $html = ascii_html($irt); + my $href = mid_href($irt); $rv .= "In-Reply-To: <"; $rv .= "$html>\n"; } @@ -787,17 +784,17 @@ sub html_footer { $next = $prev = ' '; if (my $n = $ctx->{next_msg}) { - $n = PublicInbox::Hval->new_msgid($n)->{href}; + $n = mid_href($n); $next = "next"; } my $u; my $par = $ctx->{parent_msg}; if ($par) { - $u = PublicInbox::Hval->new_msgid($par)->{href}; + $u = mid_href($par); $u = "$upfx$u/"; } if (my $p = $ctx->{prev_msg}) { - $prev = PublicInbox::Hval->new_msgid($p)->{href}; + $prev = mid_href($p); if ($p && $par && $p eq $par) { $prev = "prev parent'; @@ -819,9 +816,9 @@ sub html_footer { } sub linkify_ref_no_over { - my $v = PublicInbox::Hval->new_msgid($_[0]); - my $html = $v->as_html; - my $href = $v->{href}; + my ($mid) = @_; + my $href = mid_href($mid); + my $html = ascii_html($mid); "<$html>"; } @@ -833,9 +830,8 @@ sub anchor_for { sub ghost_parent { my ($upfx, $mid) = @_; - $mid = PublicInbox::Hval->new_msgid($mid); - my $href = $mid->{href}; - my $html = $mid->as_html; + my $href = mid_href($mid); + my $html = ascii_html($mid); qq{[parent not found: <$html>]}; } @@ -996,7 +992,7 @@ sub skel_dump { # walk_thread callback $map->[0] = "$d$end"; $id = "\nid=r".$id; } else { - $m = $ctx->{-upfx}.mid_escape($mid).'/'; + $m = $ctx->{-upfx}.mid_href($mid).'/'; } $$skel .= $d . "" . $end; 1; @@ -1010,9 +1006,8 @@ sub _skel_ghost { $d .= ' ' if exists $ctx->{searchview}; $d .= indent_for($level) . th_pfx($level); my $upfx = $ctx->{-upfx}; - my $m = PublicInbox::Hval->new_msgid($mid); - my $href = $upfx . $m->{href} . '/'; - my $html = $m->as_html; + my $href = $upfx . mid_href($mid) . '/'; + my $html = ascii_html($mid); my $mapping = $ctx->{mapping}; my $map = $mapping->{$mid} if $mapping; @@ -1088,7 +1083,7 @@ sub dump_topics { @$topic = (); next unless defined $top_subj; # ghost topic my $mid = delete $seen->{$top_subj}; - my $href = mid_escape($mid); + my $href = mid_href($mid); my $prev_subj = [ split(/ /, $top_subj) ]; $top_subj = ascii_html($top_subj); $ds = fmt_ts($ds); @@ -1118,7 +1113,7 @@ sub dump_topics { $prev_subj = \@next_prev; $subj = ascii_html($subj); obfuscate_addrs($obfs_ibx, $subj) if $obfs_ibx; - $href = mid_escape($mid); + $href = mid_href($mid); $s .= indent_for($level) . TCHILD; $s .= qq($subj$omit\n); } diff --git a/lib/PublicInbox/WwwAtomStream.pm b/lib/PublicInbox/WwwAtomStream.pm index 658934a2..aa917ed8 100644 --- a/lib/PublicInbox/WwwAtomStream.pm +++ b/lib/PublicInbox/WwwAtomStream.pm @@ -12,8 +12,7 @@ use warnings; use POSIX qw(strftime); use Digest::SHA qw(sha1_hex); use PublicInbox::Address; -use PublicInbox::Hval qw(ascii_html); -use PublicInbox::MID qw(mid_escape); +use PublicInbox::Hval qw(ascii_html mid_href); use PublicInbox::MsgTime qw(msg_timestamp); # called by PSGI server after getline: @@ -71,7 +70,7 @@ sub atom_header { my $mid = $ctx->{mid}; my $page_id; if (defined $mid) { # per-thread - $self_url .= mid_escape($mid).'/t.atom'; + $self_url .= mid_href($mid).'/t.atom'; $page_id = to_uuid("t\n".$mid) } elsif (defined $search_q) { my $query = $search_q->{'q'}; @@ -109,13 +108,13 @@ sub feed_entry { my $base = $ctx->{feed_base_url}; if (defined $irt) { my $irt_uuid = to_uuid($irt); - $irt = mid_escape($irt); + $irt = mid_href($irt); $irt = qq(); } else { $irt = ''; } - my $href = $base . mid_escape($mid) . '/'; + my $href = $base . mid_href($mid) . '/'; my $t = msg_timestamp($hdr); my @t = gmtime(defined $t ? $t : time); my $updated = feed_updated(@t); diff --git a/t/psgi_bad_mids.t b/t/psgi_bad_mids.t index d86c90bc..43025a4d 100644 --- a/t/psgi_bad_mids.t +++ b/t/psgi_bad_mids.t @@ -28,6 +28,7 @@ $im->{parallel} = 0; my $msgs = <<''; F1V5OR6NMF.3M649JTLO9IXD@tux.localdomain/hehe1"'/foo +F1V5NB0PTU.3U0DCVGAJ750Z@tux&.ampersand F1V5MIHGCU.2ABINKW6WBE8N@tux.localdomain/raw F1V5LF9D9C.2QT5PGXZQ050E@tux.localdomain/t.atom F1V58X3CMU.2DCCVAKQZGADV@tux.localdomain/../../../../foo @@ -70,9 +71,13 @@ test_psgi(sub { $www->call(@_) }, sub { 'got escaped links to all messages'); @xmids = reverse @xmids; + my %uxs = ( gt => '>', lt => '<' ); foreach my $i (0..$#xmids) { - $res = $cb->(GET("/bad-mids/$xmids[$i]/raw")); - is($res->code, 200, 'got 200 OK raw message'); + my $uri = $xmids[$i]; + $uri =~ s/&#([0-9]+);/sprintf("%c", $1)/sge; + $uri =~ s/&(lt|gt);/$uxs{$1}/sge; + $res = $cb->(GET("/bad-mids/$uri/raw")); + is($res->code, 200, 'got 200 OK raw message '.$uri); like($res->content, qr/Message-ID: <\Q$mids[$i]\E>/s, 'retrieved correct message'); }