From 483a67ca0613a75bb80ea4c1201cb2d5f2cf063d Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Apr 2014 00:21:21 +0000 Subject: html: refactor header value handling to be OO This helps us keep track of escaping which needs to be done for various levels. --- lib/PublicInbox/Feed.pm | 29 ++++++++++++++------------- lib/PublicInbox/Hval.pm | 53 +++++++++++++++++++++++++++++++++++++++++++++++++ lib/PublicInbox/View.pm | 24 ++++++++-------------- 3 files changed, 76 insertions(+), 30 deletions(-) create mode 100644 lib/PublicInbox/Hval.pm (limited to 'lib') diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 93ee80bb..abfc0a9a 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -4,11 +4,11 @@ package PublicInbox::Feed; use strict; use warnings; use Email::Address; -use URI::Escape qw/uri_escape/; use Encode qw/find_encoding/; use Encode::MIME::Header; use CGI qw(escapeHTML); use Date::Parse qw(strptime str2time); +use PublicInbox::Hval; eval { require Git }; # this is GPLv2+, so we are OK to use it use constant { DATEFMT => '%Y-%m-%dT%H:%M:%SZ', @@ -262,10 +262,9 @@ sub add_to_feed { my $midurl = $feed_opts->{midurl} || 'http://example.com/m/'; my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/'; - my $mid = utf8_header($mime, "Message-ID") or return 0; - # FIXME: refactor - my (undef, $href) = PublicInbox::View::trim_message_id($mid); - + my $mid = $mime->header('Message-ID'); + $mid = PublicInbox::Hval->new_msgid($mid); + my $href = $mid->as_href; my $content = PublicInbox::View->as_feed_entry($mime, "$fullurl$href.html"); defined($content) or return 0; @@ -273,7 +272,8 @@ sub add_to_feed { my $subject = utf8_header($mime, "Subject") || ""; length($subject) or return 0; - my $from = utf8_header($mime, "From") or return 0; + my $from = $mime->header('From') or return 0; + my @from = Email::Address->parse($from); my $name = $from[0]->name; @@ -281,9 +281,10 @@ sub add_to_feed { my $email = $from[0]->address; defined $email or $email = ""; - my $date = utf8_header($mime, "Date"); + my $date = $mime->header('Date'); $date or return 0; - $date = feed_date($date) or return 0; + $date = PublicInbox::Hval->new_oneline($date); + $date = feed_date($date->as_utf8) or return 0; $feed->add_entry( author => { name => $name, email => $email }, title => $subject, @@ -300,17 +301,17 @@ sub dump_html_line { if ($self->message) { $args->[0] .= (' ' x $level); my $simple = $self->message; - my $subj = utf8_header($simple, "Subject"); - my $mid = utf8_header($simple, "Message-ID"); - $mid =~ s/\A\z//; - my $url = $args->[1] . xs_html(uri_escape($mid)); + my $subj = $simple->header('Subject'); + my $mid = $simple->header('Message-ID'); + $mid = PublicInbox::Hval->new_msgid($mid); + my $url = $args->[1] . $mid->as_href; my $from = utf8_header($simple, "From"); my @from = Email::Address->parse($from); $from = $from[0]->name; (defined($from) && length($from)) or $from = $from[0]->address; $from = xs_html($from); - $subj = xs_html($subj); + $subj = PublicInbox::Hval->new_oneline($subj); + $subj = $subj->as_html; $args->[0] .= "$subj $from\n"; } dump_html_line($self->child, $level+1, $args) if $self->child; diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm new file mode 100644 index 00000000..26a2d0bb --- /dev/null +++ b/lib/PublicInbox/Hval.pm @@ -0,0 +1,53 @@ +# Copyright (C) 2014, Eric Wong and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# +# represents a header value in various forms +package PublicInbox::Hval; +use strict; +use warnings; +use fields qw(raw -as_utf8); +use Encode qw(find_encoding); +use CGI qw(escapeHTML); +use URI::Escape qw(uri_escape); + +my $enc_utf8 = find_encoding('utf8'); +my $enc_ascii = find_encoding('us-ascii'); +my $enc_mime = find_encoding('MIME-Header'); + +sub new { + my ($class, $raw) = @_; + my $self = fields::new($class); + + # we never care about leading/trailing whitespace + $raw =~ s/\A\s*//; + $raw =~ s/\s*\z//; + $self->{raw} = $raw; + $self; +} + +sub new_msgid { + my ($class, $raw) = @_; + $raw =~ s/\A\z//; + $class->new($raw); +} + +sub new_oneline { + my ($class, $raw) = @_; + $raw = '' unless defined $raw; + $raw =~ tr/\t\n / /s; # squeeze spaces + $raw =~ tr/\r//d; # kill CR + $class->new($raw); +} + +sub as_utf8 { + my ($self) = @_; + $self->{-as_utf8} ||= $enc_utf8->encode($self->{raw}); +} + +sub ascii_html { $enc_ascii->encode(escapeHTML($_[0]), Encode::HTMLCREF) } + +sub as_html { ascii_html($_[0]->as_utf8) } +sub as_href { ascii_html(uri_escape($_[0]->as_utf8)) } + +1; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 355d346a..ecd49156 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -3,6 +3,7 @@ package PublicInbox::View; use strict; use warnings; +use PublicInbox::Hval; use URI::Escape qw/uri_escape/; use CGI qw/escapeHTML/; use Encode qw/find_encoding/; @@ -135,16 +136,6 @@ sub add_text_body_full { $s; } -sub trim_message_id { - my ($mid) = @_; - $mid =~ s/\A\s*\s*\z//; - my $html = ascii_html($mid); - my $href = ascii_html(uri_escape($mid)); - - ($html, $href); -} - sub ascii_html { $enc_ascii->encode(escapeHTML($_[0]), Encode::HTMLCREF); } @@ -178,17 +169,18 @@ sub headers_to_html_header { my $mid = $simple->header('Message-ID'); if (defined $mid) { - my ($html, $href) = trim_message_id($mid); - $rv .= "Message-ID: <$html> "; - unless ($full_pfx) { - $href = "../m/$href"; - } + $mid = PublicInbox::Hval->new_msgid($mid); + $rv .= 'Message-ID: <' . $mid->as_html . '> '; + my $href = $mid->as_href; + $href = "../m/$href" unless $full_pfx; $rv .= "(original)\n"; } my $irp = $simple->header('In-Reply-To'); if (defined $irp) { - my ($html, $href) = trim_message_id($irp); + $irp = PublicInbox::Hval->new_msgid($irp); + my $html = $irp->as_html; + my $href = $irp->as_href; $rv .= "In-Reply-To: <"; $rv .= "$html>\n"; } -- cgit v1.2.3-24-ge0c7