about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2014-04-24 00:21:21 +0000
committer: Eric Wong <e@80x24.org> 2014-04-24 00:21:21 +0000
commit: 483a67ca0613a75bb80ea4c1201cb2d5f2cf063d (patch)
tree: b46b775c6eb619708789734cebf7d349589428bb /lib
parent: dc4a8227f8949694e4fc3f40bb0278d4b4713381 (diff)
download: public-inbox-483a67ca0613a75bb80ea4c1201cb2d5f2cf063d.tar.gz
This helps us keep track of escaping which needs to be done
for various levels.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/Feed.pm 29
-rw-r--r-- lib/PublicInbox/Hval.pm 53
-rw-r--r-- lib/PublicInbox/View.pm 24
3 files changed, 76 insertions, 30 deletions
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 93ee80bb..abfc0a9a 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -4,11 +4,11 @@ package PublicInbox::Feed;
 use strict;
 use warnings;
 use Email::Address;
-use URI::Escape qw/uri_escape/;
 use Encode qw/find_encoding/;
 use Encode::MIME::Header;
 use CGI qw(escapeHTML);
 use Date::Parse qw(strptime str2time);
+use PublicInbox::Hval;
 eval { require Git }; # this is GPLv2+, so we are OK to use it
 use constant {
         DATEFMT => '%Y-%m-%dT%H:%M:%SZ',
@@ -262,10 +262,9 @@ sub add_to_feed {
         my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
         my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
 
-        my $mid = utf8_header($mime, "Message-ID") or return 0;
-        # FIXME: refactor
-        my (undef, $href) = PublicInbox::View::trim_message_id($mid);
-
+        my $mid = $mime->header('Message-ID');
+        $mid = PublicInbox::Hval->new_msgid($mid);
+        my $href = $mid->as_href;
         my $content = PublicInbox::View->as_feed_entry($mime,
                                                         "$fullurl$href.html");
         defined($content) or return 0;
@@ -273,7 +272,8 @@ sub add_to_feed {
         my $subject = utf8_header($mime, "Subject") || "";
         length($subject) or return 0;
 
-        my $from = utf8_header($mime, "From") or return 0;
+        my $from = $mime->header('From') or return 0;
+
 
         my @from = Email::Address->parse($from);
         my $name = $from[0]->name;
@@ -281,9 +281,10 @@ sub add_to_feed {
         my $email = $from[0]->address;
         defined $email or $email = "";
 
-        my $date = utf8_header($mime, "Date");
+        my $date = $mime->header('Date');
         $date or return 0;
-        $date = feed_date($date) or return 0;
+        $date = PublicInbox::Hval->new_oneline($date);
+        $date = feed_date($date->as_utf8) or return 0;
         $feed->add_entry(
                 author => { name => $name, email => $email },
                 title => $subject,
@@ -300,17 +301,17 @@ sub dump_html_line {
         if ($self->message) {
                 $args->[0] .= (' ' x $level);
                 my $simple = $self->message;
-                my $subj = utf8_header($simple, "Subject");
-                my $mid = utf8_header($simple, "Message-ID");
-                $mid =~ s/\A<//;
-                $mid =~ s/>\z//;
-                my $url = $args->[1] . xs_html(uri_escape($mid));
+                my $subj = $simple->header('Subject');
+                my $mid = $simple->header('Message-ID');
+                $mid = PublicInbox::Hval->new_msgid($mid);
+                my $url = $args->[1] . $mid->as_href;
                 my $from = utf8_header($simple, "From");
                 my @from = Email::Address->parse($from);
                 $from = $from[0]->name;
                 (defined($from) && length($from)) or $from = $from[0]->address;
                 $from = xs_html($from);
-                $subj = xs_html($subj);
+                $subj = PublicInbox::Hval->new_oneline($subj);
+                $subj = $subj->as_html;
                 $args->[0] .= "<a href=\"$url.html\">$subj</a> $from\n";
         }
         dump_html_line($self->child, $level+1, $args) if $self->child;
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
new file mode 100644
index 00000000..26a2d0bb
--- /dev/null
+++ b/lib/PublicInbox/Hval.pm
@@ -0,0 +1,53 @@
+# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# represents a header value in various forms
+package PublicInbox::Hval;
+use strict;
+use warnings;
+use fields qw(raw -as_utf8);
+use Encode qw(find_encoding);
+use CGI qw(escapeHTML);
+use URI::Escape qw(uri_escape);
+
+my $enc_utf8 = find_encoding('utf8');
+my $enc_ascii = find_encoding('us-ascii');
+my $enc_mime = find_encoding('MIME-Header');
+
+sub new {
+        my ($class, $raw) = @_;
+        my $self = fields::new($class);
+
+        # we never care about leading/trailing whitespace
+        $raw =~ s/\A\s*//;
+        $raw =~ s/\s*\z//;
+        $self->{raw} = $raw;
+        $self;
+}
+
+sub new_msgid {
+        my ($class, $raw) = @_;
+        $raw =~ s/\A<//;
+        $raw =~ s/>\z//;
+        $class->new($raw);
+}
+
+sub new_oneline {
+        my ($class, $raw) = @_;
+        $raw = '' unless defined $raw;
+        $raw =~ tr/\t\n / /s; # squeeze spaces
+        $raw =~ tr/\r//d; # kill CR
+        $class->new($raw);
+}
+
+sub as_utf8 {
+        my ($self) = @_;
+        $self->{-as_utf8} ||= $enc_utf8->encode($self->{raw});
+}
+
+sub ascii_html { $enc_ascii->encode(escapeHTML($_[0]), Encode::HTMLCREF) }
+
+sub as_html { ascii_html($_[0]->as_utf8) }
+sub as_href { ascii_html(uri_escape($_[0]->as_utf8)) }
+
+1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 355d346a..ecd49156 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -3,6 +3,7 @@
 package PublicInbox::View;
 use strict;
 use warnings;
+use PublicInbox::Hval;
 use URI::Escape qw/uri_escape/;
 use CGI qw/escapeHTML/;
 use Encode qw/find_encoding/;
@@ -135,16 +136,6 @@ sub add_text_body_full {
         $s;
 }
 
-sub trim_message_id {
-        my ($mid) = @_;
-        $mid =~ s/\A\s*<//;
-        $mid =~ s/>\s*\z//;
-        my $html = ascii_html($mid);
-        my $href = ascii_html(uri_escape($mid));
-
-        ($html, $href);
-}
-
 sub ascii_html {
         $enc_ascii->encode(escapeHTML($_[0]), Encode::HTMLCREF);
 }
@@ -178,17 +169,18 @@ sub headers_to_html_header {
 
         my $mid = $simple->header('Message-ID');
         if (defined $mid) {
-                my ($html, $href) = trim_message_id($mid);
-                $rv .= "Message-ID: &lt;$html&gt; ";
-                unless ($full_pfx) {
-                        $href = "../m/$href";
-                }
+                $mid = PublicInbox::Hval->new_msgid($mid);
+                $rv .= 'Message-ID: &lt;' . $mid->as_html . '&gt; ';
+                my $href = $mid->as_href;
+                $href = "../m/$href" unless $full_pfx;
                 $rv .= "(<a href=\"$href.txt\">original</a>)\n";
         }
 
         my $irp = $simple->header('In-Reply-To');
         if (defined $irp) {
-                my ($html, $href) = trim_message_id($irp);
+                $irp = PublicInbox::Hval->new_msgid($irp);
+                my $html = $irp->as_html;
+                my $href = $irp->as_href;
                 $rv .= "In-Reply-To: &lt;";
                 $rv .= "<a href=\"$href.html\">$html</a>&gt;\n";
         }