about summary refs log tree commit homepage
diff options
context:
space:
mode:
author Eric Wong <normalperson@yhbt.net> 2014-02-25 03:01:04 +0000
committer Eric Wong <normalperson@yhbt.net> 2014-02-25 03:31:25 +0000
commit 25cc5a69a4a38076ea9e587dfa75165fef2273da (patch)
tree 5a799335ca505f4ec72431e4497b0ab10c7ff872
parent 858f0a2960123d6d2cbced1bb18e4e5e524df21e (diff)
download public-inbox-25cc5a69a4a38076ea9e587dfa75165fef2273da.tar.gz
This is to keep content accessible to search engines.
-rw-r--r-- lib/PublicInbox/View.pm 99
-rw-r--r-- t/view.t 55
2 files changed, 154 insertions, 0 deletions
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
new file mode 100644
index 00000000..125fcd62
--- /dev/null
+++ b/lib/PublicInbox/View.pm
@@ -0,0 +1,99 @@
+# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+package PublicInbox::View;
+use strict;
+use warnings;
+use CGI qw/escapeHTML escape/;
+use Encode qw/decode encode/;
+use Encode::MIME::Header;
+
+# only one public function:
+# Render a message as one HTML string.
+#
+# Called as a class method; the class name is accepted but not used.
+# $mime is the message object handed on to the two helpers below, which
+# call ->header and ->walk_parts on it.
+#
+# The result is the header section, then the text of the message parts,
+# then the closing </pre> tag.
+sub as_html {
+        my ($class, $mime) = @_;
+        my $head = headers_to_html_header($mime);
+        my $text = multipart_text_as_html($mime);
+
+        return $head . $text . "</pre>\n";
+}
+
+# only private functions below.
+
+# Build the HTML-escaped body text for $mime and return it as a string.
+#
+# Every part without subparts is visited in walk_parts order and counted.
+# A part whose content type matches text/* is emitted inline, followed by
+# a newline if the output does not already end in one.  Any other part is
+# replaced by a one-line "skipped" notice naming its content type.  A text
+# part that is not part number 0 is preceded by a separator line showing
+# its filename, or "part #N" when the filename is undefined.
+sub multipart_text_as_html {
+        my ($mime) = @_;
+        my $html = "";
+        my $idx = 0; # number of leaf parts seen so far
+
+        my $emit = sub {
+                my ($part) = @_;
+
+                # containers are not rendered; walk_parts already recurses
+                return if $part->subparts;
+
+                my $type = $part->content_type;
+                unless ($type =~ m!\btext/[a-z0-9\+\._-]+\b!i) {
+                        # not displayable: leave a placeholder line
+                        $html .= "-- part #$idx " . escapeHTML($type) .
+                                " skipped\n";
+                        ++$idx;
+                        return;
+                }
+
+                my $name = $part->filename;
+                unless ($idx == 0) {
+                        $name = "part #$idx" unless defined $name;
+                        $html .= add_filename_line($name);
+                }
+
+                # n.b. $part->body should already be decoded if text
+                $html .= escapeHTML($part->body);
+                $html .= "\n" if $html !~ /\n\z/s;
+                ++$idx;
+        };
+
+        $mime->walk_parts($emit);
+        $html;
+}
+
+# Return a separator line of the form "---- NAME ----\n" for a message part.
+#
+# $fn is the label to show (a filename or "part #N"); it is HTML-escaped
+# in the output.  The run of dashes on each side is half of
+# (72 - length($fn)) characters long, rounded down, and never shorter
+# than one dash.
+sub add_filename_line {
+        my ($fn) = @_;
+        my $width = 72;
+
+        # The length is taken before escaping so that it reflects what is
+        # displayed rather than the size of the HTML entities.
+        my $room = $width - length($fn);
+
+        # "x" truncates a fractional count, so a remainder of exactly 1
+        # used to produce no dashes at all; clamp to one dash so that
+        # long names still get a visible separator.
+        my $reps = int($room / 2);
+        $reps = 1 if $reps < 1;
+
+        my $pad = "-" x $reps;
+        return "$pad " . escapeHTML($fn) . " $pad\n";
+}
+
+# Build the opening of the HTML page from the headers of $simple and
+# return it as one string.
+#
+# The string holds the <title> (the From and Subject values that are
+# present, joined by " - "), the opening <pre> tag, one "Name: value" line
+# for each of From, To, Cc, Subject and Date that is defined, a link line
+# for each of Message-ID and In-Reply-To that is defined, and a final
+# empty line.  The <pre> element is left open for the caller to close.
+sub headers_to_html_header {
+        my ($simple) = @_;
+        my %in_title = (From => 1, Subject => 1);
+        my @title;
+        my @lines;
+
+        for my $name (qw(From To Cc Subject Date)) {
+                my $raw = $simple->header($name);
+                next unless defined $raw;
+
+                # MIME-decode, re-encode as utf8, then make it HTML-safe
+                my $text = decode("MIME-Header", $raw);
+                my $val = escapeHTML(encode("utf8", $text));
+                $val =~ tr/\n/ /; # keep each header on a single line
+
+                push @lines, "$name: $val\n";
+                push @title, $val if $in_title{$name};
+        }
+
+        for my $name (qw(Message-ID In-Reply-To)) {
+                my $id = $simple->header($name);
+                next unless defined $id;
+
+                $id =~ tr/<>//d; # drop the surrounding angle brackets
+                my $shown = escapeHTML($id);
+                my $href = escapeHTML(escape($id));
+                push @lines, "$name: <a href=\"$href\">$shown</a>\n";
+        }
+
+        return "<html><head><title>" . join(' - ', @title) .
+                '</title></head><body><pre style="white-space:pre-wrap">' .
+                join("", @lines) . "\n";
+}
+
+1;
diff --git a/t/view.t b/t/view.t
new file mode 100644
index 00000000..93372d16
--- /dev/null
+++ b/t/view.t
@@ -0,0 +1,55 @@
+# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+use strict;
+use warnings;
+use Test::More;
+use Email::MIME;
+use PublicInbox::View;
+
+# plain text
+{
+        # headers of a minimal single-part text/plain message
+        my @hdr = (
+                From => 'a@example.com',
+                To => 'b@example.com',
+                'Content-Type' => 'text/plain',
+                'Message-ID' => '<hello@example.com>',
+                Subject => 'this is a subject',
+        );
+        my $simple = Email::Simple->create(
+                header => \@hdr,
+                body => "hello world\n",
+        );
+
+        # round-trip through the string form to get an Email::MIME object
+        my $mime = Email::MIME->new($simple->as_string);
+        my $html = PublicInbox::View->as_html($mime);
+
+        # loose substring checks on the rendered page
+        like($html, qr/<a href="hello%40/s, "MID link present");
+        like($html, qr/hello world\b/, "body present");
+}
+
+# multipart message made of two text/plain parts
+{
+        my $parts = [
+                Email::MIME->create(
+                        attributes => { content_type => 'text/plain', },
+                        body => 'hi',
+                ),
+                Email::MIME->create(
+                        attributes => { content_type => 'text/plain', },
+                        body => 'bye',
+                )
+        ];
+        my $mime = Email::MIME->create(
+                header_str => [
+                        From => 'a@example.com',
+                        Subject => 'blargh',
+                        'Message-ID' => '<blah@xeample.com>',
+                        'In-Reply-To' => '<irp@xeample.com>',
+                        ],
+                parts => $parts,
+        );
+
+        my $html = PublicInbox::View->as_html($mime);
+
+        # Assert on the output instead of printing it: raw HTML written to
+        # STDOUT gets mixed into the TAP stream and verifies nothing.
+        like($html, qr/<a href="blah%40xeample\.com">/,
+                "MID link present");
+        like($html, qr/<a href="irp%40xeample\.com">/,
+                "In-Reply-To link present");
+
+        # the second part has no filename, so it is labelled "part #1"
+        like($html, qr/\bhi\b.*part #1.*\bbye\b/s,
+                "both text parts present, in order");
+}
+
+done_testing();