about summary refs log tree commit homepage
diff options
context:
space:
mode:
author: Eric Wong <e@80x24.org> 2015-08-22 08:00:37 +0000
committer: Eric Wong <e@80x24.org> 2015-08-22 08:01:53 +0000
commit: 1761fba7befab2681276ac8f123593610ad27e58 (patch)
tree: 2725fdbd3691a8f00310e9e9ebf96de2deb321ca
parent: 797ba8046562864a09ed36e6040055babb536615 (diff)
download: public-inbox-1761fba7befab2681276ac8f123593610ad27e58.tar.gz
We will attempt to generate Atom feeds "by hand" as the
XML::Atom::SimpleFeed API does not support streaming output.
Since email is large and servers are small, this should prevent
wasting memory when we generate larger feeds.

Of course, we hope clients use SAX parsers capable of handling
large streams without slurping.
-rw-r--r-- INSTALL 1
-rw-r--r-- Makefile.PL 1
-rw-r--r-- lib/PublicInbox/Feed.pm 88
-rw-r--r-- lib/PublicInbox/WWW.pm 6
-rw-r--r-- t/common.perl 16
-rw-r--r-- t/feed.t 13
-rw-r--r-- t/html_index.t 14
7 files changed, 78 insertions, 61 deletions
diff --git a/INSTALL b/INSTALL
index 2bd2eca1..17f1c2b1 100644
--- a/INSTALL
+++ b/INSTALL
@@ -39,7 +39,6 @@ Optional modules:
   - CGI[1]                     perl-modules[2]
   - Mail::Thread (2.5+)[1]     libmail-thread-perl
   - URI::Escape[1]             liburi-perl
-  - XML::Atom::SimpleFeed[1]   libxml-atom-simplefeed-perl
   - Search::Xapian[3]          libsearch-xapian-perl
   - IO::Compress::Gzip[3]      libio-compress-perl
 
diff --git a/Makefile.PL b/Makefile.PL
index 277e9a29..a9b15cab 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -26,7 +26,6 @@ WriteMakefile(
                 'IPC::Run' => 0,
                 'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat
                 'URI::Escape' => 0,
-                'XML::Atom::SimpleFeed' => 0,
                 # We have more test dependencies, but do not force
                 # users to install them
         },
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 5d122ac6..8dd6d036 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# Copyright (C) 2013-2015, all contributors <meta@public-inbox.org>
 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
 package PublicInbox::Feed;
 use strict;
@@ -20,43 +20,50 @@ my $enc_utf8 = find_encoding('UTF-8');
 
 # main function
 sub generate {
-        my ($class, $ctx) = @_;
-        require XML::Atom::SimpleFeed;
+        my ($ctx) = @_;
+        sub { emit_atom($_[0], $ctx) };
+}
+
+sub generate_html_index {
+        my ($ctx) = @_;
+        sub { emit_html_index($_[0], $ctx) };
+}
+
+# private subs
+
+sub emit_atom {
+        my ($cb, $ctx) = @_;
         require POSIX;
+        my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]);
         my $max = $ctx->{max} || MAX_PER_PAGE;
-
         my $feed_opts = get_feedopts($ctx);
         my $addr = $feed_opts->{address};
         $addr = $addr->[0] if ref($addr);
-        my $feed = XML::Atom::SimpleFeed->new(
-                title => $feed_opts->{description} || "unnamed feed",
-                link => $feed_opts->{url} || "http://example.com/",
-                link => {
-                        rel => 'self',
-                        href => $feed_opts->{atomurl} ||
-                                "http://example.com/atom.xml",
-                },
-                id => 'mailto:' . ($addr || 'public-inbox@example.com'),
-                updated => POSIX::strftime(DATEFMT, gmtime),
-        );
-        $feed->no_generator;
+        $addr ||= 'public-inbox@example.com';
+        my $title = $feed_opts->{description} || "unnamed feed";
+        $title = PublicInbox::Hval->new_oneline($title)->as_html;
+        my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+        my $url = $feed_opts->{url} || "http://example.com/";
+        my $atomurl = $feed_opts->{atomurl};
+        $fh->write(qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+                qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
+                qq{<title$type>$title</title>} .
+                qq{<link\nhref="$url"/>} .
+                qq{<link\nrel="self"\nhref="$atomurl"/>} .
+                qq{<id>mailto:$addr</id>} .
+                '<updated>' . POSIX::strftime(DATEFMT, gmtime) . '</updated>');
 
         my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
         each_recent_blob($ctx, sub {
                 my ($add, undef) = @_;
-                add_to_feed($feed_opts, $feed, $add, $git);
+                add_to_feed($feed_opts, $fh, $add, $git);
         });
         $git = undef; # destroy pipes
         Email::Address->purge_cache;
-        $feed->as_string;
-}
-
-sub generate_html_index {
-        my ($ctx) = @_;
-        sub { emit_html_index($_[0], $ctx) };
+        $fh->write("</feed>");
+        $fh->close;
 }
 
-# private subs
 
 sub emit_html_index {
         my ($cb, $ctx) = @_;
@@ -253,10 +260,9 @@ sub feed_date {
 
 # returns 0 (skipped) or 1 (added)
 sub add_to_feed {
-        my ($feed_opts, $feed, $add, $git) = @_;
+        my ($feed_opts, $fh, $add, $git) = @_;
 
         my $mime = do_cat_mail($git, $add) or return 0;
-        my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
         my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
 
         my $header_obj = $mime->header_obj;
@@ -268,30 +274,34 @@ sub add_to_feed {
         defined($content) or return 0;
         $mime = undef;
 
-        my $subject = mime_header($header_obj, 'Subject') or return 0;
+        my $title = mime_header($header_obj, 'Subject') or return 0;
+        $title = PublicInbox::Hval->new_oneline($title)->as_html;
+        my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
 
         my $from = mime_header($header_obj, 'From') or return 0;
         my @from = Email::Address->parse($from);
-        my $name = $from[0]->name;
-        defined $name or $name = "";
+        my $name = PublicInbox::Hval->new_oneline($from[0]->name)->as_html;
         my $email = $from[0]->address;
-        defined $email or $email = "";
+        $email = PublicInbox::Hval->new_oneline($email)->as_html;
 
         my $date = $header_obj->header('Date');
         $date = PublicInbox::Hval->new_oneline($date);
         $date = feed_date($date->raw) or return 0;
+
+        $fh->write("<entry><author><name>$name</name><email>$email</email>" .
+                   "</author><title$type>$title</title>" .
+                   "<updated>$date</updated>" .
+                   qq{<content\ntype="xhtml">} .
+                   qq{<div\nxmlns="http://www.w3.org/1999/xhtml">});
+        $fh->write($content);
+
         $add =~ tr!/!!d;
         my $h = '[a-f0-9]';
         my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
-
-        $feed->add_entry(
-                author => { name => $name, email => $email },
-                title => $subject,
-                updated => $date,
-                content => { type => 'xhtml', content => $content },
-                link => $midurl . $href,
-                id => 'urn:uuid:' . join('-', @uuid5),
-        );
+        my $id = 'urn:uuid:' . join('-', @uuid5);
+        my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
+        $fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
+                   "<id>$id</id></entry>");
         1;
 }
 
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 54f63991..30a7a436 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -81,7 +81,6 @@ sub preload {
         require Email::MIME;
         require Digest::SHA;
         require POSIX;
-        require XML::Atom::SimpleFeed;
 
         eval {
                 require PublicInbox::Search;
@@ -120,11 +119,10 @@ sub invalid_list_mid {
 # /$LISTNAME/atom.xml                       -> Atom feed, includes replies
 sub get_atom {
         my ($ctx, $cgi) = @_;
-        require PublicInbox::Feed;
         $ctx->{pi_config} = $pi_config;
         $ctx->{cgi} = $cgi;
-        [ 200, [ 'Content-Type' => 'application/xml' ],
-          [ PublicInbox::Feed->generate($ctx) ] ]
+        require PublicInbox::Feed;
+        PublicInbox::Feed::generate($ctx);
 }
 
 # /$LISTNAME/?r=$GIT_COMMIT                 -> HTML only
diff --git a/t/common.perl b/t/common.perl
new file mode 100644
index 00000000..a3585bf6
--- /dev/null
+++ b/t/common.perl
@@ -0,0 +1,16 @@
+require IO::File;
+use POSIX qw/dup/;
+
+sub stream_to_string {
+        my ($cb) = @_;
+        my $headers;
+        my $io = IO::File->new_tmpfile;
+        my $dup = dup($io->fileno);
+        my $response = sub { $headers = \@_, $io };
+        $cb->($response);
+        $io = IO::File->new;
+        $io->fdopen($dup, 'r+');
+        $io->seek(0, 0);
+        $io->read(my $str, ($io->stat)[7]);
+        $str;
+}
diff --git a/t/feed.t b/t/feed.t
index 978e2156..6102e8a7 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -9,6 +9,11 @@ use PublicInbox::Config;
 use IPC::Run qw/run/;
 use File::Temp qw/tempdir/;
 my $have_xml_feed = eval { require XML::Feed; 1 };
+require 't/common.perl';
+
+sub string_feed {
+        stream_to_string(PublicInbox::Feed::generate($_[0]));
+}
 
 my $tmpdir = tempdir(CLEANUP => 1);
 my $git_dir = "$tmpdir/gittest";
@@ -58,7 +63,7 @@ EOF
 {
         # check initial feed
         {
-                my $feed = PublicInbox::Feed->generate({
+                my $feed = string_feed({
                         git_dir => $git_dir,
                         max => 3
                 });
@@ -101,7 +106,7 @@ EOF
 
         # check spam shows up
         {
-                my $spammy_feed = PublicInbox::Feed->generate({
+                my $spammy_feed = string_feed({
                         git_dir => $git_dir,
                         max => 3
                 });
@@ -123,7 +128,7 @@ EOF
 
         # spam no longer shows up
         {
-                my $feed = PublicInbox::Feed->generate({
+                my $feed = string_feed({
                         git_dir => $git_dir,
                         max => 3
                 });
@@ -140,7 +145,7 @@ EOF
 # check pi_config
 {
         foreach my $addr (('a@example.com'), ['a@example.com','b@localhost']) {
-                my $feed = PublicInbox::Feed->generate({
+                my $feed = string_feed({
                         git_dir => $git_dir,
                         max => 3,
                         listname => 'asdf',
diff --git a/t/html_index.t b/t/html_index.t
index 6286fc47..73311f65 100644
--- a/t/html_index.t
+++ b/t/html_index.t
@@ -55,18 +55,8 @@ EOF
                 git_dir => $git_dir,
                 max => 3
         });
-        my $headers;
-        my $io = IO::File->new_tmpfile;
-        use POSIX qw/dup/;
-        my $dup = dup($io->fileno);
-        my $response = sub { $headers = \@_, $io };
-        $cb->($response);
-        $io = IO::File->new;
-        $io->fdopen($dup, 'r+');
-        $io->seek(0, 0);
-        $io->read(my $feed, 666666);
-        like($feed, qr/html/, "feed is valid HTML :)");
-        $io->close;
+        require 't/common.perl';
+        like(stream_to_string($cb), qr/html/, "feed is valid HTML :)");
 }
 
 done_testing();