From 1761fba7befab2681276ac8f123593610ad27e58 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 22 Aug 2015 08:00:37 +0000 Subject: remove XML::Atom::SimpleFeed dependency We will attempt to generate Atom feeds "by hand" as the XML::Atom::SimpleFeed API does not support streaming output. Since email is large and servers are small, this should prevent wasting memory when we generate larger feeds. Of course, we hope clients use SAX parsers capable of handling large streams without slurping. --- lib/PublicInbox/Feed.pm | 88 +++++++++++++++++++++++++++---------------------- lib/PublicInbox/WWW.pm | 6 ++-- 2 files changed, 51 insertions(+), 43 deletions(-) (limited to 'lib/PublicInbox') diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 5d122ac6..8dd6d036 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -1,4 +1,4 @@ -# Copyright (C) 2013, Eric Wong and all contributors +# Copyright (C) 2013-2015, all contributors # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) package PublicInbox::Feed; use strict; @@ -20,43 +20,50 @@ my $enc_utf8 = find_encoding('UTF-8'); # main function sub generate { - my ($class, $ctx) = @_; - require XML::Atom::SimpleFeed; + my ($ctx) = @_; + sub { emit_atom($_[0], $ctx) }; +} + +sub generate_html_index { + my ($ctx) = @_; + sub { emit_html_index($_[0], $ctx) }; +} + +# private subs + +sub emit_atom { + my ($cb, $ctx) = @_; require POSIX; + my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]); my $max = $ctx->{max} || MAX_PER_PAGE; - my $feed_opts = get_feedopts($ctx); my $addr = $feed_opts->{address}; $addr = $addr->[0] if ref($addr); - my $feed = XML::Atom::SimpleFeed->new( - title => $feed_opts->{description} || "unnamed feed", - link => $feed_opts->{url} || "http://example.com/", - link => { - rel => 'self', - href => $feed_opts->{atomurl} || - "http://example.com/atom.xml", - }, - id => 'mailto:' . ($addr || 'public-inbox@example.com'), - updated => POSIX::strftime(DATEFMT, gmtime), - ); - $feed->no_generator; + $addr ||= 'public-inbox@example.com'; + my $title = $feed_opts->{description} || "unnamed feed"; + $title = PublicInbox::Hval->new_oneline($title)->as_html; + my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : ''; + my $url = $feed_opts->{url} || "http://example.com/"; + my $atomurl = $feed_opts->{atomurl}; + $fh->write(qq(\n) . + qq{} . + qq{$title} . + qq{} . + qq{} . + qq{mailto:$addr} . + '' . POSIX::strftime(DATEFMT, gmtime) . ''); my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); each_recent_blob($ctx, sub { my ($add, undef) = @_; - add_to_feed($feed_opts, $feed, $add, $git); + add_to_feed($feed_opts, $fh, $add, $git); }); $git = undef; # destroy pipes Email::Address->purge_cache; - $feed->as_string; -} - -sub generate_html_index { - my ($ctx) = @_; - sub { emit_html_index($_[0], $ctx) }; + $fh->write(""); + $fh->close; } -# private subs sub emit_html_index { my ($cb, $ctx) = @_; @@ -253,10 +260,9 @@ sub feed_date { # returns 0 (skipped) or 1 (added) sub add_to_feed { - my ($feed_opts, $feed, $add, $git) = @_; + my ($feed_opts, $fh, $add, $git) = @_; my $mime = do_cat_mail($git, $add) or return 0; - my $midurl = $feed_opts->{midurl} || 'http://example.com/m/'; my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/'; my $header_obj = $mime->header_obj; @@ -268,30 +274,34 @@ sub add_to_feed { defined($content) or return 0; $mime = undef; - my $subject = mime_header($header_obj, 'Subject') or return 0; + my $title = mime_header($header_obj, 'Subject') or return 0; + $title = PublicInbox::Hval->new_oneline($title)->as_html; + my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : ''; my $from = mime_header($header_obj, 'From') or return 0; my @from = Email::Address->parse($from); - my $name = $from[0]->name; - defined $name or $name = ""; + my $name = PublicInbox::Hval->new_oneline($from[0]->name)->as_html; my $email = $from[0]->address; - defined $email or $email = ""; + $email = PublicInbox::Hval->new_oneline($email)->as_html; my $date = $header_obj->header('Date'); $date = PublicInbox::Hval->new_oneline($date); $date = feed_date($date->raw) or return 0; + + $fh->write("$name$email" . + "$title" . + "$date" . + qq{} . + qq{}); + $fh->write($content); + $add =~ tr!/!!d; my $h = '[a-f0-9]'; my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); - - $feed->add_entry( - author => { name => $name, email => $email }, - title => $subject, - updated => $date, - content => { type => 'xhtml', content => $content }, - link => $midurl . $href, - id => 'urn:uuid:' . join('-', @uuid5), - ); + my $id = 'urn:uuid:' . join('-', @uuid5); + my $midurl = $feed_opts->{midurl} || 'http://example.com/m/'; + $fh->write(qq{}. + "$id"); 1; } diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index 54f63991..30a7a436 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -81,7 +81,6 @@ sub preload { require Email::MIME; require Digest::SHA; require POSIX; - require XML::Atom::SimpleFeed; eval { require PublicInbox::Search; @@ -120,11 +119,10 @@ sub invalid_list_mid { # /$LISTNAME/atom.xml -> Atom feed, includes replies sub get_atom { my ($ctx, $cgi) = @_; - require PublicInbox::Feed; $ctx->{pi_config} = $pi_config; $ctx->{cgi} = $cgi; - [ 200, [ 'Content-Type' => 'application/xml' ], - [ PublicInbox::Feed->generate($ctx) ] ] + require PublicInbox::Feed; + PublicInbox::Feed::generate($ctx); } # /$LISTNAME/?r=$GIT_COMMIT -> HTML only -- cgit v1.2.3-24-ge0c7