user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Cc: Eric Wong <e@80x24.org>
Subject: [PATCH] remove XML::Atom::SimpleFeed dependency
Date: Sat, 22 Aug 2015 08:00:37 +0000	[thread overview]
Message-ID: <1440230437-13669-1-git-send-email-e@80x24.org> (raw)

We will attempt to generate Atom feeds "by hand" as the
XML::Atom::SimpleFeed API does not support streaming output.
Since email is large and servers are small, this should prevent
wasting memory when we generate larger feeds.

Of course, we hope clients use SAX parsers capable of handling
large streams without slurping.
---
 INSTALL                 |  1 -
 Makefile.PL             |  1 -
 lib/PublicInbox/Feed.pm | 88 +++++++++++++++++++++++++++----------------------
 lib/PublicInbox/Hval.pm |  8 +++++
 lib/PublicInbox/WWW.pm  |  6 ++--
 t/common.perl           | 16 +++++++++
 t/feed.t                | 13 +++++---
 t/html_index.t          | 14 ++------
 8 files changed, 86 insertions(+), 61 deletions(-)
 create mode 100644 t/common.perl

diff --git a/INSTALL b/INSTALL
index 2bd2eca..17f1c2b 100644
--- a/INSTALL
+++ b/INSTALL
@@ -39,7 +39,6 @@ Optional modules:
   - CGI[1]                     perl-modules[2]
   - Mail::Thread (2.5+)[1]     libmail-thread-perl
   - URI::Escape[1]             liburi-perl
-  - XML::Atom::SimpleFeed[1]   libxml-atom-simplefeed-perl
   - Search::Xapian[3]          libsearch-xapian-perl
   - IO::Compress::Gzip[3]      libio-compress-perl
 
diff --git a/Makefile.PL b/Makefile.PL
index 277e9a2..a9b15ca 100644
--- a/Makefile.PL
+++ b/Makefile.PL
@@ -26,7 +26,6 @@ WriteMakefile(
 		'IPC::Run' => 0,
 		'Mail::Thread' => '2.5', # 2.5+ needed for Email::Simple compat
 		'URI::Escape' => 0,
-		'XML::Atom::SimpleFeed' => 0,
 		# We have more test dependencies, but do not force
 		# users to install them
 	},
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 5d122ac..8dd6d03 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2013, Eric Wong <normalperson@yhbt.net> and all contributors
+# Copyright (C) 2013-2015, all contributors <meta@public-inbox.org>
 # License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
 package PublicInbox::Feed;
 use strict;
@@ -20,43 +20,50 @@ my $enc_utf8 = find_encoding('UTF-8');
 
 # main function
 sub generate {
-	my ($class, $ctx) = @_;
-	require XML::Atom::SimpleFeed;
+	my ($ctx) = @_;
+	sub { emit_atom($_[0], $ctx) };
+}
+
+sub generate_html_index {
+	my ($ctx) = @_;
+	sub { emit_html_index($_[0], $ctx) };
+}
+
+# private subs
+
+sub emit_atom {
+	my ($cb, $ctx) = @_;
 	require POSIX;
+	my $fh = $cb->([ 200, ['Content-Type' => 'application/xml']]);
 	my $max = $ctx->{max} || MAX_PER_PAGE;
-
 	my $feed_opts = get_feedopts($ctx);
 	my $addr = $feed_opts->{address};
 	$addr = $addr->[0] if ref($addr);
-	my $feed = XML::Atom::SimpleFeed->new(
-		title => $feed_opts->{description} || "unnamed feed",
-		link => $feed_opts->{url} || "http://example.com/",
-		link => {
-			rel => 'self',
-			href => $feed_opts->{atomurl} ||
-				"http://example.com/atom.xml",
-		},
-		id => 'mailto:' . ($addr || 'public-inbox@example.com'),
-		updated => POSIX::strftime(DATEFMT, gmtime),
-	);
-	$feed->no_generator;
+	$addr ||= 'public-inbox@example.com';
+	my $title = $feed_opts->{description} || "unnamed feed";
+	$title = PublicInbox::Hval->new_oneline($title)->as_html;
+	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+	my $url = $feed_opts->{url} || "http://example.com/";
+	my $atomurl = $feed_opts->{atomurl};
+	$fh->write(qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
+		qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
+		qq{<title$type>$title</title>} .
+		qq{<link\nhref="$url"/>} .
+		qq{<link\nrel="self"\nhref="$atomurl"/>} .
+		qq{<id>mailto:$addr</id>} .
+		'<updated>' . POSIX::strftime(DATEFMT, gmtime) . '</updated>');
 
 	my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
 	each_recent_blob($ctx, sub {
 		my ($add, undef) = @_;
-		add_to_feed($feed_opts, $feed, $add, $git);
+		add_to_feed($feed_opts, $fh, $add, $git);
 	});
 	$git = undef; # destroy pipes
 	Email::Address->purge_cache;
-	$feed->as_string;
-}
-
-sub generate_html_index {
-	my ($ctx) = @_;
-	sub { emit_html_index($_[0], $ctx) };
+	$fh->write("</feed>");
+	$fh->close;
 }
 
-# private subs
 
 sub emit_html_index {
 	my ($cb, $ctx) = @_;
@@ -253,10 +260,9 @@ sub feed_date {
 
 # returns 0 (skipped) or 1 (added)
 sub add_to_feed {
-	my ($feed_opts, $feed, $add, $git) = @_;
+	my ($feed_opts, $fh, $add, $git) = @_;
 
 	my $mime = do_cat_mail($git, $add) or return 0;
-	my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
 	my $fullurl = $feed_opts->{fullurl} || 'http://example.com/f/';
 
 	my $header_obj = $mime->header_obj;
@@ -268,30 +274,34 @@ sub add_to_feed {
 	defined($content) or return 0;
 	$mime = undef;
 
-	my $subject = mime_header($header_obj, 'Subject') or return 0;
+	my $title = mime_header($header_obj, 'Subject') or return 0;
+	$title = PublicInbox::Hval->new_oneline($title)->as_html;
+	my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
 
 	my $from = mime_header($header_obj, 'From') or return 0;
 	my @from = Email::Address->parse($from);
-	my $name = $from[0]->name;
-	defined $name or $name = "";
+	my $name = PublicInbox::Hval->new_oneline($from[0]->name)->as_html;
 	my $email = $from[0]->address;
-	defined $email or $email = "";
+	$email = PublicInbox::Hval->new_oneline($email)->as_html;
 
 	my $date = $header_obj->header('Date');
 	$date = PublicInbox::Hval->new_oneline($date);
 	$date = feed_date($date->raw) or return 0;
+
+	$fh->write("<entry><author><name>$name</name><email>$email</email>" .
+		   "</author><title$type>$title</title>" .
+		   "<updated>$date</updated>" .
+		   qq{<content\ntype="xhtml">} .
+		   qq{<div\nxmlns="http://www.w3.org/1999/xhtml">});
+	$fh->write($content);
+
 	$add =~ tr!/!!d;
 	my $h = '[a-f0-9]';
 	my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
-
-	$feed->add_entry(
-		author => { name => $name, email => $email },
-		title => $subject,
-		updated => $date,
-		content => { type => 'xhtml', content => $content },
-		link => $midurl . $href,
-		id => 'urn:uuid:' . join('-', @uuid5),
-	);
+	my $id = 'urn:uuid:' . join('-', @uuid5);
+	my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
+	$fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
+		   "<id>$id</id></entry>");
 	1;
 }
 
diff --git a/lib/PublicInbox/Hval.pm b/lib/PublicInbox/Hval.pm
index d8b31c8..59b2bb3 100644
--- a/lib/PublicInbox/Hval.pm
+++ b/lib/PublicInbox/Hval.pm
@@ -46,6 +46,8 @@ my %xhtml_map = (
 	'>' => '&gt;',
 );
 
+my %attr_map = (%xhtml_map, "\xA" => '&#10;', "\xD" => '&#13;');
+
 sub ascii_html {
 	my ($s) = @_;
 	$s =~ s/\r\n/\n/sg; # fixup bad line endings
@@ -53,6 +55,12 @@ sub ascii_html {
 	$enc_ascii->encode($s, Encode::HTMLCREF);
 }
 
+sub attr_escape {
+	my ($s) = @_;
+	$s =~ s/([\x0A\x0D<>&'"])/$attr_map{$1}/ge;
+	$enc_ascii->encode($s, Encode::HTMLCREF);
+}
+
 sub as_html { ascii_html($_[0]->{raw}) }
 sub as_href { ascii_html(uri_escape_utf8($_[0]->{href})) }
 
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index 54f6399..30a7a43 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -81,7 +81,6 @@ sub preload {
 	require Email::MIME;
 	require Digest::SHA;
 	require POSIX;
-	require XML::Atom::SimpleFeed;
 
 	eval {
 		require PublicInbox::Search;
@@ -120,11 +119,10 @@ sub invalid_list_mid {
 # /$LISTNAME/atom.xml                       -> Atom feed, includes replies
 sub get_atom {
 	my ($ctx, $cgi) = @_;
-	require PublicInbox::Feed;
 	$ctx->{pi_config} = $pi_config;
 	$ctx->{cgi} = $cgi;
-	[ 200, [ 'Content-Type' => 'application/xml' ],
-	  [ PublicInbox::Feed->generate($ctx) ] ]
+	require PublicInbox::Feed;
+	PublicInbox::Feed::generate($ctx);
 }
 
 # /$LISTNAME/?r=$GIT_COMMIT                 -> HTML only
diff --git a/t/common.perl b/t/common.perl
new file mode 100644
index 0000000..a3585bf
--- /dev/null
+++ b/t/common.perl
@@ -0,0 +1,16 @@
+require IO::File;
+use POSIX qw/dup/;
+
+sub stream_to_string {
+	my ($cb) = @_;
+	my $headers;
+	my $io = IO::File->new_tmpfile;
+	my $dup = dup($io->fileno);
+	my $response = sub { $headers = \@_, $io };
+	$cb->($response);
+	$io = IO::File->new;
+	$io->fdopen($dup, 'r+');
+	$io->seek(0, 0);
+	$io->read(my $str, ($io->stat)[7]);
+	$str;
+}
diff --git a/t/feed.t b/t/feed.t
index 978e215..6102e8a 100644
--- a/t/feed.t
+++ b/t/feed.t
@@ -9,6 +9,11 @@ use PublicInbox::Config;
 use IPC::Run qw/run/;
 use File::Temp qw/tempdir/;
 my $have_xml_feed = eval { require XML::Feed; 1 };
+require 't/common.perl';
+
+sub string_feed {
+	stream_to_string(PublicInbox::Feed::generate($_[0]));
+}
 
 my $tmpdir = tempdir(CLEANUP => 1);
 my $git_dir = "$tmpdir/gittest";
@@ -58,7 +63,7 @@ EOF
 {
 	# check initial feed
 	{
-		my $feed = PublicInbox::Feed->generate({
+		my $feed = string_feed({
 			git_dir => $git_dir,
 			max => 3
 		});
@@ -101,7 +106,7 @@ EOF
 
 	# check spam shows up
 	{
-		my $spammy_feed = PublicInbox::Feed->generate({
+		my $spammy_feed = string_feed({
 			git_dir => $git_dir,
 			max => 3
 		});
@@ -123,7 +128,7 @@ EOF
 
 	# spam no longer shows up
 	{
-		my $feed = PublicInbox::Feed->generate({
+		my $feed = string_feed({
 			git_dir => $git_dir,
 			max => 3
 		});
@@ -140,7 +145,7 @@ EOF
 # check pi_config
 {
 	foreach my $addr (('a@example.com'), ['a@example.com','b@localhost']) {
-		my $feed = PublicInbox::Feed->generate({
+		my $feed = string_feed({
 			git_dir => $git_dir,
 			max => 3,
 			listname => 'asdf',
diff --git a/t/html_index.t b/t/html_index.t
index 6286fc4..73311f6 100644
--- a/t/html_index.t
+++ b/t/html_index.t
@@ -55,18 +55,8 @@ EOF
 		git_dir => $git_dir,
 		max => 3
 	});
-	my $headers;
-	my $io = IO::File->new_tmpfile;
-	use POSIX qw/dup/;
-	my $dup = dup($io->fileno);
-	my $response = sub { $headers = \@_, $io };
-	$cb->($response);
-	$io = IO::File->new;
-	$io->fdopen($dup, 'r+');
-	$io->seek(0, 0);
-	$io->read(my $feed, 666666);
-	like($feed, qr/html/, "feed is valid HTML :)");
-	$io->close;
+	require 't/common.perl';
+	like(stream_to_string($cb), qr/html/, "feed is valid HTML :)");
 }
 
 done_testing();
-- 
EW


             reply	other threads:[~2015-08-22  8:00 UTC|newest]

Thread overview: 2+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-22  8:00 Eric Wong [this message]
2015-08-22  8:03 ` [PATCH v2] remove XML::Atom::SimpleFeed dependency Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440230437-13669-1-git-send-email-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

  Be sure your reply has a Subject: header at the top and a blank line
  before the message body.

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).