diff options
author | Eric Wong <e@80x24.org> | 2016-12-03 00:24:06 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2016-12-03 01:48:14 +0000 |
commit | 95d4bf7aded41cb3b0040c321d315532f68633e1 (patch) | |
tree | b971609ef6fd3665d7d68352600d22ffe5b05cf8 /lib/PublicInbox/Feed.pm | |
parent | 21f5b7a8bcd942b19475c1c0c265f39dfdf93608 (diff) | |
download | public-inbox-95d4bf7aded41cb3b0040c321d315532f68633e1.tar.gz |
This will let us stream larger Atom document bodies without wasting too much memory and reduce the number of round-trip requests needed to get necessary information. Hopefully clients are using streaming (SAX) parsers, too. This is the final transition in the core public-inbox code to allow migrating to a "pull"-based body streaming scheme which allows an HTTP server to respond appropriately to backpressure from slow clients.
Diffstat (limited to 'lib/PublicInbox/Feed.pm')
-rw-r--r-- | lib/PublicInbox/Feed.pm | 188 |
1 files changed, 30 insertions, 158 deletions
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 25fec10a..31d82adb 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -6,26 +6,45 @@ package PublicInbox::Feed; use strict; use warnings; use Email::MIME; -use Date::Parse qw(strptime); -use PublicInbox::Hval qw/ascii_html/; use PublicInbox::View; -use PublicInbox::MID qw/mid_clean mid2path/; -use PublicInbox::Address; -use POSIX qw/strftime/; +use PublicInbox::WwwAtomStream; use constant { - DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # Atom standard MAX_PER_PAGE => 25, # this needs to be tunable }; # main function sub generate { my ($ctx) = @_; - sub { emit_atom($_[0], $ctx) }; + my @paths; + each_recent_blob($ctx, sub { push @paths, $_[0] }); + return _no_thread() unless @paths; + + my $ibx = $ctx->{-inbox}; + PublicInbox::WwwAtomStream->response($ctx, 200, sub { + while (my $path = shift @paths) { + my $mime = do_cat_mail($ibx, $path) or next; + return $mime; + } + }); } sub generate_thread_atom { my ($ctx) = @_; - sub { emit_atom_thread($_[0], $ctx) }; + my $mid = $ctx->{mid}; + my $res = $ctx->{srch}->get_thread($mid); + return _no_thread() unless $res->{total}; + + my $ibx = $ctx->{-inbox}; + my $html_url = $ibx->base_url($ctx->{env}); + $html_url .= PublicInbox::Hval->new_msgid($mid)->{href}; + $ctx->{-html_url} = $html_url; + my $msgs = $res->{msgs}; + PublicInbox::WwwAtomStream->response($ctx, 200, sub { + while (my $msg = shift @$msgs) { + $msg = $ibx->msg_by_smsg($msg) and + return Email::MIME->new($msg); + } + }); } sub generate_html_index { @@ -73,80 +92,8 @@ sub new_html { # private subs -sub title_tag { - my ($title) = @_; - $title =~ tr/\t\n / /s; # squeeze spaces - # try to avoid the type attribute in title: - $title = ascii_html($title); - my $type = index($title, '&') >= 0 ? 
"\ntype=\"html\"" : ''; - "<title$type>$title</title>"; -} - -sub atom_header { - my ($feed_opts, $title) = @_; - - $title = title_tag($feed_opts->{description}) unless (defined $title); - - qq(<?xml version="1.0" encoding="us-ascii"?>\n) . - qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} . - qq{$title} . - qq(<link\nrel="alternate"\ntype="text/html") . - qq(\nhref="$feed_opts->{url}"/>) . - qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) . - qq(<id>mailto:$feed_opts->{id_addr}</id>); -} - -sub emit_atom { - my ($cb, $ctx) = @_; - my $feed_opts = get_feedopts($ctx); - my $fh = $cb->([ 200, ['Content-Type' => 'application/atom+xml']]); - my $max = $ctx->{max} || MAX_PER_PAGE; - my $x = atom_header($feed_opts); - my $ibx = $ctx->{-inbox}; - each_recent_blob($ctx, sub { - my ($path, undef, $ts) = @_; - if (defined $x) { - $fh->write($x . feed_updated(undef, $ts)); - $x = undef; - } - my $s = feed_entry($feed_opts, $path, $ibx) or return 0; - $fh->write($s); - 1; - }); - end_feed($fh); -} - -sub _no_thread { - my ($cb) = @_; - $cb->([404, ['Content-Type', 'text/plain'], - ["No feed found for thread\n"]]); -} - -sub end_feed { - my ($fh) = @_; - $fh->write('</feed>'); - $fh->close; -} - -sub emit_atom_thread { - my ($cb, $ctx) = @_; - my $mid = $ctx->{mid}; - my $res = $ctx->{srch}->get_thread($mid); - return _no_thread($cb) unless $res->{total}; - my $feed_opts = get_feedopts($ctx); - my $fh = $cb->([200, ['Content-Type' => 'application/atom+xml']]); - my $ibx = $ctx->{-inbox}; - my $html_url = $ibx->base_url($ctx->{env}); - $html_url .= PublicInbox::Hval->new_msgid($mid)->{href}; - - $feed_opts->{url} = $html_url; - $feed_opts->{emit_header} = 1; - - foreach my $msg (@{$res->{msgs}}) { - my $s = feed_entry($feed_opts, mid2path($msg->mid), $ibx); - $fh->write($s) if defined $s; - } - end_feed($fh); +sub _no_thread () { + [404, ['Content-Type', 'text/plain'], ["No feed found for thread\n"]]; } sub new_html_footer { @@ -199,7 +146,7 @@ sub each_recent_blob { if 
($line =~ /$addmsg/o) { my $add = $1; next if $deleted{$add}; # optimization-only - $nr += $cb->($add, $cur_commit, $ts, $u, $subj); + $cb->($add, $cur_commit, $ts, $u, $subj) and $nr++; if ($nr >= $max) { $last = 1; last; @@ -228,81 +175,6 @@ sub each_recent_blob { ($first_commit, $last_commit); } -# private functions below -sub get_feedopts { - my ($ctx) = @_; - my $inbox = $ctx->{inbox}; - my $obj = $ctx->{-inbox}; - my %rv = ( description => $obj->description ); - - $rv{address} = $obj->{address}; - $rv{id_addr} = $obj->{-primary_address}; - my $url_base = $obj->base_url($ctx->{env}); - if (my $mid = $ctx->{mid}) { # per-thread feed: - $rv{atomurl} = "$url_base$mid/t.atom"; - } else { - $rv{atomurl} = $url_base."new.atom"; - } - $rv{url} ||= $url_base; - $rv{midurl} = $url_base; - - \%rv; -} - -sub feed_updated { - my ($date, $ts) = @_; - my @t = eval { strptime($date) } if defined $date; - @t = gmtime($ts || time) unless scalar @t; - - '<updated>' . strftime(DATEFMT, @t) . '</updated>'; -} - -# returns undef or string -sub feed_entry { - my ($feed_opts, $add, $ibx) = @_; - - my $mime = do_cat_mail($ibx, $add) or return; - my $url = $feed_opts->{url}; - my $midurl = $feed_opts->{midurl}; - - my $header_obj = $mime->header_obj; - my $mid = mid_clean($header_obj->header_raw('Message-ID')); - $mid = PublicInbox::Hval->new_msgid($mid); - my $href = $midurl . $mid->{href}. '/'; - - my $date = $header_obj->header('Date'); - my $updated = feed_updated($date); - - my $title = $header_obj->header('Subject'); - defined $title or return; - $title = title_tag($title); - - my $from = $header_obj->header('From') or return; - my ($email) = PublicInbox::Address::emails($from); - my $name = join(', ',PublicInbox::Address::names($from)); - $name = ascii_html($name); - $email = ascii_html($email); - - my $s = ''; - if (delete $feed_opts->{emit_header}) { - $s .= atom_header($feed_opts, $title) . $updated; - } - $s .= "<entry><author><name>$name</name><email>$email</email>" . 
- "</author>$title$updated" . - qq{<content\ntype="xhtml">} . - qq{<div\nxmlns="http://www.w3.org/1999/xhtml">} . - qq(<pre\nstyle="white-space:pre-wrap">) . - PublicInbox::View::multipart_text_as_html($mime, $href) . - '</pre>'; - - $add =~ tr!/!!d; - my $h = '[a-f0-9]'; - my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); - my $id = 'urn:uuid:' . join('-', @uuid5); - $s .= qq!</div></content><link\nhref="$href"/>!. - "<id>$id</id></entry>"; -} - sub do_cat_mail { my ($ibx, $path) = @_; my $mime = eval { $ibx->msg_by_path($path) } or return; |