From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.3.2 (2011-06-06) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-3.0 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00, URIBL_BLOCKED shortcircuit=no autolearn=unavailable version=3.3.2 X-Original-To: meta@public-inbox.org Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id AB05C1FD24 for ; Tue, 1 Sep 2015 08:55:29 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 06/11] implement per-thread Atom feeds Date: Tue, 1 Sep 2015 08:55:23 +0000 Message-Id: <1441097728-31950-6-git-send-email-e@80x24.org> In-Reply-To: <1441097728-31950-1-git-send-email-e@80x24.org> References: <1441097728-31950-1-git-send-email-e@80x24.org> List-Id: This allows users to subscribe to only a single thread with their feed reader without subscribing to the rest of the list. Update our endpoint notes while we're at it. --- Documentation/design_www.txt | 31 ++++++++------- lib/PublicInbox/Feed.pm | 92 ++++++++++++++++++++++++++++++++------------ lib/PublicInbox/View.pm | 3 +- lib/PublicInbox/WWW.pm | 13 +++++++ t/cgi.t | 12 ++++++ 5 files changed, 112 insertions(+), 39 deletions(-) diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt index 55e9268..d25afca 100644 --- a/Documentation/design_www.txt +++ b/Documentation/design_www.txt @@ -6,25 +6,30 @@ URL naming /$LISTNAME/atom.xml -> Atom feed #### Optional, relies on Search::Xapian -/$LISTNAME/t/$MESSAGE_ID.html -> HTML content of thread +/$LISTNAME/t/$MESSAGE_ID/ -> HTML content of thread +/$LISTNAME/t/$MESSAGE_ID/atom -> Atom feed for thread +/$LISTNAME/t/$MESSAGE_ID/mbox.gz -> gzipped mbox of thread ### Stable endpoints -/$LISTNAME/m/$MESSAGE_ID.html -> HTML content (short quotes) -/$LISTNAME/m/$MESSAGE_ID.txt -> raw mbox -/$LISTNAME/m/$MESSAGE_ID -> 301 to .html version -/$LISTNAME/f/$MESSAGE_ID.html -> HTML content (full quotes) -/$LISTNAME/f/$MESSAGE_ID 
-> 301 to .html version -/$LISTNAME/f/$MESSAGE_ID.txt -> 301 to ../m/$MESSAGE_ID.txt +/$LISTNAME/m/$MESSAGE_ID/ -> HTML content (short quotes) +/$LISTNAME/m/$MESSAGE_ID -> 301 to above +/$LISTNAME/m/$MESSAGE_ID/raw -> raw mbox +/$LISTNAME/f/$MESSAGE_ID/ -> HTML content (full quotes) +/$LISTNAME/f/$MESSAGE_ID -> 301 to above +/$LISTNAME/f/$MESSAGE_ID/raw (*) -> 301 to ../m/$MESSAGE_ID/raw + +### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes) +/$LISTNAME/m/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/ +/$LISTNAME/m/$MESSAGE_ID.txt -> 301 to $MESSAGE_ID/raw +/$LISTNAME/f/$MESSAGE_ID.html -> 301 to $MESSAGE_ID/ +/$LISTNAME/f/$MESSAGE_ID.txt (*) -> 301 to ../m/$MESSAGE_ID/raw + FIXME: we must refactor/cleanup/add tests for most of our CGI before adding more endpoints and features. -Maybe TODO (these might be expensive) -------------------------------------- -/$LISTNAME/t/$MESSAGE_ID.mbox -> mbox content of thread - -We use file name suffixes on all of these (except /) so URLs may easily -cached/memoized using a static file server. 
+(*) These URLs were never linked, but only exist as a convenience to folks + who edit existing URLs Encoding notes -------------- diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index 3540e9a..1fef984 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -9,7 +9,7 @@ use Date::Parse qw(strptime); use PublicInbox::Hval; use PublicInbox::GitCatFile; use PublicInbox::View; -use PublicInbox::MID qw/mid_clean mid_compress/; +use PublicInbox::MID qw/mid_clean mid_compress mid2path/; use POSIX qw/strftime/; use constant { DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard @@ -25,6 +25,11 @@ sub generate { sub { emit_atom($_[0], $ctx) }; } +sub generate_thread_atom { + my ($ctx) = @_; + sub { emit_atom_thread($_[0], $ctx) }; +} + sub generate_html_index { my ($ctx) = @_; sub { emit_html_index($_[0], $ctx) }; @@ -32,15 +37,22 @@ sub generate_html_index { # private subs -sub atom_header { - my ($feed_opts) = @_; - my $title = $feed_opts->{description}; +sub title_tag { + my ($title) = @_; + # try to avoid the type attribute in title: $title = PublicInbox::Hval->new_oneline($title)->as_html; my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : ''; + "$title"; +} + +sub atom_header { + my ($feed_opts, $title) = @_; + + $title = title_tag($feed_opts->{description}) unless (defined $title); qq(\n) . qq{} . - qq{$title} . + qq{$title} . qq({url}"/>) . qq({atomurl}"/>) . qq(mailto:$feed_opts->{id_addr}); @@ -56,19 +68,50 @@ sub emit_atom { each_recent_blob($ctx, sub { my ($path, undef, $ts) = @_; if (defined $x) { - $fh->write($x . ''. - strftime(DATEFMT, gmtime($ts)) . - ''); + $fh->write($x . '' . + strftime(DATEFMT, gmtime($ts)) . 
+ ''); $x = undef; } add_to_feed($feed_opts, $fh, $path, $git); }); $git = undef; # destroy pipes + _end_feed($fh); +} + +sub _no_thread { + my ($cb) = @_; + my $fh = $cb->([404, ['Content-Type' => 'text/plain']]); + $fh->write("No feed found for thread\n"); + $fh->close; +} + +sub _end_feed { + my ($fh) = @_; Email::Address->purge_cache; - $fh->write(""); + $fh->write(''); $fh->close; } +sub emit_atom_thread { + my ($cb, $ctx) = @_; + my $res = $ctx->{srch}->get_thread($ctx->{mid}); + return _no_thread($cb) unless $res->{total}; + my $fh = $cb->([200, ['Content-Type' => 'application/xml']]); + my $feed_opts = get_feedopts($ctx); + + my $html_url = $feed_opts->{atomurl} = $ctx->{self_url}; + $html_url =~ s!/atom\z!/!; + $feed_opts->{url} = $html_url; + $feed_opts->{emit_header} = 1; + + my $git = PublicInbox::GitCatFile->new($ctx->{git_dir}); + foreach my $msg (@{$res->{msgs}}) { + add_to_feed($feed_opts, $fh, mid2path($msg->mid), $git); + } + $git = undef; # destroy pipes + _end_feed($fh); +} sub emit_html_index { my ($cb, $ctx) = @_; @@ -233,7 +276,6 @@ sub get_feedopts { my $url_base; if ($cgi) { - my $path_info = $cgi->path_info; my $base; if (ref($cgi) eq 'CGI') { $base = $cgi->url(-base); @@ -241,13 +283,11 @@ sub get_feedopts { $base = $cgi->base->as_string; $base =~ s!/\z!!; } - $url_base = $path_info; - if ($url_base =~ s!/(?:|index\.html)?\z!!) { - $rv{atomurl} = "$base$url_base/atom.xml"; + $url_base = "$base/$listname"; + if (my $mid = $ctx->{mid}) { # per-thread feed: + $rv{atomurl} = "$url_base/t/$mid/atom"; } else { - $url_base =~ s!/atom\.xml\z!!; - $rv{atomurl} = $base . $path_info; - $url_base = $base . $url_base; # XXX is this needed? 
+ $rv{atomurl} = "$url_base/atom.xml"; } } else { $url_base = "http://example.com"; @@ -288,9 +328,12 @@ sub add_to_feed { defined($content) or return 0; $mime = undef; + my $date = $header_obj->header('Date'); + $date = PublicInbox::Hval->new_oneline($date); + $date = feed_date($date->raw) or return 0; + my $title = mime_header($header_obj, 'Subject') or return 0; - $title = PublicInbox::Hval->new_oneline($title)->as_html; - my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : ''; + $title = title_tag($title); my $from = mime_header($header_obj, 'From') or return 0; my @from = Email::Address->parse($from) or return 0; @@ -298,13 +341,12 @@ sub add_to_feed { my $email = $from[0]->address; $email = PublicInbox::Hval->new_oneline($email)->as_html; - my $date = $header_obj->header('Date'); - $date = PublicInbox::Hval->new_oneline($date); - $date = feed_date($date->raw) or return 0; - + if (delete $feed_opts->{emit_header}) { + $fh->write(atom_header($feed_opts, $title) . + "$date"); + } $fh->write("$name$email" . - "$title" . - "$date" . + "$title$date" . qq{} . qq{}); $fh->write($content); @@ -313,7 +355,7 @@ sub add_to_feed { my $h = '[a-f0-9]'; my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o); my $id = 'urn:uuid:' . join('-', @uuid5); - my $midurl = $feed_opts->{midurl} || 'http://example.com/m/'; + my $midurl = $feed_opts->{midurl}; $fh->write(qq{}. "$id"); 1; diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm index 477c4b6..a30bf70 100644 --- a/lib/PublicInbox/View.pm +++ b/lib/PublicInbox/View.pm @@ -160,7 +160,8 @@ sub emit_thread_html { my $next = ""; $next .= $final_anchor == 1 ? 'only message in' : 'end of'; $next .= " thread, back to index\n"; - $next .= "download: mbox.gz\n\n"; + $next .= "download: mbox.gz"; + $next .= " / Atom feed\n\n"; $fh->write("
" . PRE_WRAP . $next . $foot . ""); $fh->close; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index e6eec3d..c99c25f 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -56,6 +56,9 @@ sub run { invalid_list_mid(\%ctx, $1, $2) || get_thread_mbox(\%ctx, $sfx); + } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/atom\z!o) { + invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx); + # legacy redirects } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) { my $pfx = $2; @@ -348,4 +351,14 @@ sub get_thread_mbox { PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx); } + +# /$LISTNAME/t/$MESSAGE_ID/atom -> thread as Atom feed +sub get_thread_atom { + my ($ctx) = @_; + searcher($ctx) or return need_search($ctx); + $ctx->{self_url} = self_url($ctx->{cgi}); + require PublicInbox::Feed; + PublicInbox::Feed::generate_thread_atom($ctx); +} + 1; diff --git a/t/cgi.t b/t/cgi.t index fc28ae3..d84e634 100644 --- a/t/cgi.t +++ b/t/cgi.t @@ -200,6 +200,18 @@ EOF } else { like($res->{head}, qr/^Status: 501 /, "search not available"); } + + my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed; + if ($have_xml_feed) { + $path = "/test/t/blahblah%40example.com/atom"; + $res = cgi_run($path); + like($res->{head}, qr/^Status: 200 /, "atom returned 200"); + like($res->{head}, qr!^Content-Type: application/xml!m, + "search returned atom"); + my $p = XML::Feed->parse(\($res->{body})); + is($p->format, "Atom", "parsed atom feed"); + is(scalar $p->entries, 3, "parsed three entries"); + } } # redirect list-name-only URLs -- EW