about summary refs log tree commit homepage
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2015-09-01 08:55:23 +0000
committerEric Wong <e@80x24.org>2015-09-01 08:56:04 +0000
commitd44ed46ee92c78aaaed64975c4d6846613963be4 (patch)
tree0a7546dc1b8b375e49fbca495f7077535dc464c0
parent864dc1c4c61d240651a9f48983e0bf00147a3953 (diff)
downloadpublic-inbox-d44ed46ee92c78aaaed64975c4d6846613963be4.tar.gz
This allows users to subscribe to only a single thread
with their feed reader without subscribing to the rest of
the list.

Update our endpoint notes while we're at it.
-rw-r--r--Documentation/design_www.txt31
-rw-r--r--lib/PublicInbox/Feed.pm92
-rw-r--r--lib/PublicInbox/View.pm3
-rw-r--r--lib/PublicInbox/WWW.pm13
-rw-r--r--t/cgi.t12
5 files changed, 112 insertions, 39 deletions
diff --git a/Documentation/design_www.txt b/Documentation/design_www.txt
index 55e9268c..d25afca3 100644
--- a/Documentation/design_www.txt
+++ b/Documentation/design_www.txt
@@ -6,25 +6,30 @@ URL naming
 /$LISTNAME/atom.xml                             -> Atom feed
 
 #### Optional, relies on Search::Xapian
-/$LISTNAME/t/$MESSAGE_ID.html                   -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/                       -> HTML content of thread
+/$LISTNAME/t/$MESSAGE_ID/atom                   -> Atom feed for thread
+/$LISTNAME/t/$MESSAGE_ID/mbox.gz                -> gzipped mbox of thread
 
 ### Stable endpoints
-/$LISTNAME/m/$MESSAGE_ID.html                   -> HTML content (short quotes)
-/$LISTNAME/m/$MESSAGE_ID.txt                    -> raw mbox
-/$LISTNAME/m/$MESSAGE_ID                        -> 301 to .html version
-/$LISTNAME/f/$MESSAGE_ID.html                   -> HTML content (full quotes)
-/$LISTNAME/f/$MESSAGE_ID                        -> 301 to .html version
-/$LISTNAME/f/$MESSAGE_ID.txt                    -> 301 to ../m/$MESSAGE_ID.txt
+/$LISTNAME/m/$MESSAGE_ID/                       -> HTML content (short quotes)
+/$LISTNAME/m/$MESSAGE_ID                        -> 301 to above
+/$LISTNAME/m/$MESSAGE_ID/raw                    -> raw mbox
+/$LISTNAME/f/$MESSAGE_ID/                       -> HTML content (full quotes)
+/$LISTNAME/f/$MESSAGE_ID                        -> 301 to above
+/$LISTNAME/f/$MESSAGE_ID/raw (*)                -> 301 to ../m/$MESSAGE_ID/raw
+
+### Legacy endpoints (may be ambiguous given Message-IDs with similar suffixes)
+/$LISTNAME/m/$MESSAGE_ID.html                   -> 301 to $MESSAGE_ID/
+/$LISTNAME/m/$MESSAGE_ID.txt                    -> 301 to $MESSAGE_ID/raw
+/$LISTNAME/f/$MESSAGE_ID.html                   -> 301 to $MESSAGE_ID/
+/$LISTNAME/f/$MESSAGE_ID.txt (*)                -> 301 to ../m/$MESSAGE_ID/raw
+
 
 FIXME: we must refactor/cleanup/add tests for most of our CGI before
 adding more endpoints and features.
 
-Maybe TODO (these might be expensive)
--------------------------------------
-/$LISTNAME/t/$MESSAGE_ID.mbox                   -> mbox content of thread
-
-We use file name suffixes on all of these (except /) so URLs may easily
-cached/memoized using a static file server.
+(*) These URLs were never linked, but only exist as a convenience to folks
+    who edit existing URLs
 
 Encoding notes
 --------------
diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm
index 3540e9a4..1fef9849 100644
--- a/lib/PublicInbox/Feed.pm
+++ b/lib/PublicInbox/Feed.pm
@@ -9,7 +9,7 @@ use Date::Parse qw(strptime);
 use PublicInbox::Hval;
 use PublicInbox::GitCatFile;
 use PublicInbox::View;
-use PublicInbox::MID qw/mid_clean mid_compress/;
+use PublicInbox::MID qw/mid_clean mid_compress mid2path/;
 use POSIX qw/strftime/;
 use constant {
         DATEFMT => '%Y-%m-%dT%H:%M:%SZ', # atom standard
@@ -25,6 +25,11 @@ sub generate {
         sub { emit_atom($_[0], $ctx) };
 }
 
+sub generate_thread_atom {
+        my ($ctx) = @_;
+        sub { emit_atom_thread($_[0], $ctx) };
+}
+
 sub generate_html_index {
         my ($ctx) = @_;
         sub { emit_html_index($_[0], $ctx) };
@@ -32,15 +37,22 @@ sub generate_html_index {
 
 # private subs
 
-sub atom_header {
-        my ($feed_opts) = @_;
-        my $title = $feed_opts->{description};
+sub title_tag {
+        my ($title) = @_;
+        # try to avoid the type attribute in title:
         $title = PublicInbox::Hval->new_oneline($title)->as_html;
         my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+        "<title$type>$title</title>";
+}
+
+sub atom_header {
+        my ($feed_opts, $title) = @_;
+
+        $title = title_tag($feed_opts->{description}) unless (defined $title);
 
         qq(<?xml version="1.0" encoding="us-ascii"?>\n) .
         qq{<feed\nxmlns="http://www.w3.org/2005/Atom">} .
-        qq{<title$type>$title</title>} .
+        qq{$title} .
         qq(<link\nhref="$feed_opts->{url}"/>) .
         qq(<link\nrel="self"\nhref="$feed_opts->{atomurl}"/>) .
         qq(<id>mailto:$feed_opts->{id_addr}</id>);
@@ -56,19 +68,50 @@ sub emit_atom {
         each_recent_blob($ctx, sub {
                 my ($path, undef, $ts) = @_;
                 if (defined $x) {
-                        $fh->write($x . '<updated>'.
-                                        strftime(DATEFMT, gmtime($ts)) .
-                                        '</updated>');
+                        $fh->write($x . '<updated>' .
+                                   strftime(DATEFMT, gmtime($ts)) .
+                                   '</updated>');
                         $x = undef;
                 }
                 add_to_feed($feed_opts, $fh, $path, $git);
         });
         $git = undef; # destroy pipes
+        _end_feed($fh);
+}
+
+sub _no_thread {
+        my ($cb) = @_;
+        my $fh = $cb->([404, ['Content-Type' => 'text/plain']]);
+        $fh->write("No feed found for thread\n");
+        $fh->close;
+}
+
+sub _end_feed {
+        my ($fh) = @_;
         Email::Address->purge_cache;
-        $fh->write("</feed>");
+        $fh->write('</feed>');
         $fh->close;
 }
 
+sub emit_atom_thread {
+        my ($cb, $ctx) = @_;
+        my $res = $ctx->{srch}->get_thread($ctx->{mid});
+        return _no_thread($cb) unless $res->{total};
+        my $fh = $cb->([200, ['Content-Type' => 'application/xml']]);
+        my $feed_opts = get_feedopts($ctx);
+
+        my $html_url = $feed_opts->{atomurl} = $ctx->{self_url};
+        $html_url =~ s!/atom\z!/!;
+        $feed_opts->{url} = $html_url;
+        $feed_opts->{emit_header} = 1;
+
+        my $git = PublicInbox::GitCatFile->new($ctx->{git_dir});
+        foreach my $msg (@{$res->{msgs}}) {
+                add_to_feed($feed_opts, $fh, mid2path($msg->mid), $git);
+        }
+        $git = undef; # destroy pipes
+        _end_feed($fh);
+}
 
 sub emit_html_index {
         my ($cb, $ctx) = @_;
@@ -233,7 +276,6 @@ sub get_feedopts {
 
         my $url_base;
         if ($cgi) {
-                my $path_info = $cgi->path_info;
                 my $base;
                 if (ref($cgi) eq 'CGI') {
                         $base = $cgi->url(-base);
@@ -241,13 +283,11 @@ sub get_feedopts {
                         $base = $cgi->base->as_string;
                         $base =~ s!/\z!!;
                 }
-                $url_base = $path_info;
-                if ($url_base =~ s!/(?:|index\.html)?\z!!) {
-                        $rv{atomurl} = "$base$url_base/atom.xml";
+                $url_base = "$base/$listname";
+                if (my $mid = $ctx->{mid}) { # per-thread feed:
+                        $rv{atomurl} = "$url_base/t/$mid/atom";
                 } else {
-                        $url_base =~ s!/atom\.xml\z!!;
-                        $rv{atomurl} = $base . $path_info;
-                        $url_base = $base . $url_base; # XXX is this needed?
+                        $rv{atomurl} = "$url_base/atom.xml";
                 }
         } else {
                 $url_base = "http://example.com";
@@ -288,9 +328,12 @@ sub add_to_feed {
         defined($content) or return 0;
         $mime = undef;
 
+        my $date = $header_obj->header('Date');
+        $date = PublicInbox::Hval->new_oneline($date);
+        $date = feed_date($date->raw) or return 0;
+
         my $title = mime_header($header_obj, 'Subject') or return 0;
-        $title = PublicInbox::Hval->new_oneline($title)->as_html;
-        my $type = index($title, '&') >= 0 ? "\ntype=\"html\"" : '';
+        $title = title_tag($title);
 
         my $from = mime_header($header_obj, 'From') or return 0;
         my @from = Email::Address->parse($from) or return 0;
@@ -298,13 +341,12 @@ sub add_to_feed {
         my $email = $from[0]->address;
         $email = PublicInbox::Hval->new_oneline($email)->as_html;
 
-        my $date = $header_obj->header('Date');
-        $date = PublicInbox::Hval->new_oneline($date);
-        $date = feed_date($date->raw) or return 0;
-
+        if (delete $feed_opts->{emit_header}) {
+                $fh->write(atom_header($feed_opts, $title) .
+                           "<updated>$date</updated>");
+        }
         $fh->write("<entry><author><name>$name</name><email>$email</email>" .
-                   "</author><title$type>$title</title>" .
-                   "<updated>$date</updated>" .
+                   "</author>$title$date" .
                    qq{<content\ntype="xhtml">} .
                    qq{<div\nxmlns="http://www.w3.org/1999/xhtml">});
         $fh->write($content);
@@ -313,7 +355,7 @@ sub add_to_feed {
         my $h = '[a-f0-9]';
         my (@uuid5) = ($add =~ m!\A($h{8})($h{4})($h{4})($h{4})($h{12})!o);
         my $id = 'urn:uuid:' . join('-', @uuid5);
-        my $midurl = $feed_opts->{midurl} || 'http://example.com/m/';
+        my $midurl = $feed_opts->{midurl};
         $fh->write(qq{</div></content><link\nhref="$midurl$href"/>}.
                    "<id>$id</id></entry>");
         1;
diff --git a/lib/PublicInbox/View.pm b/lib/PublicInbox/View.pm
index 477c4b6f..a30bf70a 100644
--- a/lib/PublicInbox/View.pm
+++ b/lib/PublicInbox/View.pm
@@ -160,7 +160,8 @@ sub emit_thread_html {
         my $next = "<a\nid=\"s$final_anchor\">";
         $next .= $final_anchor == 1 ? 'only message in' : 'end of';
         $next .= " thread</a>, back to <a\nhref=\"../../\">index</a>\n";
-        $next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>\n\n";
+        $next .= "download: <a\nhref=\"mbox.gz\">mbox.gz</a>";
+        $next .= " / <a\nhref=\"atom\">Atom feed</a>\n\n";
         $fh->write("<hr />" . PRE_WRAP . $next . $foot .
                    "</pre></body></html>");
         $fh->close;
diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm
index e6eec3d6..c99c25f8 100644
--- a/lib/PublicInbox/WWW.pm
+++ b/lib/PublicInbox/WWW.pm
@@ -56,6 +56,9 @@ sub run {
                 invalid_list_mid(\%ctx, $1, $2) ||
                         get_thread_mbox(\%ctx, $sfx);
 
+        } elsif ($path_info =~ m!$LISTNAME_RE/t/(\S+)/atom\z!o) {
+                invalid_list_mid(\%ctx, $1, $2) || get_thread_atom(\%ctx);
+
         # legacy redirects
         } elsif ($path_info =~ m!$LISTNAME_RE/(t|m|f)/(\S+)\.html\z!o) {
                 my $pfx = $2;
@@ -348,4 +351,14 @@ sub get_thread_mbox {
         PublicInbox::Mbox::thread_mbox($ctx, $srch, $sfx);
 }
 
+
+# /$LISTNAME/t/$MESSAGE_ID/atom                  -> thread as Atom feed
+sub get_thread_atom {
+        my ($ctx) = @_;
+        searcher($ctx) or return need_search($ctx);
+        $ctx->{self_url} = self_url($ctx->{cgi});
+        require PublicInbox::Feed;
+        PublicInbox::Feed::generate_thread_atom($ctx);
+}
+
 1;
diff --git a/t/cgi.t b/t/cgi.t
index fc28ae30..d84e6348 100644
--- a/t/cgi.t
+++ b/t/cgi.t
@@ -200,6 +200,18 @@ EOF
         } else {
                 like($res->{head}, qr/^Status: 501 /, "search not available");
         }
+
+        my $have_xml_feed = eval { require XML::Feed; 1 } if $indexed;
+        if ($have_xml_feed) {
+                $path = "/test/t/blahblah%40example.com/atom";
+                $res = cgi_run($path);
+                like($res->{head}, qr/^Status: 200 /, "atom returned 200");
+                like($res->{head}, qr!^Content-Type: application/xml!m,
+                        "search returned atom");
+                my $p = XML::Feed->parse(\($res->{body}));
+                is($p->format, "Atom", "parsed atom feed");
+                is(scalar $p->entries, 3, "parsed three entries");
+        }
 }
 
 # redirect list-name-only URLs