diff options
-rw-r--r-- | Makefile.PL | 10 | ||||
-rw-r--r-- | lib/PublicInbox/Feed.pm | 42 | ||||
-rwxr-xr-x | public-inbox-cgi | 92 | ||||
-rw-r--r-- | t/cgi.t | 153 |
4 files changed, 282 insertions, 15 deletions
diff --git a/Makefile.PL b/Makefile.PL index 5d3ee75c..6b2e16ff 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -8,14 +8,20 @@ WriteMakefile( VERSION => '0.0.0', AUTHOR => 'Eric Wong <normalperson@yhbt.net>', ABSTRACT => 'public-inbox.org infrastructure', - EXE_FILES => [qw/public-inbox-mda/], + EXE_FILES => [qw/public-inbox-mda public-inbox-cgi/], PREREQ_PM => { - # note: we use ssoma(1) and spamc(1), + # note: we use ssoma(1) and spamc(1), # NOT the Perl modules + # We also depend on git through ssoma. 'Email::MIME' => 0, 'Email::MIME::ContentType' => 0, 'Email::Filter' => 0, 'Email::Address' => 0, + 'Date::Parse' => 0, + 'Encode::MIME::Header' => 0, + 'XML::Atom::SimpleFeed' => 0, + # We have more test dependencies, but do not force + # users to install them }, ); diff --git a/lib/PublicInbox/Feed.pm b/lib/PublicInbox/Feed.pm index da877f3f..704effce 100644 --- a/lib/PublicInbox/Feed.pm +++ b/lib/PublicInbox/Feed.pm @@ -16,22 +16,23 @@ use constant DATEFMT => '%Y-%m-%dT%H:%M:%SZ'; our $dt_parser = DateTime::Format::Mail->new(loose => 1); # main function +# FIXME: takes too many args, cleanup sub generate { - my ($class, $git_dir, $max) = @_; + my ($class, $git_dir, $max, $pi_config, $listname, $cgi, $top) = @_; $max ||= 25; local $ENV{GIT_DIR} = $git_dir; - my $feed_opts = get_feedopts(); + my $feed_opts = get_feedopts($pi_config, $listname, $cgi); my $feed = XML::Atom::SimpleFeed->new( - title => $feed_opts->{title}, + title => $feed_opts->{description} || "unnamed feed", link => $feed_opts->{url} || "http://example.com/", link => { rel => 'self', - href => $feed_opts->{atomUrl} || + href => $feed_opts->{atomurl} || "http://example.com/atom", }, - id => $feed_opts->{email} || 'public-inbox@example.com', + id => $feed_opts->{address} || 'public-inbox@example.com', updated => strftime(DATEFMT, gmtime), ); @@ -48,12 +49,13 @@ sub generate { if ($line =~ /^:000000 100644 0{40} ([a-f0-9]{40})/) { my $add = $1; next if $deleted{$add}; - $nr += add_to_feed($feed_opts, $feed, $add); + $nr += add_to_feed($feed_opts, $feed, $add, $top); last if $nr >= $max; } elsif ($line =~ /^:100644 000000 ([a-f0-9]{40}) 0{40}/) { $deleted{$1} = 1; } } + close $log; $feed->as_string; @@ -61,12 +63,22 @@ sub generate { # private functions below sub get_feedopts { + my ($pi_config, $listname, $cgi) = @_; my %rv; - foreach my $key (qw(title url atomUrl email)) { - my $tmp = `git config publicInboxFeed.$key`; - chomp $tmp; - $rv{$key} = $tmp; + if ($pi_config && defined $listname && length $listname) { + foreach my $key (qw(description address url atomurl midurl)) { + $rv{$key} = $pi_config->get($listname, $key); + } + } + if ($cgi) { + my $cgi_url = $cgi->self_url; + my $url_base = $cgi_url; + $url_base =~ s!/?(?:index|all)\.atom\.xml\z!!; + $rv{url} ||= "$url_base/"; + $rv{midurl} ||= "$url_base/mid/%s.html"; + $rv{atomurl} = $cgi_url; } + \%rv; } @@ -83,7 +95,7 @@ sub feed_date { # returns 0 (skipped) or 1 (added) sub add_to_feed { - my ($feed_opts, $feed, $add) = @_; + my ($feed_opts, $feed, $add, $top) = @_; # we can use git cat-file --batch if performance becomes a # problem, but I doubt it... @@ -91,10 +103,14 @@ sub add_to_feed { return 0 if $? != 0; my $mime = Email::MIME->new($str); + if ($top && $mime->header("In-Reply-To")) { + return 0; + } + my $content = msg_content($mime); defined($content) or return 0; - my $mid_url = $feed_opts->{mid_url} || "http://example.com/mid/%s"; + my $midurl = $feed_opts->{midurl} || "http://example.com/mid/%s.html"; my $mid = utf8_header($mime, "Message-ID") or return 0; $mid =~ s/\A<//; $mid =~ s/>\z//; @@ -110,7 +126,7 @@ sub add_to_feed { my $email = $from[0]->address; defined $email or $email = ""; - my $url = sprintf($mid_url, uri_escape($mid)); + my $url = sprintf($midurl, uri_escape($mid)); my $date = utf8_header($mime, "Date"); $date or return 0; $date = feed_date($date) or return 0; diff --git a/public-inbox-cgi b/public-inbox-cgi new file mode 100755 index 00000000..cfcf3feb --- /dev/null +++ b/public-inbox-cgi @@ -0,0 +1,92 @@ +#!/usr/bin/perl -w +# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +# +# We focus on the lowest common denominators here: +# - targeted at text-only console browsers (lynx, w3m, etc..) +# - Only basic HTML, CSS only for line-wrapping <pre> text content for GUIs +# - No JavaScript, graphics or icons allowed. +# - Must not rely on static content +# - UTF-8 is only for user-content, 7-bit US-ASCII for us + +use 5.008; +use strict; +use warnings; +use CGI qw(:cgi :escapeHTML -nosticky); # PSGI/FastCGI/mod_perl compat +use CGI::Util qw(unescape); +use Encode; +use PublicInbox::Config; +our $LISTNAME_RE = qr!\A/([\w\.\-]+)!; +our $pi_config; +BEGIN { + $pi_config = PublicInbox::Config->new; + # TODO: detect and reload config as needed + if ($ENV{MOD_PERL}) { + CGI->compile; + } +} + +sub main { + my $cgi = CGI->new; + if ($cgi->request_method !~ /\AGET|HEAD\z/) { + return r($cgi, "405 Method Not Allowed"); + } + my $path_info = decode_utf8($ENV{PATH_INFO}); + if ($path_info eq "/") { + r($cgi, "404 Not Found"); + } elsif ($path_info =~ m!$LISTNAME_RE/?\z!o) { + get_list_log($cgi, $1); + } elsif ($path_info =~ m!$LISTNAME_RE/all\z!o) { + get_list_all($cgi, $1); + } elsif ($path_info =~ m!$LISTNAME_RE/index\.atom\.xml\z!o) { + get_atom_index($cgi, $1); + } elsif ($path_info =~ m!$LISTNAME_RE/all\.atom\.xml\z!o) { + get_atom_all($cgi, $1); + } elsif ($path_info =~ m!$LISTNAME_RE/mid/(\S+)\.txt\z!o) { + get_mid_txt($cgi, $1, $2); + } elsif ($path_info =~ m!$LISTNAME_RE/mid/(\S+)\.html\z!o) { + get_mid_html($cgi, $1, $2); + } elsif ($path_info =~ m!$LISTNAME_RE/mid/(\S+)\z!o) { + redirect_mid_html($cgi, $1, $2); + } else { + r($cgi, "404 Not Found"); + } +} + +binmode STDOUT, ':utf8'; +main(); + +# simple response for errors +sub r { + print $_[0]->header(-type => "text/plain", + -status => $_[1], + -charset => 'utf-8'); +} + +# /$LISTNAME/all.atom.xml -> Atom feed, includes replies +sub get_atom_all { + my ($cgi, $listname) = @_; + my $git_dir = $pi_config->get($listname, "mainrepo"); + defined $git_dir or return r($cgi, "404 Not Found"); + + require PublicInbox::Feed; + print $cgi->header(-type => "application/xml", -charset => 'us-ascii', + -status => '200 OK'); + + print PublicInbox::Feed->generate($git_dir, undef, + $pi_config, $listname, $cgi); +} + +# /$LISTNAME/index.atom.xml -> Atom feed +sub get_atom_index { + my ($cgi, $listname) = @_; + my $git_dir = $pi_config->get($listname, "mainrepo"); + defined $git_dir or return r($cgi, "404 Not Found"); + + require PublicInbox::Feed; + print $cgi->header(-type => "application/xml", -charset => 'us-ascii', + -status => '200 OK'); + + print PublicInbox::Feed->generate($git_dir, undef, + $pi_config, $listname, $cgi, 1); +} diff --git a/t/cgi.t b/t/cgi.t new file mode 100644 index 00000000..f359cf6e --- /dev/null +++ b/t/cgi.t @@ -0,0 +1,153 @@ +# Copyright (C) 2014, Eric Wong <normalperson@yhbt.net> and all contributors +# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt) +use strict; +use warnings; +use Test::More; +use Email::MIME; +use File::Temp qw/tempdir/; +use Cwd; +use IPC::Run qw/run/; + +use constant CGI => "blib/script/public-inbox-cgi"; +my $mda = "blib/script/public-inbox-mda"; +my $tmpdir = tempdir(CLEANUP => 1); +my $home = "$tmpdir/pi-home"; +my $pi_home = "$home/.public-inbox"; +my $pi_config = "$pi_home/config"; +my $maindir = "$tmpdir/main.git"; +my $main_bin = getcwd()."/t/main-bin"; +my $main_path = "$main_bin:$ENV{PATH}"; # for spamc ham mock +my $addr = 'test-public@example.com'; +my $cfgpfx = "publicinbox.test"; + +{ + ok(-x "$main_bin/spamc", + "spamc ham mock found (run in top of source tree"); + ok(-x $mda, "$mda is executable"); + is(1, mkdir($home, 0755), "setup ~/ for testing"); + is(1, mkdir($pi_home, 0755), "setup ~/.public-inbox"); + is(0, system(qw(git init -q --bare), $maindir), "git init (main)"); + + my %cfg = ( + "$cfgpfx.address" => $addr, + "$cfgpfx.mainrepo" => $maindir, + "$cfgpfx.description" => 'test for public-inbox', + ); + while (my ($k,$v) = each %cfg) { + is(0, system(qw(git config --file), $pi_config, $k, $v), + "setup $k"); + } +} + +{ + my $failbox = "$home/fail.mbox"; + local $ENV{PI_FAILBOX} = $failbox; + local $ENV{HOME} = $home; + local $ENV{RECIPIENT} = $addr; + + # ensure successful message delivery + { + my $simple = Email::Simple->new(<<EOF); +From: Me <me\@example.com> +To: You <you\@example.com> +Cc: $addr +Message-Id: <blah\@example.com> +Subject: hihi +Date: Thu, 01 Jan 1970 00:00:00 +0000 + +zzzzzz +EOF + my $in = $simple->as_string; + run_with_env({PATH => $main_path}, [$mda], \$in); + local $ENV{GIT_DIR} = $maindir; + my $rev = `git rev-list HEAD`; + like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); + } + + # deliver a reply, too + { + my $reply = Email::Simple->new(<<EOF); +From: You <you\@example.com> +To: Me <me\@example.com> +Cc: $addr +In-Reply-To: <blah\@example.com> +Message-Id: <blahblah\@example.com> +Subject: Re: hihi +Date: Thu, 01 Jan 1970 00:00:01 +0000 + +Me wrote: +> zzzzzz + +what? +EOF + my $in = $reply->as_string; + run_with_env({PATH => $main_path}, [$mda], \$in); + local $ENV{GIT_DIR} = $maindir; + my $rev = `git rev-list HEAD`; + like($rev, qr/\A[a-f0-9]{40}/, "good revision committed"); + } + +} + +# obvious failures, first +{ + local $ENV{HOME} = $home; + my $res = cgi_run("/", "", "PUT"); + like($res->{head}, qr/Status:\s*405/i, "PUT not allowed"); + + $res = cgi_run("/"); + like($res->{head}, qr/Status:\s*404/i, "index returns 404"); +} + +# atom feeds +{ + local $ENV{HOME} = $home; + my $res = cgi_run("/test/all.atom.xml"); + like($res->{body}, qr/<title>test for public-inbox/, + "set title in XML feed"); + like($res->{body}, + qr!http://test\.example\.com/test/mid/blah%40example\.com!, + "link id set"); + like($res->{body}, qr/what\?/, "reply included"); + + $res = cgi_run("/test/index.atom.xml"); + unlike($res->{body}, qr/what\?/, "reply not included in index"); +} + +# indices +{ + local $ENV{HOME} = $home; + my $res = cgi_run("/test/all.atom.xml"); + like($res->{body}, qr/<title>test for public-inbox/, + "set title in XML feed"); + like($res->{body}, + qr!http://test\.example\.com/test/mid/blah%40example\.com!, + "link id set"); + like($res->{body}, qr/what\?/, "reply included"); + + $res = cgi_run("/test/index.atom.xml"); + unlike($res->{body}, qr/what\?/, "reply not included in index"); +} + +done_testing(); + +sub run_with_env { + my ($env, @args) = @_; + my $init = sub { foreach my $k (keys %$env) { $ENV{$k} = $env->{$k} } }; + run(@args, init => $init); +} + +sub cgi_run { + my %env = ( + PATH_INFO => $_[0], + QUERY_STRING => $_[1] || "", + REQUEST_METHOD => $_[2] || "GET", + GATEWAY_INTERFACE => 'CGI/1.1', + HTTP_ACCEPT => '*/*', + HTTP_HOST => 'test.example.com', + ); + my ($in, $out, $err) = ("", "", ""); + my $rc = run_with_env(\%env, [CGI], \$in, \$out, \$err); + my ($head, $body) = split(/\r\n\r\n/, $out, 2); + { head => $head, body => $body, rc => $rc, err => $err } +} |