about summary refs log tree commit homepage
path: root/script
diff options
Diffstat (limited to 'script')
7 files changed, 569 insertions, 0 deletions
diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd
new file mode 100755
index 00000000..6109af01
--- /dev/null
+++ b/script/public-inbox-httpd
@@ -0,0 +1,129 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Standalone HTTP server for public-inbox.
+use strict;
+use warnings;
+use Plack::Util;
+use PublicInbox::Daemon;
+use PublicInbox::HTTP;
+use Plack::Request;
+use Plack::Builder;
+# Cache of PublicInbox::HTTPD objects, keyed by the fileno of the listen
+# socket they were created for.
+my %httpds;
+# The PSGI application handed to new PublicInbox::HTTPD objects;
+# (re)assigned by $refresh.
+my $app;
+# Refresh callback passed to daemon_run(): (re)builds $app.
+# - With command-line arguments, they are given to
+#   Plack::Util::load_psgi(); a load failure is fatal.
+# - Without arguments, PublicInbox::WWW is wrapped in the default
+#   middleware stack (Chunked, optional Deflater, ReverseProxy, Head).
+my $refresh = sub {
+        if (@ARGV) {
+                eval { $app = Plack::Util::load_psgi(@ARGV) };
+                if ($@) {
+                        die $@,
+"$0 runs in /, command-line paths must be absolute\n";
+                }
+        } else {
+                require PublicInbox::WWW;
+                PublicInbox::WWW->preload;
+                my $www = PublicInbox::WWW->new;
+                $app = eval {
+                        # Deflater is optional: $deflate_types stays undef
+                        # when the middleware cannot be loaded
+                        my $deflate_types = eval {
+                                require Plack::Middleware::Deflater;
+                                [ 'text/html', 'text/plain',
+                                        'application/atom+xml' ]
+                        };
+                        builder {
+                                enable 'Chunked';
+                                if ($deflate_types) {
+                                        enable 'Deflater',
+                                                content_type => $deflate_types
+                                }
+                                enable 'ReverseProxy';
+                                enable 'Head';
+                                sub { $www->call(@_) };
+                        };
+                };
+                # the eval above leaves $app undef on failure; at least
+                # report why instead of failing silently
+                warn "failed to build default PSGI app: $@" unless $app;
+        }
+        # Drop cached HTTPD objects so sockets accepted from now on use the
+        # freshly built $app.  NOTE(review): only matters if daemon_run()
+        # invokes this callback again after connections were accepted
+        # (presumably on reload) -- confirm in PublicInbox::Daemon.
+        %httpds = ();
+};
+# NOTE(review): the first argument is presumably the default listen
+# address -- confirm against PublicInbox::Daemon.
+daemon_run('', $refresh,
+        sub ($$$) { # post_accept
+                my ($client, $addr, $srv) = @_;
+                my $fd = fileno($srv);
+                # one PublicInbox::HTTPD per listen socket, created lazily
+                my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app);
+                PublicInbox::HTTP->new($client, $addr, $h);
+        });
+# Small Danga::Socket subclass: watches a handle for readability and
+# invokes a caller-supplied callback on read, hangup and error events.
+package PublicInbox::HTTPD::Async;
+use strict;
+use warnings;
+use base qw(Danga::Socket);
+use fields qw(cb);
+# new($class, $io, $cb): put $io into non-blocking mode, register it with
+# the parent constructor, remember $cb and enable read watching.
+# Returns the new object.
+sub new {
+        my ($class, $io, $cb) = @_;
+        my $self = fields::new($class);
+        $io->blocking(0);
+        $self->SUPER::new($io);
+        $self->{cb} = $cb;
+        $self->watch_read(1);
+        $self;
+}
+# All three events invoke the stored callback without arguments.
+sub event_read { $_[0]->{cb}->() }
+sub event_hup { $_[0]->{cb}->() }
+sub event_err { $_[0]->{cb}->() }
+# Forward sysread() and its arguments to the object's {sock} field
+# (presumably the handle stored by the parent constructor -- confirm).
+sub sysread { shift->{sock}->sysread(@_) }
+# Per-listen-socket state for the HTTP server: the PSGI app plus the
+# parts of the PSGI environment that are the same for every request.
+package PublicInbox::HTTPD;
+use strict;
+use warnings;
+use Plack::Util;
+# pi_httpd_async($io, $cb): create a PublicInbox::HTTPD::Async watcher for
+# $io which invokes $cb; exposed to apps as $env->{'pi-httpd.async'}.
+sub pi_httpd_async {
+        my ($io, $cb) = @_;
+        PublicInbox::HTTPD::Async->new($io, $cb);
+}
+# new($class, $sock, $app): build the object for listen socket $sock.
+# Dies if getsockname() fails on $sock.
+# Returns a hash-based object with the keys err, out, app and env.
+sub new {
+        my ($class, $sock, $app) = @_;
+        my $n = getsockname($sock) or die "not a socket: $sock $!\n";
+        my ($port, $addr);
+        # the length check selects IPv6 unpacking for longer addresses
+        if (length($n) >= 28) {
+                require Socket6;
+                ($port, $addr) = Socket6::unpack_sockaddr_in6($n);
+                # unpack_* return the address in packed binary form, turn
+                # it into text; brackets keep "host:port" unambiguous
+                $addr = Socket6::inet_ntop(Socket6::AF_INET6(), $addr);
+                $addr = "[$addr]";
+        } else {
+                # do not rely on some other module having loaded Socket
+                require Socket;
+                ($port, $addr) = Socket::unpack_sockaddr_in($n);
+                $addr = Socket::inet_ntoa($addr);
+        }
+        my %env = (
+                REMOTE_HOST => '',
+                REMOTE_PORT => 0,
+                SERVER_NAME => $addr,
+                SERVER_PORT => $port,
+                SCRIPT_NAME => '',
+                'psgi.version' => [ 1, 1 ],
+                'psgi.errors' => \*STDERR,
+                'psgi.url_scheme' => 'http',
+                'psgi.nonblocking' => Plack::Util::TRUE,
+                'psgi.streaming' => Plack::Util::TRUE,
+                'psgi.run_once'         => Plack::Util::FALSE,
+                'psgi.multithread' => Plack::Util::FALSE,
+                'psgi.multiprocess' => Plack::Util::TRUE,
+                'psgix.harakiri'=> Plack::Util::FALSE,
+                'psgix.input.buffered' => Plack::Util::TRUE,
+                # the glob of pi_httpd_async() above; the name is used only
+                # once, hence the warning is silenced
+                'pi-httpd.async' => do {
+                        no warnings 'once';
+                        *pi_httpd_async
+                },
+        );
+        bless {
+                err => \*STDERR,
+                out => \*STDOUT,
+                app => $app,
+                env => \%env,
+        }, $class;
+}
diff --git a/script/public-inbox-index b/script/public-inbox-index
new file mode 100755
index 00000000..578d91d5
--- /dev/null
+++ b/script/public-inbox-index
@@ -0,0 +1,65 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Basic tool to create a Xapian search index for a git repository
+# configured for public-inbox.
+# Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/>
+# highly recommended: eatmydata public-inbox-index GIT_DIR
+use strict;
+use warnings;
+my $usage = "public-inbox-index GIT_DIR";
+use PublicInbox::Config;
+# PublicInbox::SearchIdx is loaded at runtime so that a failure to load it
+# produces the hint below and exit status 1 instead of a compile error.
+eval { require PublicInbox::SearchIdx };
+if ($@) {
+        print STDERR "Search::Xapian required for $0\n";
+        exit 1;
+}
+# git directories to index, filled in from @ARGV further down
+my @dirs;
+# resolve_git_dir($cd): run "git rev-parse --git-dir" in a forked child,
+# after changing into $cd when $cd is defined.
+# Returns $cd itself when git answers "." for an explicit $cd, otherwise
+# git's output with the trailing newline removed.
+# Dies if the fork, chdir or exec fails, if git exits non-zero, or if git
+# prints nothing.
+sub resolve_git_dir {
+        my ($cd) = @_;
+        my @cmd = qw(git rev-parse --git-dir);
+        my $cmd = join(' ', @cmd);
+        # implicit fork: the child's stdout is connected to $fh
+        my $pid = open my $fh, '-|';
+        defined $pid or die "forking $cmd failed: $!\n";
+        if ($pid == 0) { # child
+                if (defined $cd) {
+                        chdir $cd or die "chdir $cd failed: $!\n";
+                }
+                exec @cmd;
+                die "Failed to exec $cmd: $!\n";
+        }
+        # parent: slurp everything the child printed
+        my $dir = do { local $/; <$fh> };
+        unless (close $fh) {
+                # closing a pipe fails with $! == 0 when the child merely
+                # exited non-zero; the wait status is in $? in that case
+                die "error in $cmd: ", ($! ? "$!" : "exit status $?"), "\n";
+        }
+        defined $dir or die "no output from $cmd\n";
+        chomp $dir;
+        return $cd if ($dir eq '.' && defined $cd);
+        $dir;
+}
+# Every command-line argument names a directory to resolve; with no
+# arguments the current directory is resolved instead.
+if (@ARGV) {
+        @dirs = map { resolve_git_dir($_) } @ARGV;
+} else {
+        @dirs = (resolve_git_dir());
+}
+# Print the usage line to STDERR and exit with status 1.
+sub usage { print STDERR "Usage: $usage\n"; exit 1 }
+usage() unless @dirs;
+foreach my $dir (@dirs) {
+        index_dir($dir);
+}
+# index_dir($git_dir): run "git update-server-info" for $git_dir, then
+# sync its search index through PublicInbox::SearchIdx.
+# Dies if $git_dir is not a directory or if the git command fails.
+sub index_dir {
+        my ($git_dir) = @_;
+        -d $git_dir or die "$git_dir does not appear to be a git repository\n";
+        # system() returns non-zero on failure; include the wait status
+        system('git', "--git-dir=$git_dir", 'update-server-info') and
+                die "git update-server-info failed for $git_dir: $?\n";
+        my $s = PublicInbox::SearchIdx->new($git_dir, 1);
+        $s->index_sync;
+}
diff --git a/script/public-inbox-init b/script/public-inbox-init
new file mode 100755
index 00000000..d66361df
--- /dev/null
+++ b/script/public-inbox-init
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014-2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Initializes a public-inbox, basically a wrapper for git-init(1)
+use strict;
+use warnings;
+my $usage = "public-inbox-init NAME GIT_DIR HTTP_URL ADDRESS [ADDRESS..]";
+use PublicInbox::Config;
+use File::Temp qw/tempfile/;
+use File::Basename qw/dirname/;
+use File::Path qw/mkpath/;
+use Cwd qw/abs_path/;
+# x(@cmd): run @cmd via system(); die with the command line and $? when
+# system() reports a non-zero status.
+sub x {
+        my @cmd = @_;
+        my $status = system(@cmd);
+        return $status unless $status;
+        die join(' ', @cmd). " failed: $?\n";
+}
+# Print the usage line to STDERR and exit with status 1.
+sub usage {
+        print STDERR "Usage: $usage\n";
+        exit 1;
+}
+# Positional arguments: NAME GIT_DIR HTTP_URL ADDRESS [ADDRESS..]
+my $name = shift @ARGV or usage();
+my $git_dir = shift @ARGV or usage();
+my $http_url = shift @ARGV or usage();
+my (@address) = @ARGV;
+@address or usage();
+# lower-cased addresses which are already configured for $name
+my %seen;
+my $pi_config = PublicInbox::Config->default_file;
+my $dir = dirname($pi_config);
+mkpath($dir); # will croak on fatal errors
+# The new config is assembled in a temporary file next to the real one
+# and renamed over it at the very end.
+my ($fh, $filename) = tempfile('pi-init-XXXXXXXX', DIR => $dir);
+if (-e $pi_config) {
+        # start from a copy of the existing config
+        open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n";
+        my $old;
+        {
+                local $/;
+                $old = <$oh>;
+        }
+        print $fh $old or die "failed to write: $!\n";
+        close $oh or die "failed to close $pi_config: $!\n";
+        # yes, this conflict checking is racy if multiple instances of this
+        # script are run by the same $PI_DIR
+        my $cfg = PublicInbox::Config->new;
+        my $conflict;
+        foreach my $addr (@address) {
+                my $found = $cfg->lookup($addr);
+                if ($found) {
+                        if ($found->{listname} ne $name) {
+                                print STDERR
+                                        "`$addr' already defined for ",
+                                        "`$found->{listname}',\n",
+                                        "does not match intended `$name'\n";
+                                $conflict = 1;
+                        } else {
+                                $seen{lc($addr)} = 1;
+                        }
+                }
+        }
+        if ($conflict) {
+                # do not leave the temporary file behind
+                unlink $filename;
+                exit(1);
+        }
+}
+close $fh or die "failed to close $filename: $!\n";
+my $pfx = "publicinbox.$name";
+# common prefix of every "git config" call against the temporary file
+my @x = (qw/git config/, "--file=$filename");
+$git_dir = abs_path($git_dir);
+x(qw(git init -q --bare), $git_dir);
+foreach my $addr (@address) {
+        next if $seen{lc($addr)};
+        x(@x, "--add", "$pfx.address", $addr);
+}
+x(@x, "$pfx.url", $http_url);
+x(@x, "$pfx.mainrepo", $git_dir);
+rename $filename, $pi_config or
+        die "failed to rename `$filename' to `$pi_config': $!\n";
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
new file mode 100755
index 00000000..0c7b4199
--- /dev/null
+++ b/script/public-inbox-learn
@@ -0,0 +1,89 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014-2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Used for training spam (via SpamAssassin) and removing messages from a
+# public-inbox
+my $usage = "$0 (spam|ham) < /path/to/message";
+use strict;
+use warnings;
+use PublicInbox::Config;
+use Email::MIME;
+use Email::Address;
+use IPC::Run qw/run/;
+# The first argument selects the training mode: "ham" or "spam".
+my $train = shift or die "usage: $usage\n";
+if ($train !~ /\A(?:ham|spam)\z/) {
+        die "`$train' not recognized.\nusage: $usage\n";
+}
+my $pi_config = PublicInbox::Config->new;
+# the message is slurped from the remaining arguments or stdin
+my $mime = Email::MIME->new(eval { local $/; <> });
+# get all recipients
+my %dests;
+foreach my $h (qw(Cc To)) {
+        foreach my $recipient (Email::Address->parse($mime->header($h))) {
+                $dests{lc($recipient->address)} = 1;
+        }
+}
+my ($name, $email, $date);
+if ($train eq "ham") {
+        require PublicInbox::MDA;
+        require PublicInbox::Filter;
+        PublicInbox::Filter->run($mime);
+        ($name, $email, $date) = PublicInbox::MDA->author_info($mime);
+}
+my $in = $mime->as_string;
+# exit status of the script: set to 1 once any recipient fails
+my $err = 0;
+my @output = qw(> /dev/null > /dev/null);
+# n.b. message may be cross-posted to multiple public-inboxes
+foreach my $recipient (keys %dests) {
+        my $dst = $pi_config->lookup($recipient) or next;
+        my $git_dir = $dst->{mainrepo} or next;
+        # The captured stderr gets its own name: reusing $err here would
+        # shadow the exit status above and make the script always exit 0.
+        my ($out, $stderr) = ("", "");
+        my $failed = 0;
+        # We do not touch GIT_COMMITTER_* env here so we can track
+        # who trained the message.
+        # We will not touch GIT_AUTHOR_* when learning spam messages, either
+        if ($train eq "spam") {
+                # This needs to be idempotent, as my inotify trainer
+                # may train for each cross-posted message, and this
+                # script already learns for every list in
+                # ~/.public-inbox/config
+                if (!run(["ssoma-rm", $git_dir], \$in, \$out, \$stderr)) {
+                        if ($stderr !~ /^git cat-file .+ failed: 32768$/) {
+                                $failed = 1;
+                        }
+                }
+        } else { # $train eq "ham"
+                # no checking for spam here, we assume the message has
+                # been reviewed by a human at this point:
+                PublicInbox::MDA->set_list_headers($mime, $dst);
+                my $s  = $mime->as_string;
+                local $ENV{GIT_AUTHOR_NAME} = $name;
+                local $ENV{GIT_AUTHOR_EMAIL} = $email;
+                local $ENV{GIT_AUTHOR_DATE} = $date;
+                # Ham messages are trained when they're marked into
+                # a SEEN state, so this is idempotent: a failed run which
+                # reports CONFLICT is tolerated, any other failure is not
+                if (!run([PublicInbox::MDA->cmd, $git_dir],
+                                \$s, \$out, \$stderr)) {
+                        if ($stderr !~ /CONFLICT/) {
+                                $failed = 1;
+                        }
+                }
+        }
+        if (!run([qw(spamc -L), $train], \$in, @output)) {
+                $failed = 1;
+        }
+        $err = 1 if $failed;
+        # best-effort index update, skipped for recipients which failed
+        $failed or eval {
+                require PublicInbox::SearchIdx;
+                PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
+        };
+}
+exit $err;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
new file mode 100755
index 00000000..24feeb81
--- /dev/null
+++ b/script/public-inbox-mda
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2013-2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Mail delivery agent for public-inbox, run from your MTA upon mail delivery
+use strict;
+use warnings;
+my $usage = 'public-inbox-mda < rfc2822_message';
+use Email::Filter;
+use Email::MIME;
+use Email::Address;
+use File::Path::Expand qw/expand_filename/;
+use IPC::Run qw(run);
+use PublicInbox::MDA;
+use PublicInbox::Filter;
+use PublicInbox::Config;
+# n.b: hopefully we can setup the emergency path without bailing due to
+# user error, we really want to setup the emergency destination ASAP
+# in case there's bugs in our code or user error.
+my $emergency = $ENV{PI_EMERGENCY} || '~/.public-inbox/emergency/';
+$emergency = expand_filename($emergency);
+# this reads the message from stdin
+my $filter = Email::Filter->new(emergency => $emergency);
+my $config = PublicInbox::Config->new;
+my $recipient = $ENV{ORIGINAL_RECIPIENT};
+defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n";
+my $dst = $config->lookup($recipient); # first check
+defined $dst or exit(1);
+my $main_repo = $dst->{mainrepo} or exit(1);
+my $filtered; # string dest
+if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
+    do_spamc($filter->simple, \$filtered)) {
+        # update our message with SA headers (in case our filter rejects it)
+        my $msg = Email::MIME->new(\$filtered);
+        $filtered = undef;
+        $filter->simple($msg);
+        # Second argument for PublicInbox::Filter->run: the Email::Filter
+        # object for "reject" (also the default when the option is unset),
+        # undef for "scrub".
+        my $filter_arg;
+        my $fcfg = $dst->{filter};
+        # compare the configured value ($fcfg); $filter is the Email::Filter
+        # object and never equals 'reject'
+        if (!defined $fcfg || $fcfg eq 'reject') {
+                $filter_arg = $filter;
+        } elsif ($fcfg eq 'scrub') {
+                $filter_arg = undef; # the default for legacy versions
+        } else {
+                warn "publicinbox.$dst->{listname}.filter=$fcfg invalid\n";
+                warn "must be either 'scrub' or 'reject' (the default)\n";
+        }
+        if (PublicInbox::Filter->run($msg, $filter_arg)) {
+                # run spamc again on the HTML-free message
+                if (do_spamc($msg, \$filtered)) {
+                        $msg = Email::MIME->new(\$filtered);
+                        PublicInbox::MDA->set_list_headers($msg, $dst);
+                        $filter->simple($msg);
+                        my ($name, $email, $date) =
+                                        PublicInbox::MDA->author_info($msg);
+                        # END blocks are registered at compile time, so this
+                        # runs at exit no matter which branch was taken; it
+                        # only indexes when the exit status is 0
+                        END {
+                                index_sync($main_repo) if ($? == 0);
+                        };
+                        local $ENV{GIT_AUTHOR_NAME} = $name;
+                        local $ENV{GIT_AUTHOR_EMAIL} = $email;
+                        local $ENV{GIT_AUTHOR_DATE} = $date;
+                        local $ENV{GIT_COMMITTER_EMAIL} = $recipient;
+                        local $ENV{GIT_COMMITTER_NAME} = $dst->{listname};
+                        $filter->pipe(PublicInbox::MDA->cmd, $main_repo);
+                }
+        }
+} else {
+        # Ensure emergency spam gets spamassassin headers.
+        # This makes it easier to prioritize obvious spam from less obvious
+        if (defined($filtered) && $filtered ne '') {
+                my $drop = Email::MIME->new(\$filtered);
+                $filtered = undef;
+                $filter->simple($drop);
+        }
+}
+exit 0; # goes to emergency
+# we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers
+# not using Email::Filter->pipe here since we want the stdout of
+# the command even on failure (spamc will set $? on error).
+# do_spamc($msg, $out): feed $msg->as_string to "spamc -E --headers" and
+# capture spamc's stdout in the scalar referenced by $out.
+# Returns 1 when nothing died, $? is zero and the captured output is
+# non-empty; returns 0 otherwise.  The output is stored in $$out even
+# when 0 is returned.
+sub do_spamc {
+        my ($msg, $out) = @_;
+        eval {
+                my $orig = $msg->as_string;
+                run([qw/spamc -E --headers/], \$orig, $out);
+        };
+        return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1;
+}
+# index_sync($git_dir): run "git update-server-info" for $git_dir and
+# sync its search index.  Both steps are best-effort: the result of
+# system() is ignored and errors from the index update are trapped.
+sub index_sync {
+        my ($git_dir) = @_;
+        # potentially user-visible, ignore errors:
+        system('git', "--git-dir=$git_dir", 'update-server-info');
+        eval {
+                require PublicInbox::SearchIdx;
+                PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
+        };
+}
diff --git a/script/public-inbox-nntpd b/script/public-inbox-nntpd
new file mode 100755
index 00000000..23d269d4
--- /dev/null
+++ b/script/public-inbox-nntpd
@@ -0,0 +1,69 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2015 all contributors <meta@public-inbox.org>
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Standalone NNTP server for public-inbox.
+use strict;
+use warnings;
+require PublicInbox::Daemon;
+require PublicInbox::NewsGroup;
+require PublicInbox::NNTP;
+require PublicInbox::Config;
+# Single server-state object; both callbacks below close over it.
+my $nntpd = PublicInbox::NNTPD->new;
+# NOTE(review): the two callbacks below are closed by `});' but no call
+# is opened here; the opening line of the call (presumably daemon_run(
+# with a listen address, as in public-inbox-httpd) appears to be missing
+# and must be restored before this script can compile.
+        sub { $nntpd->refresh_groups }, # refresh
+        sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }); # post_accept
+# Server-wide state of the NNTP daemon: the known newsgroups, by name
+# ({groups}) and as a name-sorted list ({grouplist}).
+package PublicInbox::NNTPD;
+use strict;
+use warnings;
+# new($class): return an object with no groups; err and out refer to
+# STDERR and STDOUT.
+sub new {
+        my ($class) = @_;
+        bless {
+                groups => {},
+                err => \*STDERR,
+                out => \*STDOUT,
+                grouplist => [],
+        }, $class;
+}
+# refresh_groups($self): re-read the public-inbox config and rebuild
+# {groups} and {grouplist} from every "publicinbox.NAME.mainrepo" entry.
+# (No prototype: this is a method and receives $self.)
+sub refresh_groups {
+        my ($self) = @_;
+        my $pi_config = PublicInbox::Config->new;
+        my $new = {};
+        my @list;
+        foreach my $k (keys %$pi_config) {
+                $k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next;
+                my $g = $1;
+                my $git_dir = $pi_config->{$k};
+                my $addr = $pi_config->{"publicinbox.$g.address"};
+                # an explicit newsgroup name overrides the section name
+                my $ngname = $pi_config->{"publicinbox.$g.newsgroup"};
+                if (defined $ngname) {
+                        next if ($ngname eq ''); # disabled
+                        $g = $ngname;
+                }
+                my $ng = PublicInbox::NewsGroup->new($g, $git_dir, $addr);
+                my $old_ng = $self->{groups}->{$g};
+                # Reuse the old one if possible since it can hold
+                # references to valid mm and gcf objects
+                if ($old_ng) {
+                        $old_ng->update($ng);
+                        $ng = $old_ng;
+                }
+                # Only valid if msgmap and search works
+                if ($ng->usable) {
+                        $new->{$g} = $ng;
+                        push @list, $ng;
+                }
+        }
+        @list = sort { $a->{name} cmp $b->{name} } @list;
+        $self->{grouplist} = \@list;
+        # this will destroy old groups that got deleted
+        %{$self->{groups}} = %$new;
+}
diff --git a/script/public-inbox.cgi b/script/public-inbox.cgi
new file mode 100755
index 00000000..ee9510c1
--- /dev/null
+++ b/script/public-inbox.cgi
@@ -0,0 +1,32 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014-2016 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ or later <https://www.gnu.org/licenses/agpl-3.0.txt>
+# Enables using PublicInbox::WWW as a CGI script
+use strict;
+use warnings;
+use Plack::Loader;
+use Plack::Builder;
+use Plack::Request;
+use Plack::Handler::CGI;
+use PublicInbox::WWW;
+BEGIN { PublicInbox::WWW->preload if $ENV{MOD_PERL} }
+my $www = PublicInbox::WWW->new;
+my $have_deflater = eval { require Plack::Middleware::Deflater; 1 };
+my $app = builder {
+        if ($have_deflater) {
+                enable 'Deflater',
+                        content_type => [ 'text/html', 'text/plain',
+                                        'application/atom+xml' ];
+        }
+        # Enable to ensure redirects and Atom feed URLs are generated
+        # properly when running behind a reverse proxy server which
+        # sets X-Forwarded-For and X-Forwarded-Proto request headers.
+        # See Plack::Middleware::ReverseProxy documentation for details
+        # enable 'ReverseProxy';
+        enable 'Head';
+        sub { $www->call(@_) };