From 052f26f3ada1042afa5acadbecc48b487f4e2d52 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 27 Feb 2016 21:57:57 +0000 Subject: move executables to script/ directory This seems to match more closely with what is expected of Perl packages based on how blib is used. Hopefully makes the top-level source tree less cluttered and things easier-to-find. --- script/public-inbox-httpd | 129 ++++++++++++++++++++++++++++++++++++++++++++++ script/public-inbox-index | 65 +++++++++++++++++++++++ script/public-inbox-init | 74 ++++++++++++++++++++++++++ script/public-inbox-learn | 89 ++++++++++++++++++++++++++++++++ script/public-inbox-mda | 111 +++++++++++++++++++++++++++++++++++++++ script/public-inbox-nntpd | 69 +++++++++++++++++++++++++ script/public-inbox.cgi | 32 ++++++++++++ 7 files changed, 569 insertions(+) create mode 100755 script/public-inbox-httpd create mode 100755 script/public-inbox-index create mode 100755 script/public-inbox-init create mode 100755 script/public-inbox-learn create mode 100755 script/public-inbox-mda create mode 100755 script/public-inbox-nntpd create mode 100755 script/public-inbox.cgi (limited to 'script') diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd new file mode 100755 index 00000000..6109af01 --- /dev/null +++ b/script/public-inbox-httpd @@ -0,0 +1,129 @@ +#!/usr/bin/perl -w +# Copyright (C) 2016 all contributors +# License: AGPL-3.0+ +# +# Standalone HTTP server for public-inbox. 
+use strict;
+use warnings;
+use Plack::Util;
+use PublicInbox::Daemon;
+use PublicInbox::HTTP;
+use Plack::Request;
+use Plack::Builder;
+my %httpds; # fileno(listener) => PublicInbox::HTTPD, created on first accept
+my $app; # PSGI app, (re)built by $refresh
+
+# Builds $app: either the PSGI file(s) named on the command line, or the
+# default PublicInbox::WWW app wrapped in the middleware enabled below.
+my $refresh = sub {
+	if (@ARGV) {
+		eval { $app = Plack::Util::load_psgi(@ARGV) };
+		if ($@) {
+			die $@,
+"$0 runs in /, command-line paths must be absolute\n";
+		}
+	} else {
+		require PublicInbox::WWW;
+		PublicInbox::WWW->preload;
+		my $www = PublicInbox::WWW->new;
+		$app = eval {
+			# Deflater is optional: undef here if it cannot be loaded
+			my $deflate_types = eval {
+				require Plack::Middleware::Deflater;
+				[ 'text/html', 'text/plain',
+				'application/atom+xml' ]
+			};
+			builder {
+				enable 'Chunked';
+				if ($deflate_types) {
+					enable 'Deflater',
+						content_type => $deflate_types
+				}
+				enable 'ReverseProxy';
+				enable 'Head';
+				sub { $www->call(@_) };
+			};
+		};
+		# the eval above would otherwise leave $app undef silently,
+		# e.g. when a middleware enabled in builder{} fails to load
+		warn "failed to build default PSGI app: $@" unless $app;
+	}
+};
+
+daemon_run('0.0.0.0:8080', $refresh,
+	sub ($$$) { # post_accept
+		my ($client, $addr, $srv) = @_;
+		my $fd = fileno($srv);
+		my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app);
+		PublicInbox::HTTP->new($client, $addr, $h),
+	});
+
+1;
+
+# Non-blocking watcher: invokes the stored callback when the wrapped
+# handle becomes readable, hangs up, or errors.
+package PublicInbox::HTTPD::Async;
+use strict;
+use warnings;
+use base qw(Danga::Socket);
+use fields qw(cb);
+
+sub new {
+	my ($class, $io, $cb) = @_;
+	my $self = fields::new($class);
+	$io->blocking(0);
+	$self->SUPER::new($io);
+	$self->{cb} = $cb;
+	$self->watch_read(1);
+	$self;
+}
+
+sub event_read { $_[0]->{cb}->() }
+sub event_hup { $_[0]->{cb}->() }
+sub event_err { $_[0]->{cb}->() }
+sub sysread { shift->{sock}->sysread(@_) }
+
+1;
+
+# Per-listener state: the PSGI app plus the base %env for that socket.
+package PublicInbox::HTTPD;
+use strict;
+use warnings;
+use Plack::Util;
+
+# exposed to PSGI apps through $env->{'pi-httpd.async'}
+sub pi_httpd_async {
+	my ($io, $cb) = @_;
+	PublicInbox::HTTPD::Async->new($io, $cb);
+}
+
+# $sock: listening socket, $app: PSGI app to serve on it
+sub new {
+	my ($class, $sock, $app) = @_;
+	my $n = getsockname($sock) or die "not a socket: $sock $!\n";
+	my ($port, $addr);
+	# unpack_sockaddr_in* hand back the address in packed binary form,
+	# convert it so SERVER_NAME holds a printable host
+	# NOTE(review): the length test presumably tells sockaddr_in6
+	# apart from sockaddr_in -- confirm on non-Linux platforms
+	if (length($n) >= 28) {
+		require Socket6;
+		($port, $addr) = Socket6::unpack_sockaddr_in6($n);
+		$addr = Socket6::inet_ntop(Socket6::AF_INET6(), $addr);
+	} else {
+		($port, $addr) = Socket::unpack_sockaddr_in($n);
+		$addr = Socket::inet_ntoa($addr);
+	}
+
+	my %env = (
+		REMOTE_HOST => '',
+		REMOTE_PORT => 0,
+		SERVER_NAME => $addr,
+		SERVER_PORT => $port,
+		SCRIPT_NAME => '',
+		'psgi.version' => [ 1, 1 ],
+		'psgi.errors' => \*STDERR,
+		'psgi.url_scheme' => 'http',
+		'psgi.nonblocking' => Plack::Util::TRUE,
+		'psgi.streaming' => Plack::Util::TRUE,
+		'psgi.run_once' => Plack::Util::FALSE,
+		'psgi.multithread' => Plack::Util::FALSE,
+		'psgi.multiprocess' => Plack::Util::TRUE,
+		'psgix.harakiri'=> Plack::Util::FALSE,
+		'psgix.input.buffered' => Plack::Util::TRUE,
+		'pi-httpd.async' => do {
+			no warnings 'once';
+			*pi_httpd_async
+		},
+	);
+	bless {
+		err => \*STDERR,
+		out => \*STDOUT,
+		app => $app,
+		env => \%env,
+	}, $class;
+}
+
+1;
diff --git a/script/public-inbox-index b/script/public-inbox-index
new file mode 100755
index 00000000..578d91d5
--- /dev/null
+++ b/script/public-inbox-index
@@ -0,0 +1,65 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2015 all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+# Basic tool to create a Xapian search index for a git repository
+# configured for public-inbox.
+# Usage with libeatmydata
+# highly recommended: eatmydata public-inbox-index GIT_DIR
+
+use strict;
+use warnings;
+my $usage = "public-inbox-index GIT_DIR";
+use PublicInbox::Config;
+eval { require PublicInbox::SearchIdx };
+if ($@) {
+	print STDERR "Search::Xapian required for $0\n";
+	exit 1;
+}
+my @dirs;
+
+# Runs "git rev-parse --git-dir" in a forked child (after chdir to $cd
+# when given) and returns what it printed; returns $cd itself when git
+# answers "." for it.
+sub resolve_git_dir {
+	my ($cd) = @_;
+	my @cmd = qw(git rev-parse --git-dir);
+	my $cmd = join(' ', @cmd);
+	my $pid = open my $fh, '-|';
+	defined $pid or die "forking $cmd failed: $!\n";
+	if ($pid == 0) {
+		if (defined $cd) {
+			chdir $cd or die "chdir $cd failed: $!\n";
+		}
+		exec @cmd;
+		die "Failed to exec $cmd: $!\n";
+	} else {
+		my $dir = eval {
+			local $/;
+			<$fh>;
+		};
+		close $fh or die "error in $cmd: $!\n";
+		chomp $dir;
+		return $cd if ($dir eq '.' && defined $cd);
+		$dir;
+	}
+}
+
+if (@ARGV) {
+	@dirs = map { resolve_git_dir($_) } @ARGV;
+} else {
+	@dirs = (resolve_git_dir());
+}
+
+sub usage { print STDERR "Usage: $usage\n"; exit 1 }
+usage() unless @dirs;
+
+foreach my $dir (@dirs) {
+	index_dir($dir);
+}
+
+# Refreshes git's server info for $git_dir, then syncs its search index.
+sub index_dir {
+	my ($git_dir) = @_;
+	-d $git_dir or die "$git_dir does not appear to be a git repository\n";
+
+	system('git', "--git-dir=$git_dir", 'update-server-info') and
+		die "git update-server-info failed for $git_dir";
+	my $s = PublicInbox::SearchIdx->new($git_dir, 1);
+	$s->index_sync;
+}
diff --git a/script/public-inbox-init b/script/public-inbox-init
new file mode 100755
index 00000000..d66361df
--- /dev/null
+++ b/script/public-inbox-init
@@ -0,0 +1,74 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014-2015 all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Initializes a public-inbox, basically a wrapper for git-init(1)
+use strict;
+use warnings;
+my $usage = "public-inbox-init NAME GIT_DIR HTTP_URL ADDRESS [ADDRESS..]";
+use PublicInbox::Config;
+use File::Temp qw/tempfile/;
+use File::Basename qw/dirname/;
+use File::Path qw/mkpath/;
+use Cwd qw/abs_path/;
+
+# run a command, die (showing $?) if it fails
+sub x { system(@_) and die join(' ', @_). " failed: $?\n" }
+sub usage { print STDERR "Usage: $usage\n"; exit 1 }
+
+my $name = shift @ARGV or usage();
+my $git_dir = shift @ARGV or usage();
+my $http_url = shift @ARGV or usage();
+my (@address) = @ARGV;
+@address or usage();
+my %seen; # lc(address) => 1 for addresses already configured for $name
+
+my $pi_config = PublicInbox::Config->default_file;
+my $dir = dirname($pi_config);
+mkpath($dir); # will croak on fatal errors
+# The new config is assembled in a temp file next to the real one and
+# renamed into place at the end.  UNLINK removes it at exit when we bail
+# out early (conflict below, or a failing command in x()); after the
+# final rename there is nothing left under that name to remove.
+my ($fh, $filename) = tempfile('pi-init-XXXXXXXX', DIR => $dir, UNLINK => 1);
+if (-e $pi_config) {
+	open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n";
+	my $old;
+	{
+		local $/;
+		$old = <$oh>;
+	}
+	print $fh $old or die "failed to write: $!\n";
+	close $oh or die "failed to close $pi_config: $!\n";
+
+	# yes, this conflict checking is racy if multiple instances of this
+	# script are run by the same $PI_DIR
+	my $cfg = PublicInbox::Config->new;
+	my $conflict;
+	foreach my $addr (@address) {
+		my $found = $cfg->lookup($addr);
+		if ($found) {
+			if ($found->{listname} ne $name) {
+				print STDERR
+					"`$addr' already defined for ",
+					"`$found->{listname}',\n",
+					"does not match intended `$name'\n";
+				$conflict = 1;
+			} else {
+				$seen{lc($addr)} = 1;
+			}
+		}
+	}
+
+	exit(1) if $conflict;
+}
+close $fh or die "failed to close $filename: $!\n";
+
+my $pfx = "publicinbox.$name";
+my @x = (qw/git config/, "--file=$filename");
+$git_dir = abs_path($git_dir);
+x(qw(git init -q --bare), $git_dir);
+foreach my $addr (@address) {
+	next if $seen{lc($addr)};
+	x(@x, "--add", "$pfx.address", $addr);
+}
+x(@x, "$pfx.url", $http_url);
+x(@x, "$pfx.mainrepo", $git_dir);
+
+rename $filename, $pi_config or
+	die "failed to rename `$filename' to `$pi_config': $!\n";
diff --git a/script/public-inbox-learn b/script/public-inbox-learn
new file mode 100755
index 00000000..0c7b4199
--- /dev/null
+++ b/script/public-inbox-learn
@@ -0,0 +1,89 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014-2015 all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Used for training spam (via
SpamAssassin) and removing messages from a
+# public-inbox
+my $usage = "$0 (spam|ham) < /path/to/message";
+use strict;
+use warnings;
+use PublicInbox::Config;
+use Email::MIME;
+use Email::Address;
+use IPC::Run qw/run/;
+my $train = shift or die "usage: $usage\n";
+if ($train !~ /\A(?:ham|spam)\z/) {
+	die "`$train' not recognized.\nusage: $usage\n";
+}
+
+my $pi_config = PublicInbox::Config->new;
+my $mime = Email::MIME->new(eval { local $/; <> });
+
+# get all recipients
+my %dests;
+foreach my $h (qw(Cc To)) {
+	foreach my $recipient (Email::Address->parse($mime->header($h))) {
+		$dests{lc($recipient->address)} = 1;
+	}
+}
+
+my ($name, $email, $date);
+
+if ($train eq "ham") {
+	require PublicInbox::MDA;
+	require PublicInbox::Filter;
+	PublicInbox::Filter->run($mime);
+	($name, $email, $date) = PublicInbox::MDA->author_info($mime);
+}
+
+my $in = $mime->as_string;
+my $err = 0; # exit status of the script, set once any inbox fails
+my @output = qw(> /dev/null > /dev/null);
+
+# n.b. message may be cross-posted to multiple public-inboxes
+foreach my $recipient (keys %dests) {
+	my $dst = $pi_config->lookup($recipient) or next;
+	my $git_dir = $dst->{mainrepo} or next;
+	# per-inbox state: captured stdout/stderr and a failure flag.
+	# These must not reuse the name $err, or the script-wide exit
+	# status above is shadowed and "exit $err" always reports success
+	my ($out, $stderr, $failed) = ("", "", 0);
+
+	# We do not touch GIT_COMMITTER_* env here so we can track
+	# who trained the message.
+	# We will not touch GIT_AUTHOR_* when learning spam messages, either
+	if ($train eq "spam") {
+		# This needs to be idempotent, as my inotify trainer
+		# may train for each cross-posted message, and this
+		# script already learns for every list in
+		# ~/.public-inbox/config
+		if (!run(["ssoma-rm", $git_dir], \$in, \$out, \$stderr)) {
+			if ($stderr !~ /^git cat-file .+ failed: 32768$/) {
+				$failed = 1;
+			}
+		}
+	} else { # $train eq "ham"
+		# no checking for spam here, we assume the message has
+		# been reviewed by a human at this point:
+		PublicInbox::MDA->set_list_headers($mime, $dst);
+		my $s = $mime->as_string;
+
+		local $ENV{GIT_AUTHOR_NAME} = $name;
+		local $ENV{GIT_AUTHOR_EMAIL} = $email;
+		local $ENV{GIT_AUTHOR_DATE} = $date;
+
+		# Ham messages are trained when they're marked into
+		# a SEEN state, so this is idempotent:
+		# only a failed delivery counts, and a CONFLICT on stderr
+		# (presumably a repeated delivery) is tolerated
+		my $ok = run([PublicInbox::MDA->cmd, $git_dir],
+				\$s, \$out, \$stderr);
+		if (!$ok && $stderr !~ /CONFLICT/) {
+			$failed = 1;
+		}
+	}
+	if (!run([qw(spamc -L), $train], \$in, @output)) {
+		$failed = 1;
+	}
+
+	# remember the failure for the exit status, and only refresh the
+	# search index of inboxes that were updated without error
+	$err ||= $failed;
+	$failed or eval {
+		require PublicInbox::SearchIdx;
+		PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
+	};
+}
+
+exit $err;
diff --git a/script/public-inbox-mda b/script/public-inbox-mda
new file mode 100755
index 00000000..24feeb81
--- /dev/null
+++ b/script/public-inbox-mda
@@ -0,0 +1,111 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2013-2015 all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Mail delivery agent for public-inbox, run from your MTA upon mail delivery
+use strict;
+use warnings;
+my $usage = 'public-inbox-mda < rfc2822_message';
+
+use Email::Filter;
+use Email::MIME;
+use Email::Address;
+use File::Path::Expand qw/expand_filename/;
+use IPC::Run qw(run);
+use PublicInbox::MDA;
+use PublicInbox::Filter;
+use PublicInbox::Config;
+
+# n.b: hopefully we can setup the emergency path without bailing due to
+# user error, we really want to setup the emergency destination ASAP
+# in case there's bugs in our code or
user error.
+my $emergency = $ENV{PI_EMERGENCY} || '~/.public-inbox/emergency/';
+$emergency = expand_filename($emergency);
+
+# this reads the message from stdin
+my $filter = Email::Filter->new(emergency => $emergency);
+my $config = PublicInbox::Config->new;
+
+my $recipient = $ENV{ORIGINAL_RECIPIENT};
+defined $recipient or die "ORIGINAL_RECIPIENT not defined in ENV\n";
+my $dst = $config->lookup($recipient); # first check
+defined $dst or exit(1);
+my $main_repo = $dst->{mainrepo} or exit(1);
+my $filtered; # string dest
+
+if (PublicInbox::MDA->precheck($filter, $dst->{address}) &&
+    do_spamc($filter->simple, \$filtered)) {
+	# update our message with SA headers (in case our filter rejects it)
+	my $msg = Email::MIME->new(\$filtered);
+	$filtered = undef;
+	$filter->simple($msg);
+
+	# publicinbox.<name>.filter selects what PublicInbox::Filter gets
+	# as its second argument: the Email::Filter object for 'reject'
+	# (also the default when unset), undef for 'scrub'.
+	# The test must look at the config value $fcfg; comparing the
+	# $filter object itself never matches 'reject'.
+	my $filter_arg;
+	my $fcfg = $dst->{filter};
+	if (!defined $fcfg || $fcfg eq 'reject') {
+		$filter_arg = $filter;
+	} elsif ($fcfg eq 'scrub') {
+		$filter_arg = undef; # the default for legacy versions
+	} else {
+		warn "publicinbox.$dst->{listname}.filter=$fcfg invalid\n";
+		warn "must be either 'scrub' or 'reject' (the default)\n";
+	}
+
+	if (PublicInbox::Filter->run($msg, $filter_arg)) {
+		# run spamc again on the HTML-free message
+		if (do_spamc($msg, \$filtered)) {
+			$msg = Email::MIME->new(\$filtered);
+			PublicInbox::MDA->set_list_headers($msg, $dst);
+			$filter->simple($msg);
+
+			my ($name, $email, $date) =
+					PublicInbox::MDA->author_info($msg);
+
+			# n.b. END blocks are registered at compile time,
+			# so this runs at every exit with $? == 0
+			END {
+				index_sync($main_repo) if ($? == 0);
+			};
+
+			local $ENV{GIT_AUTHOR_NAME} = $name;
+			local $ENV{GIT_AUTHOR_EMAIL} = $email;
+			local $ENV{GIT_AUTHOR_DATE} = $date;
+			local $ENV{GIT_COMMITTER_EMAIL} = $recipient;
+			local $ENV{GIT_COMMITTER_NAME} = $dst->{listname};
+
+			$filter->pipe(PublicInbox::MDA->cmd, $main_repo);
+		}
+	}
+} else {
+	# Ensure emergency spam gets spamassassin headers.
+	# This makes it easier to prioritize obvious spam from less obvious
+	if (defined($filtered) && $filtered ne '') {
+		my $drop = Email::MIME->new(\$filtered);
+		$filtered = undef;
+		$filter->simple($drop);
+	}
+}
+exit 0; # goes to emergency
+
+# we depend on "report_safe 0" in /etc/spamassassin/*.cf with --headers
+# not using Email::Filter->pipe here since we want the stdout of
+# the command even on failure (spamc will set $? on error).
+# Feeds $msg->as_string to spamc and stores its stdout in $$out;
+# returns 1 on success, 0 if spamc failed or produced no output.
+sub do_spamc {
+	my ($msg, $out) = @_;
+	eval {
+		my $orig = $msg->as_string;
+		run([qw/spamc -E --headers/], \$orig, $out);
+	};
+
+	return ($@ || $? || !defined($$out) || $$out eq '') ? 0 : 1;
+}
+
+# Best-effort refresh of git server info and the search index for
+# $git_dir; failures in either step are ignored.
+sub index_sync {
+	my ($git_dir) = @_;
+
+	# potentially user-visible, ignore errors:
+	system('git', "--git-dir=$git_dir", 'update-server-info');
+
+	eval {
+		require PublicInbox::SearchIdx;
+		PublicInbox::SearchIdx->new($git_dir, 2)->index_sync;
+	};
+}
diff --git a/script/public-inbox-nntpd b/script/public-inbox-nntpd
new file mode 100755
index 00000000..23d269d4
--- /dev/null
+++ b/script/public-inbox-nntpd
@@ -0,0 +1,69 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2015 all contributors
+# License: AGPLv3 or later (https://www.gnu.org/licenses/agpl-3.0.txt)
+#
+# Standalone NNTP server for public-inbox.
+use strict;
+use warnings;
+require PublicInbox::Daemon;
+require PublicInbox::NewsGroup;
+require PublicInbox::NNTP;
+require PublicInbox::Config;
+my $nntpd = PublicInbox::NNTPD->new;
+# NOTE(review): daemon_run is not defined in this file and "require" does
+# not import anything -- presumably PublicInbox::Daemon provides it to
+# main some other way; confirm
+daemon_run('0.0.0.0:119',
+	sub { $nntpd->refresh_groups }, # refresh
+	sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }); # post_accept
+
+1;
+# Server-wide state shared by all NNTP clients: the usable newsgroups,
+# by name ({groups}) and as a name-sorted list ({grouplist}).
+package PublicInbox::NNTPD;
+use strict;
+use warnings;
+
+sub new {
+	my ($class) = @_;
+	bless {
+		groups => {},
+		err => \*STDERR,
+		out => \*STDOUT,
+		grouplist => [],
+	}, $class;
+}
+
+# Re-reads the public-inbox config and rebuilds {groups}/{grouplist}.
+# Declared without a prototype: this is a method taking $self, and an
+# empty "()" prototype would contradict that for any direct call.
+sub refresh_groups {
+	my ($self) = @_;
+	my $pi_config = PublicInbox::Config->new;
+	my $new = {};
+	my @list;
+	foreach my $k (keys %$pi_config) {
+		$k =~ /\Apublicinbox\.([^\.]+)\.mainrepo\z/ or next;
+		my $g = $1;
+		my $git_dir = $pi_config->{$k};
+		my $addr = $pi_config->{"publicinbox.$g.address"};
+		my $ngname = $pi_config->{"publicinbox.$g.newsgroup"};
+		# an explicit newsgroup name overrides the config section name
+		if (defined $ngname) {
+			next if ($ngname eq ''); # disabled
+			$g = $ngname;
+		}
+		my $ng = PublicInbox::NewsGroup->new($g, $git_dir, $addr);
+		my $old_ng = $self->{groups}->{$g};
+
+		# Reuse the old one if possible since it can hold
+		# references to valid mm and gcf objects
+		if ($old_ng) {
+			$old_ng->update($ng);
+			$ng = $old_ng;
+		}
+
+		# Only valid if msgmap and search works
+		if ($ng->usable) {
+			$new->{$g} = $ng;
+			push @list, $ng;
+		}
+	}
+	@list = sort { $a->{name} cmp $b->{name} } @list;
+	$self->{grouplist} = \@list;
+	# this will destroy old groups that got deleted
+	%{$self->{groups}} = %$new;
+}
+
+1;
diff --git a/script/public-inbox.cgi b/script/public-inbox.cgi
new file mode 100755
index 00000000..ee9510c1
--- /dev/null
+++ b/script/public-inbox.cgi
@@ -0,0 +1,32 @@
+#!/usr/bin/perl -w
+# Copyright (C) 2014-2016 all contributors
+# License: AGPL-3.0+ or later
+#
+# Enables using PublicInbox::WWW as a CGI script
+use strict;
+use warnings;
+use Plack::Loader;
+use Plack::Builder;
+use Plack::Request;
+use Plack::Handler::CGI;
+use PublicInbox::WWW;
+BEGIN { PublicInbox::WWW->preload if $ENV{MOD_PERL} }
+my $www = PublicInbox::WWW->new;
+my $have_deflater = eval { require Plack::Middleware::Deflater; 1 };
+my $app = builder {
+	if ($have_deflater) {
+		enable 'Deflater',
+			content_type => [ 'text/html', 'text/plain',
+					'application/atom+xml' ];
+	}
+
+	# Enable to ensure redirects and Atom feed URLs are generated
+	# properly when running behind a reverse proxy server which
+	# sets X-Forwarded-For and X-Forwarded-Proto request headers.
+	# See Plack::Middleware::ReverseProxy documentation for details
+	# enable 'ReverseProxy';
+
+	enable 'Head';
+	sub { $www->call(@_) };
+};
+Plack::Handler::CGI->new->run($app);
-- 
cgit v1.2.3-24-ge0c7