diff options
Diffstat (limited to 'script')
-rwxr-xr-x | script/lei | 144 | ||||
-rwxr-xr-x | script/public-inbox-cindex | 102 | ||||
-rwxr-xr-x | script/public-inbox-clone | 70 | ||||
-rwxr-xr-x | script/public-inbox-compact | 27 | ||||
-rwxr-xr-x | script/public-inbox-convert | 79 | ||||
-rwxr-xr-x | script/public-inbox-edit | 12 | ||||
-rwxr-xr-x | script/public-inbox-extindex | 91 | ||||
-rwxr-xr-x | script/public-inbox-fetch | 39 | ||||
-rwxr-xr-x | script/public-inbox-httpd | 48 | ||||
-rwxr-xr-x | script/public-inbox-imapd | 12 | ||||
-rwxr-xr-x | script/public-inbox-index | 88 | ||||
-rwxr-xr-x | script/public-inbox-init | 133 | ||||
-rwxr-xr-x | script/public-inbox-learn | 23 | ||||
-rwxr-xr-x | script/public-inbox-mda | 29 | ||||
-rwxr-xr-x | script/public-inbox-netd | 6 | ||||
-rwxr-xr-x | script/public-inbox-nntpd | 15 | ||||
-rwxr-xr-x | script/public-inbox-pop3d | 8 | ||||
-rwxr-xr-x | script/public-inbox-purge | 9 | ||||
-rwxr-xr-x | script/public-inbox-watch | 25 | ||||
-rwxr-xr-x | script/public-inbox-xcpdb | 33 | ||||
-rwxr-xr-x | script/public-inbox.cgi | 2 |
21 files changed, 740 insertions, 255 deletions
diff --git a/script/lei b/script/lei new file mode 100755 index 00000000..087afc33 --- /dev/null +++ b/script/lei @@ -0,0 +1,144 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un); +use PublicInbox::CmdIPC4; +my $narg = 5; +my $sock; +my $recv_cmd = PublicInbox::CmdIPC4->can('recv_cmd4'); +my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do { + require PublicInbox::Syscall; + $recv_cmd = PublicInbox::Syscall->can('recv_cmd4'); + PublicInbox::Syscall->can('send_cmd4'); +} // do { + my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= ( + $ENV{XDG_CACHE_HOME} // + ( ($ENV{HOME} // '/nonexistent').'/.cache' ) + ).'/public-inbox/inline-c'; + if (!-d $inline_dir) { + require File::Path; + File::Path::make_path($inline_dir); + } + require PublicInbox::Spawn; # takes ~50ms even if built *sigh* + $recv_cmd = PublicInbox::Spawn->can('recv_cmd4'); + PublicInbox::Spawn->can('send_cmd4'); +} // die 'please install Inline::C or Socket::MsgHdr'; + +my %pids; +my $sigchld = sub { + my $flags = scalar(@_) ? POSIX::WNOHANG() : 0; + for my $pid (keys %pids) { + delete($pids{$pid}) if waitpid($pid, $flags) == $pid; + } +}; +my @parent; +my $exec_cmd = sub { + my ($fds, $argc, @argv) = @_; + my $parent = $$; + require POSIX; + my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO}); + my @rdr; + for my $fd (@$fds) { + open(my $newfh, '+<&=', $fd) or die "open +<&=$fd: $!"; + push @rdr, shift(@old), $newfh; + } + my $do_exec = sub { + my @non_std; # ex. $op_p from lei_edit_search + while (my ($io, $newfh) = splice(@rdr, 0, 2)) { + my $old_io = !!$io; + open $io, '+<&', $newfh or die "open +<&=: $!"; + push @non_std, $io unless $old_io; + } + if (@non_std) { + require Fcntl; + fcntl($_, Fcntl::F_SETFD(), 0) for @non_std; + } + my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); + @ENV{keys %env} = values %env; + umask 077; + exec(@argv); + warn "exec: @argv: $!\n"; + POSIX::_exit(1); + }; + $SIG{CHLD} = $sigchld; + my $pid = fork // die "fork: $!"; + if ($pid == 0) { + $do_exec->() if $fds->[1]; # git-credential, pager + + # parent backgrounds on MUA + POSIX::setsid() > 0 or die "setsid: $!"; + @parent = ($parent); + return; # continue $recv_cmd in background + } + if ($fds->[1]) { + $pids{$pid} = undef; + } else { + $do_exec->(); # MUA reuses stdout + } +}; + +my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei'; +if ($runtime_dir eq '/lei') { + require File::Spec; + $runtime_dir = File::Spec->tmpdir."/lei-$<"; +} +unless (-d $runtime_dir) { + require File::Path; + File::Path::make_path($runtime_dir, { mode => 0700 }); +} +my $path = "$runtime_dir/$narg.seq.sock"; +my $addr = pack_sockaddr_un($path); +socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; +unless (connect($sock, $addr)) { # start the daemon if not started + local $ENV{PERL5LIB} = join(':', @INC); + open(my $daemon, '-|', $^X, $^W ? ('-w') : (), + qw[-MPublicInbox::LEI -e PublicInbox::LEI::lazy_start(@ARGV)], + $path, $! + 0, $narg) or die "popen: $!"; + while (<$daemon>) { warn $_ } # EOF when STDERR is redirected + close($daemon) or warn <<""; +lei-daemon could not start, exited with \$?=$? + + # try connecting again anyways, unlink+bind may be racy + connect($sock, $addr) or die <<""; +connect($path): $! (after attempted daemon start) + +} +# (Socket::MsgHdr|Inline::C), $sock are all available: +open my $dh, '<', '.' or die "open(.) $!"; +my $buf = join("\0", scalar(@ARGV), @ARGV); +while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } +$buf .= "\0\0"; +$send_cmd->($sock, [0, 1, 2, fileno($dh)], $buf, 0) or die "sendmsg: $!"; +$SIG{TSTP} = sub { send($sock, 'STOP', 0); kill 'STOP', $$ }; +$SIG{CONT} = sub { send($sock, 'CONT', 0) }; + +my $x_it_code = 0; +while (1) { + my (@fds) = $recv_cmd->($sock, my $buf, 4096 * 33); + die "recvmsg: $!" if scalar(@fds) == 1 && !defined($fds[0]); + last if $buf eq ''; + if ($buf =~ /\Aexec (.+)\z/) { + $exec_cmd->(\@fds, split(/\0/, $1)); + } elsif ($buf eq '-WINCH') { + kill($buf, @parent); # for MUA + } elsif ($buf eq 'umask') { + send($sock, 'u'.pack('V', umask), 0) or die "send: $!" + } elsif ($buf =~ /\Ax_it ([0-9]+)\z/) { + $x_it_code ||= $1 + 0; + last; + } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) { + $x_it_code ||= $1 + 0; + } elsif ($buf eq 'wait') { + $sigchld->(); + } else { + $sigchld->(); + die $buf; + } +} +$sigchld->(); +if (my $sig = ($x_it_code & 127)) { + kill $sig, $$; + sleep(1) while 1; # no self-pipe/signalfd, here, so we loop +} +exit($x_it_code >> 8); diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex new file mode 100755 index 00000000..dd00623a --- /dev/null +++ b/script/public-inbox-cindex @@ -0,0 +1,102 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-cindex [options] -g GIT_DIR [-g GIT_DIR]... +usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT + + Create and update search indices for code repos + + -d EXTDIR use EXTDIR instead of GIT_DIR/public-inbox-cindex + --no-fsync speed up indexing, risk corruption on power outage + -L LEVEL `medium', or `full' (default: medium) + --project-list=FILE use a cgit/gitweb-compatible list of projects + --update | -u update previously-indexed code repos with `-d' + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index commit diffs larger than the given size + --prune prune old repos and commits + --reindex reindex previously indexed repos + --verbose | -v increase verbosity (may be repeated) + +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-cindex(1) man page for full documentation. +EOF +my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden +GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous + indexlevel|index-level|L=s join:s@ + batch_size|batch-size=s max_size|max-size=s + include|I=s@ only=s@ all show:s@ + project-list=s exclude=s@ project-root|r=s + git-dir|g=s@ + sort-parallel=s sort-compress-program=s sort-buffer-size=s + d=s update|u scan! prune dry-run|n C=s@ help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); +$SIG{USR1} = 'IGNORE'; # to be overridden in cidx_sync +$SIG{PIPE} = 'IGNORE'; +# require lazily to speed up --help +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); +my $cfg = $opt->{-pi_cfg} = PublicInbox::Config->new; +my $cidx_dir = $opt->{d}; +PublicInbox::Admin::require_or_die('Xapian'); +PublicInbox::Admin::progress_prepare($opt); +my $env = PublicInbox::Admin::index_prepare($opt, $cfg); +%ENV = (%ENV, %$env) if $env; + +my @git_dirs; +require PublicInbox::CodeSearchIdx; # unstable internal API +if (@ARGV) { + my @g = map { "-g $_" } @ARGV; + die <<EOM; +Specify git directories with `-g' (or --git-dir=): @g +Or use --project-list=... and --project-root=... +EOM +} elsif (defined(my $pl = $opt->{'project-list'})) { + my $pfx = $opt->{'project-root'} // die <<EOM; +PROJECT_ROOT required for --project-list +EOM + $opt->{'git-dir'} and die <<EOM; +--project-list does not accept additional --git-dir directories +(@{$opt->{'git-dir'}}) +EOM + open my $fh, '<', $pl or die "open($pl): $!\n"; + chomp(@git_dirs = <$fh>); + $pfx .= '/'; + $pfx =~ tr!/!/!s; + substr($_, 0, 0, $pfx) for @git_dirs; +} elsif (my $gd = $opt->{'git-dir'}) { + @git_dirs = @$gd; +} elsif (grep defined, @$opt{qw(show update prune)}) { +} else { + warn "No --git-dir= nor --project-list= + --project-root= specified\n"; + die $help; +} + +$_ = PublicInbox::Admin::resolve_git_dir($_) for @git_dirs; +if (defined $cidx_dir) { # external index + die "`%' is not allowed in $cidx_dir\n" if $cidx_dir =~ /\%/; + my $cidx = PublicInbox::CodeSearchIdx->new($cidx_dir, $opt); + @{$cidx->{git_dirs}} = @git_dirs; # may be empty + $cidx->cidx_run; +} elsif (!@git_dirs) { + die $help +} else { + die <<EOM if $opt->{update}; +--update requires `-d EXTDIR' +EOM + for my $gd (@git_dirs) { + my $cd = "$gd/public-inbox-cindex"; + my $cidx = PublicInbox::CodeSearchIdx->new($cd, { %$opt }); + $cidx->{-cidx_internal} = 1; + @{$cidx->{git_dirs}} = ($gd); + $cidx->cidx_run; + } +} diff --git a/script/public-inbox-clone b/script/public-inbox-clone new file mode 100755 index 00000000..c3e64485 --- /dev/null +++ b/script/public-inbox-clone @@ -0,0 +1,70 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git clone remote public-inboxes +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-clone [OPTIONS] INBOX_URL [INBOX_DIR] + public-inbox-clone [OPTIONS] ROOT_URL [DESTINATION] + + clone remote public-inboxes or grokmirror manifests + +options: + + --epoch=RANGE range of v2 epochs to clone (e.g `2..5', `~0', `~1..') + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + --dry-run | -n show what would be cloned without cloning + --verbose | -v increase verbosity (may be repeated) + --quiet | -q disable progress reporting + -C DIR chdir to specified directory + +See public-inbox-clone(1) man page for --manifest, --remote-manifest, +--objstore, --project-list, --post-update-hook, --include, --exclude, +--prune, --keep-going, --jobs, --inbox-config +EOF + +# cgit calls it `project-list', grokmirror calls it `projectslist', +# support both :/ +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ + inbox-config=s inbox-version=i objstore=s manifest=s + remote-manifest=s project-list|projectslist=s post-update-hook=s@ + prune|p keep-going|k exit-code purge + dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Admin; # loads Config +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my ($url, $dst, $extra) = @ARGV; +die $help if !defined($url) || defined($extra); +defined($dst) or ($dst) = ($url =~ m!/([^/]+)/?\z!); +index($dst, "\n") >= 0 and die "`\\n' not allowed in `$dst'"; + +# n.b. this is still a truckload of code... +require File::Spec; +require PublicInbox::LEI; +require PublicInbox::LeiExternal; +require PublicInbox::LeiMirror; + +$url = PublicInbox::LeiExternal::ext_canonicalize($url); +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-clone', + 0 => *STDIN{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +open $lei->{1}, '+<&=', 1 or die "dup: $!"; +open $lei->{3}, '.' or die "open . $!"; +my $mrr = bless { + lei => $lei, + src => $url, + dst => File::Spec->canonpath($dst), +}, 'PublicInbox::LeiMirror'; + +$? = 0; +$mrr->do_mirror; +$mrr->can('_wq_done_wait')->($$, $mrr, $lei); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-compact b/script/public-inbox-compact index dfebac1c..1062be5a 100755 --- a/script/public-inbox-compact +++ b/script/public-inbox-compact @@ -1,12 +1,12 @@ #!perl -w -# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -my $opt = { compact => 1, -coarse_lock => 1 }; +my $opt = { compact => 1, -coarse_lock => 1, + -eidx_ok => 1, -cidx_ok => 1 }; my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: -usage: public-inbox-compact INBOX_DIR +usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR> Compact Xapian DBs in an inbox @@ -17,7 +17,7 @@ options: See public-inbox-compact(1) man page for full documentation. EOF -GetOptions($opt, qw(all help|h), +GetOptions($opt, qw(all C=s@ help|h), # compact options: qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F), ) or die $help; @@ -25,13 +25,20 @@ if ($opt->{help}) { print $help; exit 0 }; require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); PublicInbox::Admin::progress_prepare($opt); require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); -unless (@ibxs) { print STDERR $help; exit 1 } -foreach (@ibxs) { - my $ibx = PublicInbox::InboxWritable->new($_); +my $cfg = PublicInbox::Config->new; +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } +for my $ibx (@$ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'compact', $opt); } +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'compact', $opt); +} diff --git a/script/public-inbox-convert b/script/public-inbox-convert index b61c743f..713c2881 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; @@ -33,50 +33,37 @@ my $opt = { quiet => -1, compact => 0, maxsize => undef, fsync => 1, reindex => 1, # we always reindex }; -GetOptions($opt, qw(jobs|j=i index! help|h), +GetOptions($opt, qw(jobs|j=i index! help|h C=s@), # index options qw(verbose|v+ rethread compact|c+ fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - sequential_shard|sequential-shard|seq-shard + sequential-shard|seq-shard )) or die $help; if ($opt->{help}) { print $help; exit 0 }; +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $old_dir = shift(@ARGV) // ''; my $new_dir = shift(@ARGV) // ''; die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -require Cwd; -Cwd->import('abs_path'); require PublicInbox::Config; require PublicInbox::InboxWritable; -my $abs = abs_path($old_dir); -die "failed to resolve $old_dir: $!\n" if (!defined($abs)); - my $cfg = PublicInbox::Config->new; -my $old; -$cfg->each_inbox(sub { - $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; -}); -if ($old) { - $old = PublicInbox::InboxWritable->new($old); -} else { +my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); +@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", + map { "\t$_->{inboxdir}\n" } @old; +my $old = PublicInbox::InboxWritable->new($old[0]); +if (delete $old->{-unconfigured}) { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; - $old = PublicInbox::InboxWritable->new({ - inboxdir => $old_dir, - name => 'ignored', - -primary_address => 'old@example.com', - address => [ 'old@example.com' ], - }); } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -require File::Spec; -require PublicInbox::Admin; -my $detected = PublicInbox::Admin::detect_indexlevel($old); +my $detected = $old->detect_indexlevel; $old->{indexlevel} //= $detected; my $env; if ($opt->{'index'}) { @@ -88,12 +75,11 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = File::Spec->canonpath($new_dir); +$new->{inboxdir} = PublicInbox::Config::rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; -$old->umask_prepare; sub link_or_copy ($$) { my ($src, $dst) = @_; @@ -103,7 +89,8 @@ sub link_or_copy ($$) { File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n"; } -$old->with_umask(sub { +{ + my $restore = $old->with_umask; my $old_cfg = "$old->{inboxdir}/config"; local $ENV{GIT_CONFIG} = $old_cfg; my $new_cfg = "$new->{inboxdir}/all.git/config"; @@ -124,18 +111,16 @@ $old->with_umask(sub { my $desc = "$old->{inboxdir}/description"; link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc; my $clone = "$old->{inboxdir}/cloneurl"; - if (-e $clone) { - warn <<""; + warn <<"" if -e $clone; $clone may not be valid after migrating to v2, not copying - } -}); +} my $state = ''; my $head = $old->{ref_head} || 'HEAD'; -my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head); +my $rd = $old->git->popen(qw(fast-export --use-done-feature), $head); $v2w->idx_init($opt); my $im = $v2w->importer; -my ($r, $w) = $im->gfi_start; +my $io = $im->gfi_start; my $h = '[0-9a-f]'; my %D; my $last; @@ -145,23 +130,17 @@ while (<$rd>) { } elsif (/^commit /) { $state = 'commit'; } elsif (/^data ([0-9]+)/) { - my $len = $1; - print $w $_ or $im->wfail; - while ($len) { - my $n = read($rd, my $tmp, $len) or die "read: $!"; - warn "$n != $len\n" if $n != $len; - $len -= $n; - print $w $tmp or $im->wfail; - } + print $io $_ or $im->wfail; + print $io PublicInbox::IO::read_all($rd, $1) or $im->wfail; next; } elsif ($state eq 'commit') { if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) { my ($mark, $path) = ($1, $2); $D{$path} = $mark; if ($last && $last ne 'm') { - print $w "D $last\n" or $im->wfail; + print $io "D $last\n" or $im->wfail; } - print $w "M 100644 :$mark m\n" or $im->wfail; + print $io "M 100644 :$mark m\n" or $im->wfail; $last = 'm'; next; } @@ -169,20 +148,18 @@ while (<$rd>) { my $mark = delete $D{$1}; defined $mark or die "undeleted path: $1\n"; if ($last && $last ne 'd') { - print $w "D $last\n" or $im->wfail; + print $io "D $last\n" or $im->wfail; } - print $w "M 100644 :$mark d\n" or $im->wfail; + print $io "M 100644 :$mark d\n" or $im->wfail; $last = 'd'; next; } } last if $_ eq "done\n"; - print $w $_ or $im->wfail; + print $io $_ or $im->wfail; } -close $rd or die "close fast-export: $!\n"; -waitpid($pid, 0) or die "waitpid failed: $!\n"; -$? == 0 or die "fast-export failed: $?\n"; -$r = $w = undef; # v2w->done does the actual close and error checking +$rd->close or die "fast-export: \$?=$? \$!=$!\n"; +$io = undef; $v2w->done; if (my $old_mm = $old->mm) { $old->cleanup; @@ -193,7 +170,7 @@ if (my $old_mm = $old->mm) { $v2w->idx_init($opt); $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm); - my $epoch0 = PublicInbox::Git->new($v2w->git_init(0)); + my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0)); chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head)); $v2w->last_epoch_commit(0, $cmt); } diff --git a/script/public-inbox-edit b/script/public-inbox-edit index a70614fc..88115d7c 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for editing messages in a public-inbox. @@ -33,9 +33,10 @@ See public-inbox-edit(1) man page for full documentation. EOF my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; -my @opt = qw(mid|m=s file|F=s raw); +my @opt = qw(mid|m=s file|F=s raw C=s@); GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die $help; if ($opt->{help}) { print $help; exit 0 }; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f" @@ -133,7 +134,7 @@ $mids } my %tmpopt = ( - TEMPLATE => 'public-inbox-edit-XXXXXX', + TEMPLATE => 'public-inbox-edit-XXXX', TMPDIR => 1, SUFFIX => $opt->{raw} ? '.eml' : '.mbox', ); @@ -183,11 +184,10 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - my $new_raw = do { local $/; <$new_fh> }; + my $new_raw = PublicInbox::IO::read_all $new_fh; if (!$opt->{raw}) { - # get rid of the From we added - $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($new_raw); # check if user forgot to purge (in mutt) after editing if ($new_raw =~ /^From /sm) { diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex new file mode 100755 index 00000000..2e5a5d2c --- /dev/null +++ b/script/public-inbox-extindex @@ -0,0 +1,91 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use v5.10.1; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] + + Create and update external (detached) search indices + + --no-fsync speed up indexing, risk corruption on power outage + --watch run persistently and watch for inbox updates + -L LEVEL `medium', or `full' (default: full) + --all index all configured inboxes + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index messages larger than the given size + --gc perform garbage collection instead of indexing + --dedupe[=MSGID] fix prior deduplication errors (may be repeated) + --reindex index previously indexed inboxes + --fast only reindex unseen/stale messages + --verbose | -v increase verbosity (may be repeated) + --dry-run | -n dry-run on --dedupe + +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-extindex(1) man page for full documentation. +EOF +my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; +GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i + fsync|sync! fast dangerous + indexlevel|index-level|L=s max_size|max-size=s + batch_size|batch-size=s + dedupe:s@ gc commit-interval=i watch scan! dry-run|n + multi-pack-index! all C=s@ help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); +local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync +# require lazily to speed up --help +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); +my $cfg = PublicInbox::Config->new; +my $eidx_dir = shift(@ARGV); +unless (defined $eidx_dir) { + if ($opt->{all} && $cfg->ALL) { + $eidx_dir = $cfg->ALL->{topdir}; + } else { + die "E: $help"; + } +} +my @ibxs; +if ($opt->{gc}) { + die "E: inbox paths must not be specified with --gc\n" if @ARGV; + for my $sw (qw(all watch dry-run dedupe)) { + die "E: --$sw is not compatible with --gc\n" if $opt->{$sw}; + } +} else { + @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +} +$opt->{'dry-run'} && !$opt->{dedupe} and + die "E: --dry-run only affects --dedupe\n"; +$opt->{fast} && !$opt->{reindex} and + die "E: --fast only affects --reindex\n"; + +PublicInbox::Admin::require_or_die(qw(-search)); +PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n"; +PublicInbox::Admin::progress_prepare($opt); +my $env = PublicInbox::Admin::index_prepare($opt, $cfg); +local %ENV = (%ENV, %$env) if $env; +require PublicInbox::ExtSearchIdx; +my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); +if ($opt->{gc}) { + $eidx->attach_config($cfg); + $eidx->eidx_gc($opt); +} else { + if ($opt->{all}) { + $eidx->attach_config($cfg); + } else { + $eidx->attach_config($cfg, \@ibxs); + } + if ($opt->{watch}) { + $cfg = undef; # save memory only after SIGHUP + $eidx->eidx_watch($opt); + } else { + $eidx->eidx_sync($opt); + } +} diff --git a/script/public-inbox-fetch b/script/public-inbox-fetch new file mode 100755 index 00000000..6fd15328 --- /dev/null +++ b/script/public-inbox-fetch @@ -0,0 +1,39 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git fetch remote public-inboxes +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-fetch -C DESTINATION + + fetch remote public-inboxes + +options: + + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + -T NAME Name of remote(s) to try (may be repeated) + default: `origin' and `_grokmirror' + --exit-code exit with 127 if no updates + --verbose | -v increase verbosity (may be repeated) + --quiet | -q increase verbosity (may be repeated) + -C DIR chdir to specified directory +EOF +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ try-remote|T=s@ + prune|p + no-torsocks torsocks=s exit-code)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Fetch; # loads Admin +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-fetch', + 0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +PublicInbox::Fetch->do_fetch($lei, '.'); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd index 3befdab8..caceae20 100755 --- a/script/public-inbox-httpd +++ b/script/public-inbox-httpd @@ -1,48 +1,8 @@ -#!/usr/bin/perl -w -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone HTTP server for public-inbox. -use strict; +use v5.12; use PublicInbox::Daemon; -BEGIN { - for (qw(Plack::Builder Plack::Util)) { - eval("require $_") or die "E: Plack is required for $0\n"; - } - Plack::Builder->import; - require PublicInbox::HTTP; - require PublicInbox::HTTPD; -} - -my %httpds; -my $app; -my $refresh = sub { - if (@ARGV) { - eval { $app = Plack::Util::load_psgi(@ARGV) }; - if ($@) { - die $@, -"$0 runs in /, command-line paths must be absolute\n"; - } - } else { - require PublicInbox::WWW; - my $www = PublicInbox::WWW->new; - $www->preload; - $app = builder { - eval { enable 'ReverseProxy' }; - $@ and warn -"Plack::Middleware::ReverseProxy missing,\n", -"URL generation for redirects may be wrong if behind a reverse proxy\n"; - - enable 'Head'; - sub { $www->call(@_) }; - }; - } -}; - -PublicInbox::Daemon::run('0.0.0.0:8080', $refresh, - sub ($$$) { # post_accept - my ($client, $addr, $srv) = @_; - my $fd = fileno($srv); - my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app); - PublicInbox::HTTP->new($client, $addr, $h), - }); +PublicInbox::Daemon::run('http://0.0.0.0:8080'); diff --git a/script/public-inbox-imapd b/script/public-inbox-imapd index 60f2e6d8..0c96cdbb 100755 --- a/script/public-inbox-imapd +++ b/script/public-inbox-imapd @@ -1,14 +1,8 @@ #!perl -w -# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone read-only IMAP server for public-inbox. -use strict; +use v5.12; use PublicInbox::Daemon; -use PublicInbox::IMAPdeflate; # loads PublicInbox::IMAP -use PublicInbox::IMAPD; -my $imapd = PublicInbox::IMAPD->new; -PublicInbox::Daemon::run('0.0.0.0:143', - sub { $imapd->refresh_groups(@_) }, # refresh - sub ($$$) { PublicInbox::IMAP->new($_[0], $imapd) }, # post_accept - $imapd); +PublicInbox::Daemon::run('imap://0.0.0.0:143'); diff --git a/script/public-inbox-index b/script/public-inbox-index index 5dad6ecb..a13e44bf 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -1,5 +1,5 @@ #!perl -w -# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # Basic tool to create a Xapian search index for a public-inbox. # Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/> @@ -11,12 +11,13 @@ use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: usage: public-inbox-index [options] INBOX_DIR - Create and update search indices + Create and update per-inbox search indices options: --no-fsync speed up indexing, risk corruption on power outage -L LEVEL `basic', `medium', or `full' (default: full) + -E EXTINDEX update extindex (default: `all') --all index all configured inboxes --compact | -c run public-inbox-compact(1) after indexing --sequential-shard index Xapian shards sequentially for slow storage @@ -24,6 +25,8 @@ options: --batch-size=BYTES flush changes to OS after a given number of bytes --max-size=BYTES do not index messages larger than the given size --reindex index previously indexed data (if upgrading) + --since=DATE limit --reindex to changes after DATE + --until=DATE limit --reindex to changes before DATE --rethread regenerate thread IDs (if upgrading, use sparingly) --prune prune git storage on discontiguous history --verbose | -v increase verbosity (may be repeated) @@ -31,54 +34,99 @@ options: BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-index(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { + quiet => -1, compact => 0, max_size => undef, fsync => 1, + 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given +}; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune - fsync|sync! xapian_only|xapian-only + fsync|sync! xapian_only|xapian-only dangerous indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - sequential_shard|seq-shard|sequential-shard - skip-docdata all help|h)) + since|after=s until|before=s + sequential-shard|seq-shard + multi-pack-index! + no-update-extindex update-extindex|E=s@ + fast-noop|F skip-docdata all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{xapian_only} && !$opt->{reindex}) { die "--xapian-only requires --reindex\n"; } +if ($opt->{reindex} && delete($opt->{'fast-noop'})) { + warn "--fast-noop ignored with --reindex\n"; +} # require lazily to speed up --help require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; # Config is loaded by Admin +$opt->{-use_cwd} = 1; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } +require PublicInbox::InboxWritable; +my (@eidx, %eidx_seen); +my $update_extindex = $opt->{'update-extindex'}; +if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { + # extindex and normal inboxes may have different owners + push(@$update_extindex, 'all') if -w $ALL->{topdir}; +} +@$update_extindex = () if $opt->{'no-update-extindex'}; +if (scalar @$update_extindex) { + PublicInbox::Admin::require_or_die('-search'); + require PublicInbox::ExtSearchIdx; +} +for my $ei_name (@$update_extindex) { + my $es = $cfg->lookup_ei($ei_name); + my $topdir; + if (!$es && -d $ei_name) { # allow dirname or config section name + $topdir = $ei_name; + } elsif ($es) { + $topdir = $es->{topdir}; + } else { + die "extindex `$ei_name' not configured or found\n"; + } + my $o = { %$opt }; + delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic'; + $eidx_seen{$topdir} //= + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o)); +} my $mods = {}; +my @eidx_unconfigured; foreach my $ibx (@ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); # detect_indexlevel may also set $ibx->{-skip_docdata} - my $detected = PublicInbox::Admin::detect_indexlevel($ibx); + my $detected = $ibx->detect_indexlevel; # XXX: users can shoot themselves in the foot, with opt->{indexlevel} $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); + if (@eidx && $ibx->{-unconfigured}) { + push @eidx_unconfigured, " $ibx->{inboxdir}\n"; + } } +warn <<EOF if @eidx_unconfigured; +The following inboxes are unconfigured and will not be updated in +@$update_extindex:\n@eidx_unconfigured +EOF -# "Search::Xapian" includes SWIG "Xapian", too: -$opt->{compact} = 0 if !$mods->{'Search::Xapian'}; +$opt->{compact} = 0 if !$mods->{'Xapian'}; # (or old Search::Xapian) PublicInbox::Admin::require_or_die(keys %$mods); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; -require PublicInbox::InboxWritable; PublicInbox::Xapcmd::check_compact() if $opt->{compact}; PublicInbox::Admin::progress_prepare($opt); for my $ibx (@ibxs) { - $ibx = PublicInbox::InboxWritable->new($ibx); if ($opt->{compact} >= 2) { PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt}); } $ibx->{-no_fsync} = 1 if !$opt->{fsync}; + $ibx->{-dangerous} = 1 if $opt->{dangerous}; $ibx->{-skip_docdata} //= $opt->{'skip-docdata'}; my $ibx_opt = $opt; @@ -86,11 +134,23 @@ for my $ibx (@ibxs) { defined(my $v = $cfg->git_bool($s)) or die <<EOL; publicInbox.$ibx->{name}.indexSequentialShard not boolean EOL - $ibx_opt = { %$opt, sequential_shard => $v }; + $ibx_opt = { %$opt, 'sequential-shard' => $v }; } - PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); + my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); + last if $ibx_opt->{quit}; if (my $copt = $opt->{compact_opt}) { - local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; + local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } + last if $ibx_opt->{quit}; + next if $ibx->{-unconfigured} || !$nidx; + for my $eidx (@eidx) { + $eidx->attach_inbox($ibx); + } +} +my $pr = $opt->{-progress}; +for my $eidx (@eidx) { + $pr->("indexing $eidx->{topdir} ...\n") if $pr; + $eidx->eidx_sync($opt); + last if $opt->{quit}; } diff --git a/script/public-inbox-init b/script/public-inbox-init index c775eb31..cf6443f7 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -1,9 +1,10 @@ #!perl -w -# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; +use autodie qw(open chmod close rename); use Fcntl qw(:DEFAULT); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..] @@ -22,9 +23,10 @@ options: -V2 use scalable public-inbox-v2-format(5) -L LEVEL index level `basic', `medium', or `full' (default: full) --ng NEWSGROUP set NNTP newsgroup name + -c KEY=VALUE set additional config option(s) --skip-artnum=NUM NNTP article numbers to skip --skip-epoch=NUM epochs to skip (-V2 only) - -J JOBS number of indexing jobs (-V2 only), (default: 4) + -j JOBS number of indexing jobs (-V2 only), (default: 4) See public-inbox-init(1) man page for full documentation. EOF @@ -35,6 +37,7 @@ PublicInbox::Admin::require_or_die('-base'); my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help); my $skip_docdata; my $ng = ''; +my (@c_extra, @chdir); my %opts = ( 'V|version=i' => \$version, 'L|index-level|indexlevel=s' => \$indexlevel, @@ -44,6 +47,8 @@ my %opts = ( 'ng|newsgroup=s' => \$ng, 'skip-docdata' => \$skip_docdata, 'help|h' => \$show_help, + 'c=s@' => \@c_extra, + 'C=s@' => \@chdir, ); my $usage_cb = sub { print STDERR $help; @@ -51,12 +56,38 @@ my $usage_cb = sub { }; GetOptions(%opts) or $usage_cb->(); if ($show_help) { print $help; exit 0 }; -PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; my $name = shift @ARGV or $usage_cb->(); my $inboxdir = shift @ARGV or $usage_cb->(); my $http_url = shift @ARGV or $usage_cb->(); my (@address) = @ARGV; @address or $usage_cb->(); +PublicInbox::Admin::do_chdir(\@chdir); + +@c_extra = map { + my ($k, $v) = split(/=/, $_, 2); + defined($v) or die "Usage: -c KEY=VALUE\n"; + $k =~ /\A[a-z]+\z/i or die "$k contains invalid characters\n"; + $k = lc($k); + if ($k eq 'newsgroup') { + die "newsgroup already set ($ng)\n" if $ng ne ''; + $ng = $v; + (); + } elsif ($k eq 'address') { + push @address, $v; # for conflict checking + (); + } elsif ($k =~ /\A(?:inboxdir|mainrepo)\z/) { + die "$k not allowed via -c $_\n" + } elsif ($k eq 'indexlevel') { + defined($indexlevel) and + die "indexlevel already set ($indexlevel)\n"; + $indexlevel = $v; + (); + } else { + $_ + } +} @c_extra; + +PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; $ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and die "--newsgroup `$ng' is not valid\n"; @@ -65,8 +96,7 @@ $ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and require PublicInbox::Config; my $pi_config = PublicInbox::Config->default_file; -require File::Basename; -my $dir = File::Basename::dirname($pi_config); +my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!); require File::Path; File::Path::mkpath($dir); # will croak on fatal errors @@ -79,7 +109,7 @@ PublicInbox::Lock::lock_acquire($lock_obj); # git-config will operate on this (and rename on success): require File::Temp; -my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXXXXXX', DIR => $dir); +my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir); # Now, we grab another lock to use git-config(1) locking, so it won't # wait on the lock, unlike some of our internal flock()-based locks. @@ -91,22 +121,18 @@ sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { warn "could not open config file: $lockfile: $!\n"; exit(255); }; -my $auto_unlink = UnlinkMe->new($lockfile); -my ($perm, %seen); +require PublicInbox::OnDestroy; +my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile }); +my $perm = 0644 & ~umask; +my %seen; if (-e $pi_config) { - open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n"; - my @st = stat($oh); + require PublicInbox::IO; + open(my $oh, '<', $pi_config); + my @st = stat($oh) or die "(f)stat failed on $pi_config: $!\n"; $perm = $st[2]; - defined $perm or die "(f)stat failed on $pi_config: $!\n"; - chmod($perm & 07777, $fh) or - die "(f)chmod failed on future $pi_config: $!\n"; - my $old; - { - local $/; - $old = <$oh>; - } - print $fh $old or die "failed to write: $!\n"; - close $oh or die "failed to close $pi_config: $!\n"; + chmod($perm & 07777, $fh); + print $fh PublicInbox::IO::read_all($oh); + close $oh; # yes, this conflict checking is racy if multiple instances of this # script are run by the same $PI_DIR @@ -133,15 +159,14 @@ if (-e $pi_config) { $indexlevel //= $ibx->{indexlevel} if $ibx; } my $pi_config_tmp = $fh->filename; -close($fh) or die "failed to close $pi_config_tmp: $!\n"; +close($fh); my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -require File::Spec; -$inboxdir = File::Spec->canonpath($inboxdir); +$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir); +die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; @@ -186,47 +211,41 @@ if ($skip_docdata) { $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); -require Cwd; -my $tmp = Cwd::abs_path($inboxdir); -defined($tmp) or die "failed to resolve $inboxdir: $!\n"; -$inboxdir = $tmp; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; + +my $f = "$inboxdir/description"; +if (sysopen $fh, $f, O_CREAT|O_EXCL|O_WRONLY) { + print $fh "public inbox for $address[0]\n"; + close $fh; +} # needed for git prior to v2.1.0 -umask(0077) if defined $perm; +umask(0077); + +require PublicInbox::Spawn; +PublicInbox::Spawn->import(qw(run_die)); foreach my $addr (@address) { next if $seen{lc($addr)}; - PublicInbox::Import::run_die([@x, "--add", "$pfx.address", $addr]); + run_die([@x, "--add", "$pfx.address", $addr]); } -PublicInbox::Import::run_die([@x, "$pfx.url", $http_url]); -PublicInbox::Import::run_die([@x, "$pfx.inboxdir", $inboxdir]); +run_die([@x, "$pfx.url", $http_url]); +run_die([@x, "$pfx.inboxdir", $inboxdir]); if (defined($indexlevel)) { - PublicInbox::Import::run_die([@x, "$pfx.indexlevel", $indexlevel]); + run_die([@x, "$pfx.indexlevel", $indexlevel]); } -PublicInbox::Import::run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne ''; - -# needed for git prior to v2.1.0 -if (defined $perm) { - chmod($perm & 07777, $pi_config_tmp) or - die "(f)chmod failed on future $pi_config: $!\n"; -} - -rename $pi_config_tmp, $pi_config or - die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n"; -$auto_unlink->DESTROY; - -package UnlinkMe; -use strict; - -sub new { - my ($klass, $file) = @_; - bless { file => $file }, $klass; +run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne ''; + +for my $kv (@c_extra) { + my ($k, $v) = split(/=/, $kv, 2); + # git 2.30+ has --fixed-value for idempotent invocations, + # but that's too new to depend on in 2021. Perl quotemeta + # seems compatible enough for POSIX ERE which git uses + my $re = '^'.quotemeta($v).'$'; + run_die([@x, qw(--replace-all), "$pfx.$k", $v, $re]); } -sub DESTROY { - my $f = delete($_[0]->{file}); - unlink($f) if defined($f); -} -1; +# needed for git prior to v2.1.0 +chmod($perm & 07777, $pi_config_tmp); +rename $pi_config_tmp, $pi_config; +undef $auto_unlink; # trigger ->DESTROY diff --git a/script/public-inbox-learn b/script/public-inbox-learn index fb2d86ec..a955cdf6 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for training spam (via SpamAssassin) and removing messages from a @@ -28,6 +28,7 @@ use PublicInbox::Spamcheck::Spamc; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my %opt = (all => 0); GetOptions(\%opt, qw(all help|h)) or die $help; +use PublicInbox::Import; my $train = shift or die $help; if ($train !~ /\A(?:ham|spam|rm)\z/) { @@ -36,12 +37,13 @@ if ($train !~ /\A(?:ham|spam|rm)\z/) { die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm'; my $spamc = PublicInbox::Spamcheck::Spamc->new; -my $pi_config = PublicInbox::Config->new; +my $pi_cfg = PublicInbox::Config->new; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; +PublicInbox::Import::load_config($pi_cfg); my $err; my $mime = PublicInbox::Eml->new(do{ - local $/; - my $data = <STDIN>; - $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + my $data = PublicInbox::IO::read_all \*STDIN; + PublicInbox::Eml::strip_from($data); if ($train ne 'rm') { eval { @@ -65,6 +67,7 @@ sub remove_or_add ($$$$) { $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; $ibx = PublicInbox::InboxWritable->new($ibx); + $ibx->{indexlevel} = $ibx->detect_indexlevel; my $im = $ibx->importer(0); if ($train eq "rm") { @@ -87,7 +90,7 @@ sub remove_or_add ($$$$) { # spam is removed from all known inboxes since it is often Bcc:-ed if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { - $pi_config->each_inbox(sub { + $pi_cfg->each_inbox(sub { my ($ibx) = @_; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); @@ -102,7 +105,7 @@ if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { for ($mime->header('Cc'), $mime->header('To')) { foreach my $addr (PublicInbox::Address::emails($_)) { $addr = lc($addr); - $dests{$addr} //= $pi_config->lookup($addr) // 0; + $dests{$addr} //= $pi_cfg->lookup($addr) // 0; } } @@ -110,12 +113,12 @@ if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { my %seen; while (my ($addr, $ibx) = each %dests) { next unless ref($ibx); # $ibx may be 0 - next if $seen{"$ibx"}++; + next if $seen{0 + $ibx}++; remove_or_add($ibx, $train, $mime, $addr); } - my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_config, $mime); + my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime); for my $ibx (@$dests) { - next if $seen{"$ibx"}++; + next if $seen{0 + $ibx}++; remove_or_add($ibx, $train, $mime, $ibx->{-primary_address}); } } diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 3ed5abb6..b463b07b 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Mail delivery agent for public-inbox, run from your MTA upon mail delivery @@ -16,8 +16,14 @@ use strict; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my ($ems, $emm, $show_help); my $precheck = 1; +use PublicInbox::Import; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # does this need a CLI switch? GetOptions('precheck!' => \$precheck, 'help|h' => \$show_help) or do { print STDERR $help; exit 1 }; +if ($show_help) { + print $help; + exit; +} my $do_exit = sub { my ($code) = shift; @@ -33,29 +39,32 @@ use PublicInbox::Filter::Base; use PublicInbox::InboxWritable; use PublicInbox::Spamcheck; -# n.b: hopefully we can setup the emergency path without bailing due to -# user error, we really want to setup the emergency destination ASAP +# n.b.: Hopefully we can set up the emergency path without bailing due to +# user error, we really want to set up the emergency destination ASAP # in case there's bugs in our code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); -my $str = do { local $/; <STDIN> }; -$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $str = PublicInbox::IO::read_all \*STDIN; +PublicInbox::Eml::strip_from($str); $ems->prepare(\$str); my $eml = PublicInbox::Eml->new(\$str); -my $config = PublicInbox::Config->new; +my $cfg = PublicInbox::Config->new; my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; -my $spamc = PublicInbox::Spamcheck::get($config, $key, $default); +my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default); my $dests = []; +PublicInbox::Import::load_config($cfg, $do_exit); + my $recipient = $ENV{ORIGINAL_RECIPIENT}; if (defined $recipient) { - my $ibx = $config->lookup($recipient); # first check + my $ibx = $cfg->lookup($recipient); # first check push @$dests, $ibx if $ibx; } if (!scalar(@$dests)) { - $dests = PublicInbox::MDA->inboxes_for_list_id($config, $eml); + $dests = PublicInbox::MDA->inboxes_for_list_id($cfg, $eml); if (!scalar(@$dests) && !defined($recipient)) { - die "ORIGINAL_RECIPIENT not defined in ENV\n"; + warn "ORIGINAL_RECIPIENT not defined in ENV\n"; + $do_exit->(67); # EX_NOUSER } scalar(@$dests) or $do_exit->(67); # EX_NOUSER 5.1.1 user unknown } diff --git a/script/public-inbox-netd b/script/public-inbox-netd new file mode 100755 index 00000000..e8b1ca69 --- /dev/null +++ b/script/public-inbox-netd @@ -0,0 +1,6 @@ +#!/usr/bin/perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run(); diff --git a/script/public-inbox-nntpd b/script/public-inbox-nntpd index f42db6fe..aca27383 100755 --- a/script/public-inbox-nntpd +++ b/script/public-inbox-nntpd @@ -1,15 +1,8 @@ -#!/usr/bin/perl -w -# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone NNTP server for public-inbox. -use strict; -use warnings; +use v5.12; use PublicInbox::Daemon; -use PublicInbox::NNTPdeflate; # loads PublicInbox::NNTP -use PublicInbox::NNTPD; -my $nntpd = PublicInbox::NNTPD->new; -PublicInbox::Daemon::run('0.0.0.0:119', - sub { $nntpd->refresh_groups }, # refresh - sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }, # post_accept - $nntpd); +PublicInbox::Daemon::run('nntp://0.0.0.0:119'); diff --git a/script/public-inbox-pop3d b/script/public-inbox-pop3d new file mode 100755 index 00000000..ec944aee --- /dev/null +++ b/script/public-inbox-pop3d @@ -0,0 +1,8 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Standalone POP3 server for public-inbox. +use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run('pop3://0.0.0.0:110'); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 7bca11ea..618cfec4 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for purging messages entirely from a public-inbox. Currently @@ -26,14 +26,15 @@ See public-inbox-purge(1) man page for full documentation. EOF my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 }; -GetOptions($opt, @PublicInbox::AdminEdit::OPT) or die $help; +GetOptions($opt, @PublicInbox::AdminEdit::OPT, 'C=s@') or die $help; if ($opt->{help}) { print $help; exit 0 }; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; <STDIN> }; -$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $data = PublicInbox::IO::read_all \*STDIN; +PublicInbox::Eml::strip_from($data); my $n_purged = 0; foreach my $ibx (@ibxs) { diff --git a/script/public-inbox-watch b/script/public-inbox-watch index 55183ef2..9bcd42ed 100755 --- a/script/public-inbox-watch +++ b/script/public-inbox-watch @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> my $help = <<EOF; usage: public-inbox-watch @@ -11,15 +11,15 @@ use strict; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use IO::Handle; # ->autoflush use PublicInbox::Watch; +use PublicInbox::Import; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; use PublicInbox::Config; use PublicInbox::DS; -use PublicInbox::Sigfd; -use PublicInbox::Syscall qw($SFD_NONBLOCK); my $do_scan = 1; GetOptions('scan!' => \$do_scan, # undocumented, testing only 'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 }; if ($show_help) { print $help; exit 0 }; -my $oldset = PublicInbox::Sigfd::block_signals(); +PublicInbox::DS::block_signals(); STDOUT->autoflush(1); STDERR->autoflush(1); local $0 = $0; # local since this script may be eval-ed @@ -29,7 +29,8 @@ my $reload = sub { $watch->quit; $watch = PublicInbox::Watch->new(PublicInbox::Config->new); if ($watch) { - warn("I: reloaded\n"); + $watch->{sig} = $prev->{sig}; # prevent redundant signalfd + warn "# reloaded\n"; } else { warn("E: reloading failed\n"); $watch = $prev; @@ -39,10 +40,10 @@ my $reload = sub { if ($watch) { my $scan = sub { return if !$watch; - warn "I: scanning\n"; + warn "# scanning\n"; $watch->trigger_scan('full'); }; - my $quit = sub { + my $quit = sub { # may be called in IMAP/NNTP children $watch->quit if $watch; $watch = undef; $0 .= ' quitting'; @@ -53,15 +54,9 @@ if ($watch) { CHLD => \&PublicInbox::DS::enqueue_reap, }; $sig->{QUIT} = $sig->{TERM} = $sig->{INT} = $quit; + local @SIG{keys %$sig} = values(%$sig); # for non-signalfd/kqueue # --no-scan is only intended for testing atm, undocumented. PublicInbox::DS::requeue($scan) if $do_scan; - - my $sigfd = PublicInbox::Sigfd->new($sig, $SFD_NONBLOCK); - local %SIG = (%SIG, %$sig) if !$sigfd; - if (!$sigfd) { - PublicInbox::Sigfd::sig_setmask($oldset); - PublicInbox::DS->SetLoopTimeout(1000); - } - $watch->watch($sig, $oldset) while ($watch); + $watch->watch($sig) while ($watch); } diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index 84620175..fac54559 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -1,11 +1,10 @@ #!perl -w -# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: -usage: public-inbox-xcpdb [options] INBOX_DIR +usage: public-inbox-xcpdb [options] <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR> upgrade or reshard Xapian DB(s) used by public-inbox @@ -26,38 +25,46 @@ index options (see public-inbox-index(1) man page for full description): See public-inbox-xcpdb(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, fsync => 1 }; +my $opt = { quiet => -1, compact => 0, fsync => 1, + -eidx_ok => 1, -cidx_ok => 1 }; GetOptions($opt, qw( fsync|sync! compact|c reshard|R=i max_size|max-size=s batch_size|batch-size=s - sequential_shard|seq-shard|sequential-shard + sequential-shard|seq-shard jobs|j=i quiet|q verbose|v blocksize|b=s no-full|n fuller|F - all help|h)) or die $help; + all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); require PublicInbox::Config; my $cfg = PublicInbox::Config->new; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg) or - die $help; +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg); # we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't # know sizes, only doccounts $opt->{-idx_env} = $idx_env; -if ($opt->{sequential_shard} && ($opt->{jobs} // 1) > 1) { +if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) { warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n"; $opt->{jobs} = 0; } require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; -foreach (@ibxs) { - my $ibx = PublicInbox::InboxWritable->new($_); - # we rely on --no-renumber to keep docids synched for NNTP +# we rely on --no-renumber to keep docids synched for NNTP(artnum) + IMAP(UID) +for my $ibx (@$ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt); } + +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'cpdb', $opt); +} diff --git a/script/public-inbox.cgi b/script/public-inbox.cgi index 42ab17c9..3a430d5b 100755 --- a/script/public-inbox.cgi +++ b/script/public-inbox.cgi @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ or later <https://www.gnu.org/licenses/agpl-3.0.txt> # # Enables using PublicInbox::WWW as a CGI script |