diff options
Diffstat (limited to 'script')
-rwxr-xr-x | script/lei | 214 | ||||
-rwxr-xr-x | script/public-inbox-cindex | 102 | ||||
-rwxr-xr-x | script/public-inbox-clone | 70 | ||||
-rwxr-xr-x | script/public-inbox-compact | 27 | ||||
-rwxr-xr-x | script/public-inbox-convert | 55 | ||||
-rwxr-xr-x | script/public-inbox-edit | 13 | ||||
-rwxr-xr-x[-rw-r--r--] | script/public-inbox-extindex | 26 | ||||
-rwxr-xr-x | script/public-inbox-fetch | 39 | ||||
-rwxr-xr-x | script/public-inbox-httpd | 48 | ||||
-rwxr-xr-x | script/public-inbox-imapd | 12 | ||||
-rwxr-xr-x | script/public-inbox-index | 25 | ||||
-rwxr-xr-x | script/public-inbox-init | 87 | ||||
-rwxr-xr-x | script/public-inbox-learn | 12 | ||||
-rwxr-xr-x | script/public-inbox-mda | 21 | ||||
-rwxr-xr-x | script/public-inbox-netd | 6 | ||||
-rwxr-xr-x | script/public-inbox-nntpd | 15 | ||||
-rwxr-xr-x | script/public-inbox-pop3d | 8 | ||||
-rwxr-xr-x | script/public-inbox-purge | 9 | ||||
-rwxr-xr-x | script/public-inbox-watch | 25 | ||||
-rwxr-xr-x | script/public-inbox-xcpdb | 33 |
20 files changed, 556 insertions, 291 deletions
@@ -1,118 +1,144 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; -use Socket qw(AF_UNIX SOCK_SEQPACKET MSG_EOR pack_sockaddr_un); -use Errno qw(EINTR ECONNRESET); +use v5.12; +use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un); use PublicInbox::CmdIPC4; my $narg = 5; -my ($sock, $pwd); +my $sock; my $recv_cmd = PublicInbox::CmdIPC4->can('recv_cmd4'); my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do { + require PublicInbox::Syscall; + $recv_cmd = PublicInbox::Syscall->can('recv_cmd4'); + PublicInbox::Syscall->can('send_cmd4'); +} // do { + my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= ( + $ENV{XDG_CACHE_HOME} // + ( ($ENV{HOME} // '/nonexistent').'/.cache' ) + ).'/public-inbox/inline-c'; + if (!-d $inline_dir) { + require File::Path; + File::Path::make_path($inline_dir); + } require PublicInbox::Spawn; # takes ~50ms even if built *sigh* $recv_cmd = PublicInbox::Spawn->can('recv_cmd4'); PublicInbox::Spawn->can('send_cmd4'); -}; - -sub sigchld { - my ($sig) = @_; - my $flags = $sig ? POSIX::WNOHANG() : 0; - while (waitpid(-1, $flags) > 0) {} -} +} // die 'please install Inline::C or Socket::MsgHdr'; -sub exec_cmd { +my %pids; +my $sigchld = sub { + my $flags = scalar(@_) ? POSIX::WNOHANG() : 0; + for my $pid (keys %pids) { + delete($pids{$pid}) if waitpid($pid, $flags) == $pid; + } +}; +my @parent; +my $exec_cmd = sub { my ($fds, $argc, @argv) = @_; - my @m = (*STDIN{IO}, '<&=', *STDOUT{IO}, '>&=', *STDERR{IO}, '>&='); + my $parent = $$; + require POSIX; + my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO}); my @rdr; for my $fd (@$fds) { - my ($old_io, $mode) = splice(@m, 0, 2); - open(my $tmpfh, $mode, $fd) or die "open $mode$fd: $!"; - push @rdr, $old_io, $mode, $tmpfh; + open(my $newfh, '+<&=', $fd) or die "open +<&=$fd: $!"; + push @rdr, shift(@old), $newfh; } - require POSIX; # WNOHANG - $SIG{CHLD} = \&sigchld; - my $pid = fork // die "fork: $!"; - if ($pid == 0) { - my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); - while (my ($old_io, $mode, $tmpfh) = splice(@rdr, 0, 3)) { - open $old_io, $mode, $tmpfh or die "open $mode: $!"; + my $do_exec = sub { + my @non_std; # ex. $op_p from lei_edit_search + while (my ($io, $newfh) = splice(@rdr, 0, 2)) { + my $old_io = !!$io; + open $io, '+<&', $newfh or die "open +<&=: $!"; + push @non_std, $io unless $old_io; + } + if (@non_std) { + require Fcntl; + fcntl($_, Fcntl::F_SETFD(), 0) for @non_std; } - %ENV = (%ENV, %env); + my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); + @ENV{keys %env} = values %env; + umask 077; exec(@argv); - die "exec: @argv: $!"; + warn "exec: @argv: $!\n"; + POSIX::_exit(1); + }; + $SIG{CHLD} = $sigchld; + my $pid = fork // die "fork: $!"; + if ($pid == 0) { + $do_exec->() if $fds->[1]; # git-credential, pager + + # parent backgrounds on MUA + POSIX::setsid() > 0 or die "setsid: $!"; + @parent = ($parent); + return; # continue $recv_cmd in background } -} + if ($fds->[1]) { + $pids{$pid} = undef; + } else { + $do_exec->(); # MUA reuses stdout + } +}; -if ($send_cmd && eval { - my $path = do { - my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei'; - if ($runtime_dir eq '/lei') { - require File::Spec; - $runtime_dir = File::Spec->tmpdir."/lei-$<"; - } - unless (-d $runtime_dir) { - require File::Path; - File::Path::mkpath($runtime_dir, 0, 0700); - } - "$runtime_dir/$narg.seq.sock"; - }; - my $addr = pack_sockaddr_un($path); - socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; - unless (connect($sock, $addr)) { # start the daemon if not started - local $ENV{PERL5LIB} = join(':', @INC); - open(my $daemon, '-|', $^X, qw[-MPublicInbox::LEI - -E PublicInbox::LEI::lazy_start(@ARGV)], - $path, $! + 0, $narg) or die "popen: $!"; - while (<$daemon>) { warn $_ } # EOF when STDERR is redirected - close($daemon) or warn <<""; +my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei'; +if ($runtime_dir eq '/lei') { + require File::Spec; + $runtime_dir = File::Spec->tmpdir."/lei-$<"; +} +unless (-d $runtime_dir) { + require File::Path; + File::Path::make_path($runtime_dir, { mode => 0700 }); +} +my $path = "$runtime_dir/$narg.seq.sock"; +my $addr = pack_sockaddr_un($path); +socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; +unless (connect($sock, $addr)) { # start the daemon if not started + local $ENV{PERL5LIB} = join(':', @INC); + open(my $daemon, '-|', $^X, $^W ? ('-w') : (), + qw[-MPublicInbox::LEI -e PublicInbox::LEI::lazy_start(@ARGV)], + $path, $! + 0, $narg) or die "popen: $!"; + while (<$daemon>) { warn $_ } # EOF when STDERR is redirected + close($daemon) or warn <<""; lei-daemon could not start, exited with \$?=$? - # try connecting again anyways, unlink+bind may be racy - connect($sock, $addr) or die <<""; + # try connecting again anyways, unlink+bind may be racy + connect($sock, $addr) or die <<""; connect($path): $! (after attempted daemon start) -Falling back to (slow) one-shot mode +} +# (Socket::MsgHdr|Inline::C), $sock are all available: +open my $dh, '<', '.' or die "open(.) $!"; +my $buf = join("\0", scalar(@ARGV), @ARGV); +while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } +$buf .= "\0\0"; +$send_cmd->($sock, [0, 1, 2, fileno($dh)], $buf, 0) or die "sendmsg: $!"; +$SIG{TSTP} = sub { send($sock, 'STOP', 0); kill 'STOP', $$ }; +$SIG{CONT} = sub { send($sock, 'CONT', 0) }; + +my $x_it_code = 0; +while (1) { + my (@fds) = $recv_cmd->($sock, my $buf, 4096 * 33); + die "recvmsg: $!" if scalar(@fds) == 1 && !defined($fds[0]); + last if $buf eq ''; + if ($buf =~ /\Aexec (.+)\z/) { + $exec_cmd->(\@fds, split(/\0/, $1)); + } elsif ($buf eq '-WINCH') { + kill($buf, @parent); # for MUA + } elsif ($buf eq 'umask') { + send($sock, 'u'.pack('V', umask), 0) or die "send: $!" + } elsif ($buf =~ /\Ax_it ([0-9]+)\z/) { + $x_it_code ||= $1 + 0; + last; + } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) { + $x_it_code ||= $1 + 0; + } elsif ($buf eq 'wait') { + $sigchld->(); + } else { + $sigchld->(); + die $buf; } - 1; -}) { # (Socket::MsgHdr|Inline::C), $sock, $pwd are all available: - open my $dh, '<', '.' or die "open(.) $!"; - my $buf = join("\0", scalar(@ARGV), @ARGV); - while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } - $buf .= "\0\0"; - $send_cmd->($sock, [ 0, 1, 2, fileno($dh) ], $buf, MSG_EOR); - $SIG{TERM} = $SIG{INT} = $SIG{QUIT} = sub { - my ($sig) = @_; # 'TERM', not an integer :< - $SIG{$sig} = 'DEFAULT'; - kill($sig, $$); # exit($signo + 128) - }; - my $x_it_code = 0; - while (1) { - my (@fds) = $recv_cmd->($sock, $buf, 4096 * 33); - if (scalar(@fds) == 1 && !defined($fds[0])) { - last if $! == ECONNRESET; - next if $! == EINTR; - die "recvmsg: $!"; - } - last if $buf eq ''; - if ($buf =~ /\Ax_it ([0-9]+)\z/) { - $x_it_code = $1 + 0; - last; - } elsif ($buf =~ /\Aexec (.+)\z/) { - exec_cmd(\@fds, split(/\0/, $1)); - } else { - sigchld(); - die $buf; - } - } - sigchld(); - if (my $sig = ($x_it_code & 127)) { - kill $sig, $$; - sleep; - } - exit($x_it_code >> 8); -} else { # for systems lacking Socket::MsgHdr or Inline::C - warn $@ if $@; - require PublicInbox::LEI; - PublicInbox::LEI::oneshot(__PACKAGE__); } +$sigchld->(); +if (my $sig = ($x_it_code & 127)) { + kill $sig, $$; + sleep(1) while 1; # no self-pipe/signalfd, here, so we loop +} +exit($x_it_code >> 8); diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex new file mode 100755 index 00000000..dd00623a --- /dev/null +++ b/script/public-inbox-cindex @@ -0,0 +1,102 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-cindex [options] -g GIT_DIR [-g GIT_DIR]... +usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT + + Create and update search indices for code repos + + -d EXTDIR use EXTDIR instead of GIT_DIR/public-inbox-cindex + --no-fsync speed up indexing, risk corruption on power outage + -L LEVEL `medium', or `full' (default: medium) + --project-list=FILE use a cgit/gitweb-compatible list of projects + --update | -u update previously-indexed code repos with `-d' + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index commit diffs larger than the given size + --prune prune old repos and commits + --reindex reindex previously indexed repos + --verbose | -v increase verbosity (may be repeated) + +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-cindex(1) man page for full documentation. +EOF +my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden +GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous + indexlevel|index-level|L=s join:s@ + batch_size|batch-size=s max_size|max-size=s + include|I=s@ only=s@ all show:s@ + project-list=s exclude=s@ project-root|r=s + git-dir|g=s@ + sort-parallel=s sort-compress-program=s sort-buffer-size=s + d=s update|u scan! prune dry-run|n C=s@ help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); +$SIG{USR1} = 'IGNORE'; # to be overridden in cidx_sync +$SIG{PIPE} = 'IGNORE'; +# require lazily to speed up --help +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); +my $cfg = $opt->{-pi_cfg} = PublicInbox::Config->new; +my $cidx_dir = $opt->{d}; +PublicInbox::Admin::require_or_die('Xapian'); +PublicInbox::Admin::progress_prepare($opt); +my $env = PublicInbox::Admin::index_prepare($opt, $cfg); +%ENV = (%ENV, %$env) if $env; + +my @git_dirs; +require PublicInbox::CodeSearchIdx; # unstable internal API +if (@ARGV) { + my @g = map { "-g $_" } @ARGV; + die <<EOM; +Specify git directories with `-g' (or --git-dir=): @g +Or use --project-list=... and --project-root=... +EOM +} elsif (defined(my $pl = $opt->{'project-list'})) { + my $pfx = $opt->{'project-root'} // die <<EOM; +PROJECT_ROOT required for --project-list +EOM + $opt->{'git-dir'} and die <<EOM; +--project-list does not accept additional --git-dir directories +(@{$opt->{'git-dir'}}) +EOM + open my $fh, '<', $pl or die "open($pl): $!\n"; + chomp(@git_dirs = <$fh>); + $pfx .= '/'; + $pfx =~ tr!/!/!s; + substr($_, 0, 0, $pfx) for @git_dirs; +} elsif (my $gd = $opt->{'git-dir'}) { + @git_dirs = @$gd; +} elsif (grep defined, @$opt{qw(show update prune)}) { +} else { + warn "No --git-dir= nor --project-list= + --project-root= specified\n"; + die $help; +} + +$_ = PublicInbox::Admin::resolve_git_dir($_) for @git_dirs; +if (defined $cidx_dir) { # external index + die "`%' is not allowed in $cidx_dir\n" if $cidx_dir =~ /\%/; + my $cidx = PublicInbox::CodeSearchIdx->new($cidx_dir, $opt); + @{$cidx->{git_dirs}} = @git_dirs; # may be empty + $cidx->cidx_run; +} elsif (!@git_dirs) { + die $help +} else { + die <<EOM if $opt->{update}; +--update requires `-d EXTDIR' +EOM + for my $gd (@git_dirs) { + my $cd = "$gd/public-inbox-cindex"; + my $cidx = PublicInbox::CodeSearchIdx->new($cd, { %$opt }); + $cidx->{-cidx_internal} = 1; + @{$cidx->{git_dirs}} = ($gd); + $cidx->cidx_run; + } +} diff --git a/script/public-inbox-clone b/script/public-inbox-clone new file mode 100755 index 00000000..c3e64485 --- /dev/null +++ b/script/public-inbox-clone @@ -0,0 +1,70 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git clone remote public-inboxes +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-clone [OPTIONS] INBOX_URL [INBOX_DIR] + public-inbox-clone [OPTIONS] ROOT_URL [DESTINATION] + + clone remote public-inboxes or grokmirror manifests + +options: + + --epoch=RANGE range of v2 epochs to clone (e.g `2..5', `~0', `~1..') + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + --dry-run | -n show what would be cloned without cloning + --verbose | -v increase verbosity (may be repeated) + --quiet | -q disable progress reporting + -C DIR chdir to specified directory + +See public-inbox-clone(1) man page for --manifest, --remote-manifest, +--objstore, --project-list, --post-update-hook, --include, --exclude, +--prune, --keep-going, --jobs, --inbox-config +EOF + +# cgit calls it `project-list', grokmirror calls it `projectslist', +# support both :/ +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ + inbox-config=s inbox-version=i objstore=s manifest=s + remote-manifest=s project-list|projectslist=s post-update-hook=s@ + prune|p keep-going|k exit-code purge + dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Admin; # loads Config +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my ($url, $dst, $extra) = @ARGV; +die $help if !defined($url) || defined($extra); +defined($dst) or ($dst) = ($url =~ m!/([^/]+)/?\z!); +index($dst, "\n") >= 0 and die "`\\n' not allowed in `$dst'"; + +# n.b. this is still a truckload of code... +require File::Spec; +require PublicInbox::LEI; +require PublicInbox::LeiExternal; +require PublicInbox::LeiMirror; + +$url = PublicInbox::LeiExternal::ext_canonicalize($url); +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-clone', + 0 => *STDIN{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +open $lei->{1}, '+<&=', 1 or die "dup: $!"; +open $lei->{3}, '.' or die "open . $!"; +my $mrr = bless { + lei => $lei, + src => $url, + dst => File::Spec->canonpath($dst), +}, 'PublicInbox::LeiMirror'; + +$? = 0; +$mrr->do_mirror; +$mrr->can('_wq_done_wait')->($$, $mrr, $lei); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-compact b/script/public-inbox-compact index ab1d1e5e..1062be5a 100755 --- a/script/public-inbox-compact +++ b/script/public-inbox-compact @@ -1,12 +1,12 @@ #!perl -w -# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -my $opt = { compact => 1, -coarse_lock => 1 }; +my $opt = { compact => 1, -coarse_lock => 1, + -eidx_ok => 1, -cidx_ok => 1 }; my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: -usage: public-inbox-compact INBOX_DIR +usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR> Compact Xapian DBs in an inbox @@ -17,7 +17,7 @@ options: See public-inbox-compact(1) man page for full documentation. EOF -GetOptions($opt, qw(all help|h), +GetOptions($opt, qw(all C=s@ help|h), # compact options: qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F), ) or die $help; @@ -25,13 +25,20 @@ if ($opt->{help}) { print $help; exit 0 }; require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); PublicInbox::Admin::progress_prepare($opt); require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); -unless (@ibxs) { print STDERR $help; exit 1 } -foreach (@ibxs) { - my $ibx = PublicInbox::InboxWritable->new($_); +my $cfg = PublicInbox::Config->new; +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } +for my $ibx (@$ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'compact', $opt); } +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'compact', $opt); +} diff --git a/script/public-inbox-convert b/script/public-inbox-convert index 3c627b79..713c2881 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2018-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; @@ -33,21 +33,22 @@ my $opt = { quiet => -1, compact => 0, maxsize => undef, fsync => 1, reindex => 1, # we always reindex }; -GetOptions($opt, qw(jobs|j=i index! help|h), +GetOptions($opt, qw(jobs|j=i index! help|h C=s@), # index options qw(verbose|v+ rethread compact|c+ fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - sequential_shard|sequential-shard|seq-shard + sequential-shard|seq-shard )) or die $help; if ($opt->{help}) { print $help; exit 0 }; +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $old_dir = shift(@ARGV) // ''; my $new_dir = shift(@ARGV) // ''; die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -require PublicInbox::Admin; require PublicInbox::Config; require PublicInbox::InboxWritable; @@ -62,8 +63,7 @@ if (delete $old->{-unconfigured}) { } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -require PublicInbox::Admin; -my $detected = PublicInbox::Admin::detect_indexlevel($old); +my $detected = $old->detect_indexlevel; $old->{indexlevel} //= $detected; my $env; if ($opt->{'index'}) { @@ -75,7 +75,7 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir); +$new->{inboxdir} = PublicInbox::Config::rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; @@ -89,7 +89,8 @@ sub link_or_copy ($$) { File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n"; } -$old->with_umask(sub { +{ + my $restore = $old->with_umask; my $old_cfg = "$old->{inboxdir}/config"; local $ENV{GIT_CONFIG} = $old_cfg; my $new_cfg = "$new->{inboxdir}/all.git/config"; @@ -110,18 +111,16 @@ $old->with_umask(sub { my $desc = "$old->{inboxdir}/description"; link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc; my $clone = "$old->{inboxdir}/cloneurl"; - if (-e $clone) { - warn <<""; + warn <<"" if -e $clone; $clone may not be valid after migrating to v2, not copying - } -}); +} my $state = ''; my $head = $old->{ref_head} || 'HEAD'; -my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head); +my $rd = $old->git->popen(qw(fast-export --use-done-feature), $head); $v2w->idx_init($opt); my $im = $v2w->importer; -my ($r, $w) = $im->gfi_start; +my $io = $im->gfi_start; my $h = '[0-9a-f]'; my %D; my $last; @@ -131,23 +130,17 @@ while (<$rd>) { } elsif (/^commit /) { $state = 'commit'; } elsif (/^data ([0-9]+)/) { - my $len = $1; - print $w $_ or $im->wfail; - while ($len) { - my $n = read($rd, my $tmp, $len) or die "read: $!"; - warn "$n != $len\n" if $n != $len; - $len -= $n; - print $w $tmp or $im->wfail; - } + print $io $_ or $im->wfail; + print $io PublicInbox::IO::read_all($rd, $1) or $im->wfail; next; } elsif ($state eq 'commit') { if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) { my ($mark, $path) = ($1, $2); $D{$path} = $mark; if ($last && $last ne 'm') { - print $w "D $last\n" or $im->wfail; + print $io "D $last\n" or $im->wfail; } - print $w "M 100644 :$mark m\n" or $im->wfail; + print $io "M 100644 :$mark m\n" or $im->wfail; $last = 'm'; next; } @@ -155,20 +148,18 @@ while (<$rd>) { my $mark = delete $D{$1}; defined $mark or die "undeleted path: $1\n"; if ($last && $last ne 'd') { - print $w "D $last\n" or $im->wfail; + print $io "D $last\n" or $im->wfail; } - print $w "M 100644 :$mark d\n" or $im->wfail; + print $io "M 100644 :$mark d\n" or $im->wfail; $last = 'd'; next; } } last if $_ eq "done\n"; - print $w $_ or $im->wfail; + print $io $_ or $im->wfail; } -close $rd or die "close fast-export: $!\n"; -waitpid($pid, 0) or die "waitpid failed: $!\n"; -$? == 0 or die "fast-export failed: $?\n"; -$r = $w = undef; # v2w->done does the actual close and error checking +$rd->close or die "fast-export: \$?=$? \$!=$!\n"; +$io = undef; $v2w->done; if (my $old_mm = $old->mm) { $old->cleanup; @@ -179,7 +170,7 @@ if (my $old_mm = $old->mm) { $v2w->idx_init($opt); $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm); - my $epoch0 = PublicInbox::Git->new($v2w->git_init(0)); + my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0)); chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head)); $v2w->last_epoch_commit(0, $cmt); } diff --git a/script/public-inbox-edit b/script/public-inbox-edit index 1c6c4e4a..88115d7c 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for editing messages in a public-inbox. @@ -33,9 +33,10 @@ See public-inbox-edit(1) man page for full documentation. EOF my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; -my @opt = qw(mid|m=s file|F=s raw); +my @opt = qw(mid|m=s file|F=s raw C=s@); GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die $help; if ($opt->{help}) { print $help; exit 0 }; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; my $editor = $ENV{MAIL_EDITOR}; # e.g. "mutt -f" @@ -133,7 +134,7 @@ $mids } my %tmpopt = ( - TEMPLATE => 'public-inbox-edit-XXXXXX', + TEMPLATE => 'public-inbox-edit-XXXX', TMPDIR => 1, SUFFIX => $opt->{raw} ? '.eml' : '.mbox', ); @@ -183,12 +184,10 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - defined(my $new_raw = do { local $/; <$new_fh> }) or die - "read $edit_fn: $!\n"; + my $new_raw = PublicInbox::IO::read_all $new_fh; if (!$opt->{raw}) { - # get rid of the From we added - $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($new_raw); # check if user forgot to purge (in mutt) after editing if ($new_raw =~ /^From /sm) { diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 15ac20eb..2e5a5d2c 100644..100755 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -1,7 +1,6 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# Basic tool to create a Xapian search index for a public-inbox. use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); @@ -18,18 +17,22 @@ usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] --batch-size=BYTES flush changes to OS after a given number of bytes --max-size=BYTES do not index messages larger than the given size --gc perform garbage collection instead of indexing + --dedupe[=MSGID] fix prior deduplication errors (may be repeated) + --reindex index previously indexed inboxes + --fast only reindex unseen/stale messages --verbose | -v increase verbosity (may be repeated) + --dry-run | -n dry-run on --dedupe BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-extindex(1) man page for full documentation. EOF my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i - fsync|sync! + fsync|sync! fast dangerous indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - gc commit-interval=i watch scan! - all help|h)) + dedupe:s@ gc commit-interval=i watch scan! dry-run|n + multi-pack-index! all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; @@ -39,6 +42,7 @@ STDERR->autoflush(1); local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync # require lazily to speed up --help require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; my $eidx_dir = shift(@ARGV); unless (defined $eidx_dir) { @@ -51,11 +55,17 @@ unless (defined $eidx_dir) { my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; - die "E: --all not compatible with --gc\n" if $opt->{all}; - die "E: --watch is not compatible with --gc\n" if $opt->{watch}; + for my $sw (qw(all watch dry-run dedupe)) { + die "E: --$sw is not compatible with --gc\n" if $opt->{$sw}; + } } else { @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); } +$opt->{'dry-run'} && !$opt->{dedupe} and + die "E: --dry-run only affects --dedupe\n"; +$opt->{fast} && !$opt->{reindex} and + die "E: --fast only affects --reindex\n"; + PublicInbox::Admin::require_or_die(qw(-search)); PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n"; PublicInbox::Admin::progress_prepare($opt); @@ -70,7 +80,7 @@ if ($opt->{gc}) { if ($opt->{all}) { $eidx->attach_config($cfg); } else { - $eidx->attach_inbox($_) for @ibxs; + $eidx->attach_config($cfg, \@ibxs); } if ($opt->{watch}) { $cfg = undef; # save memory only after SIGHUP diff --git a/script/public-inbox-fetch b/script/public-inbox-fetch new file mode 100755 index 00000000..6fd15328 --- /dev/null +++ b/script/public-inbox-fetch @@ -0,0 +1,39 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git fetch remote public-inboxes +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-fetch -C DESTINATION + + fetch remote public-inboxes + +options: + + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + -T NAME Name of remote(s) to try (may be repeated) + default: `origin' and `_grokmirror' + --exit-code exit with 127 if no updates + --verbose | -v increase verbosity (may be repeated) + --quiet | -q increase verbosity (may be repeated) + -C DIR chdir to specified directory +EOF +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ try-remote|T=s@ + prune|p + no-torsocks torsocks=s exit-code)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Fetch; # loads Admin +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-fetch', + 0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +PublicInbox::Fetch->do_fetch($lei, '.'); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd index b31b896d..caceae20 100755 --- a/script/public-inbox-httpd +++ b/script/public-inbox-httpd @@ -1,48 +1,8 @@ -#!/usr/bin/perl -w -# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone HTTP server for public-inbox. -use strict; +use v5.12; use PublicInbox::Daemon; -BEGIN { - for (qw(Plack::Builder Plack::Util)) { - eval("require $_") or die "E: Plack is required for $0\n"; - } - Plack::Builder->import; - require PublicInbox::HTTP; - require PublicInbox::HTTPD; -} - -my %httpds; -my $app; -my $refresh = sub { - if (@ARGV) { - eval { $app = Plack::Util::load_psgi(@ARGV) }; - if ($@) { - die $@, -"$0 runs in /, command-line paths must be absolute\n"; - } - } else { - require PublicInbox::WWW; - my $www = PublicInbox::WWW->new; - $www->preload; - $app = builder { - eval { enable 'ReverseProxy' }; - $@ and warn -"Plack::Middleware::ReverseProxy missing,\n", -"URL generation for redirects may be wrong if behind a reverse proxy\n"; - - enable 'Head'; - sub { $www->call(@_) }; - }; - } -}; - -PublicInbox::Daemon::run('0.0.0.0:8080', $refresh, - sub ($$$) { # post_accept - my ($client, $addr, $srv) = @_; - my $fd = fileno($srv); - my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app); - PublicInbox::HTTP->new($client, $addr, $h), - }); +PublicInbox::Daemon::run('http://0.0.0.0:8080'); diff --git a/script/public-inbox-imapd b/script/public-inbox-imapd index 6b755938..0c96cdbb 100755 --- a/script/public-inbox-imapd +++ b/script/public-inbox-imapd @@ -1,14 +1,8 @@ #!perl -w -# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone read-only IMAP server for public-inbox. -use strict; +use v5.12; use PublicInbox::Daemon; -use PublicInbox::IMAPdeflate; # loads PublicInbox::IMAP -use PublicInbox::IMAPD; -my $imapd = PublicInbox::IMAPD->new; -PublicInbox::Daemon::run('0.0.0.0:143', - sub { $imapd->refresh_groups(@_) }, # refresh - sub ($$$) { PublicInbox::IMAP->new($_[0], $imapd) }, # post_accept - $imapd); +PublicInbox::Daemon::run('imap://0.0.0.0:143'); diff --git a/script/public-inbox-index b/script/public-inbox-index index 33169bd0..a13e44bf 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -25,6 +25,8 @@ options: --batch-size=BYTES flush changes to OS after a given number of bytes --max-size=BYTES do not index messages larger than the given size --reindex index previously indexed data (if upgrading) + --since=DATE limit --reindex to changes after DATE + --until=DATE limit --reindex to changes before DATE --rethread regenerate thread IDs (if upgrading, use sparingly) --prune prune git storage on discontiguous history --verbose | -v increase verbosity (may be repeated) @@ -37,12 +39,14 @@ my $opt = { 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune - fsync|sync! xapian_only|xapian-only + fsync|sync! xapian_only|xapian-only dangerous indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - sequential_shard|seq-shard|sequential-shard + since|after=s until|before=s + sequential-shard|seq-shard + multi-pack-index! no-update-extindex update-extindex|E=s@ - fast-noop|F skip-docdata all help|h)) + fast-noop|F skip-docdata all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; @@ -56,12 +60,14 @@ if ($opt->{reindex} && delete($opt->{'fast-noop'})) { # require lazily to speed up --help require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); my $cfg = PublicInbox::Config->new; # Config is loaded by Admin $opt->{-use_cwd} = 1; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } +require PublicInbox::InboxWritable; my (@eidx, %eidx_seen); my $update_extindex = $opt->{'update-extindex'}; @@ -92,8 +98,9 @@ for my $ei_name (@$update_extindex) { my $mods = {}; my @eidx_unconfigured; foreach my $ibx (@ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); # detect_indexlevel may also set $ibx->{-skip_docdata} - my $detected = PublicInbox::Admin::detect_indexlevel($ibx); + my $detected = $ibx->detect_indexlevel; # XXX: users can shoot themselves in the foot, with opt->{indexlevel} $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 'full' : $detected); @@ -107,21 +114,19 @@ The following inboxes are unconfigured and will not be updated in @$update_extindex:\n@eidx_unconfigured EOF -# "Search::Xapian" includes SWIG "Xapian", too: -$opt->{compact} = 0 if !$mods->{'Search::Xapian'}; +$opt->{compact} = 0 if !$mods->{'Xapian'}; # (or old Search::Xapian) PublicInbox::Admin::require_or_die(keys %$mods); my $env = PublicInbox::Admin::index_prepare($opt, $cfg); local %ENV = (%ENV, %$env) if $env; -require PublicInbox::InboxWritable; PublicInbox::Xapcmd::check_compact() if $opt->{compact}; PublicInbox::Admin::progress_prepare($opt); for my $ibx (@ibxs) { - $ibx = PublicInbox::InboxWritable->new($ibx); if ($opt->{compact} >= 2) { PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt}); } $ibx->{-no_fsync} = 1 if !$opt->{fsync}; + $ibx->{-dangerous} = 1 if $opt->{dangerous}; $ibx->{-skip_docdata} //= $opt->{'skip-docdata'}; my $ibx_opt = $opt; @@ -129,12 +134,12 @@ for my $ibx (@ibxs) { defined(my $v = $cfg->git_bool($s)) or die <<EOL; publicInbox.$ibx->{name}.indexSequentialShard not boolean EOL - $ibx_opt = { %$opt, sequential_shard => $v }; + $ibx_opt = { %$opt, 'sequential-shard' => $v }; } my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); last if $ibx_opt->{quit}; if (my $copt = $opt->{compact_opt}) { - local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; + local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } last if $ibx_opt->{quit}; diff --git a/script/public-inbox-init b/script/public-inbox-init index 6a867a22..cf6443f7 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -1,9 +1,10 @@ #!perl -w -# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> use strict; use v5.10.1; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; +use autodie qw(open chmod close rename); use Fcntl qw(:DEFAULT); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..] @@ -22,9 +23,10 @@ options: -V2 use scalable public-inbox-v2-format(5) -L LEVEL index level `basic', `medium', or `full' (default: full) --ng NEWSGROUP set NNTP newsgroup name + -c KEY=VALUE set additional config option(s) --skip-artnum=NUM NNTP article numbers to skip --skip-epoch=NUM epochs to skip (-V2 only) - -J JOBS number of indexing jobs (-V2 only), (default: 4) + -j JOBS number of indexing jobs (-V2 only), (default: 4) See public-inbox-init(1) man page for full documentation. EOF @@ -35,6 +37,7 @@ PublicInbox::Admin::require_or_die('-base'); my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help); my $skip_docdata; my $ng = ''; +my (@c_extra, @chdir); my %opts = ( 'V|version=i' => \$version, 'L|index-level|indexlevel=s' => \$indexlevel, @@ -44,6 +47,8 @@ my %opts = ( 'ng|newsgroup=s' => \$ng, 'skip-docdata' => \$skip_docdata, 'help|h' => \$show_help, + 'c=s@' => \@c_extra, + 'C=s@' => \@chdir, ); my $usage_cb = sub { print STDERR $help; @@ -51,12 +56,38 @@ my $usage_cb = sub { }; GetOptions(%opts) or $usage_cb->(); if ($show_help) { print $help; exit 0 }; -PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; my $name = shift @ARGV or $usage_cb->(); my $inboxdir = shift @ARGV or $usage_cb->(); my $http_url = shift @ARGV or $usage_cb->(); my (@address) = @ARGV; @address or $usage_cb->(); +PublicInbox::Admin::do_chdir(\@chdir); + +@c_extra = map { + my ($k, $v) = split(/=/, $_, 2); + defined($v) or die "Usage: -c KEY=VALUE\n"; + $k =~ /\A[a-z]+\z/i or die "$k contains invalid characters\n"; + $k = lc($k); + if ($k eq 'newsgroup') { + die "newsgroup already set ($ng)\n" if $ng ne ''; + $ng = $v; + (); + } elsif ($k eq 'address') { + push @address, $v; # for conflict checking + (); + } elsif ($k =~ /\A(?:inboxdir|mainrepo)\z/) { + die "$k not allowed via -c $_\n" + } elsif ($k eq 'indexlevel') { + defined($indexlevel) and + die "indexlevel already set ($indexlevel)\n"; + $indexlevel = $v; + (); + } else { + $_ + } +} @c_extra; + +PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; $ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and die "--newsgroup `$ng' is not valid\n"; @@ -65,8 +96,7 @@ $ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! and require PublicInbox::Config; my $pi_config = PublicInbox::Config->default_file; -require File::Basename; -my $dir = File::Basename::dirname($pi_config); +my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!); require File::Path; File::Path::mkpath($dir); # will croak on fatal errors @@ -79,7 +109,7 @@ PublicInbox::Lock::lock_acquire($lock_obj); # git-config will operate on this (and rename on success): require File::Temp; -my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXXXXXX', DIR => $dir); +my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir); # Now, we grab another lock to use git-config(1) locking, so it won't # wait on the lock, unlike some of our internal flock()-based locks. @@ -92,18 +122,17 @@ sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { exit(255); }; require PublicInbox::OnDestroy; -my $auto_unlink = PublicInbox::OnDestroy->new($$, sub { unlink $lockfile }); -my ($perm, %seen); +my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile }); +my $perm = 0644 & ~umask; +my %seen; if (-e $pi_config) { - open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n"; - my @st = stat($oh); + require PublicInbox::IO; + open(my $oh, '<', $pi_config); + my @st = stat($oh) or die "(f)stat failed on $pi_config: $!\n"; $perm = $st[2]; - defined $perm or die "(f)stat failed on $pi_config: $!\n"; - chmod($perm & 07777, $fh) or - die "(f)chmod failed on future $pi_config: $!\n"; - defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n"; - print $fh $old or die "failed to write: $!\n"; - close $oh or die "failed to close $pi_config: $!\n"; + chmod($perm & 07777, $fh); + print $fh PublicInbox::IO::read_all($oh); + close $oh; # yes, this conflict checking is racy if multiple instances of this # script are run by the same $PI_DIR @@ -130,7 +159,7 @@ if (-e $pi_config) { $indexlevel //= $ibx->{indexlevel} if $ibx; } my $pi_config_tmp = $fh->filename; -close($fh) or die "failed to close $pi_config_tmp: $!\n"; +close($fh); my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); @@ -183,8 +212,14 @@ if ($skip_docdata) { } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); +my $f = "$inboxdir/description"; +if (sysopen $fh, $f, O_CREAT|O_EXCL|O_WRONLY) { + print $fh "public inbox for $address[0]\n"; + close $fh; +} + # needed for git prior to v2.1.0 -umask(0077) if defined $perm; +umask(0077); require PublicInbox::Spawn; PublicInbox::Spawn->import(qw(run_die)); @@ -201,12 +236,16 @@ if (defined($indexlevel)) { } run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne ''; -# needed for git prior to v2.1.0 -if (defined $perm) { - chmod($perm & 07777, $pi_config_tmp) or - die "(f)chmod failed on future $pi_config: $!\n"; +for my $kv (@c_extra) { + my ($k, $v) = split(/=/, $kv, 2); + # git 2.30+ has --fixed-value for idempotent invocations, + # but that's too new to depend on in 2021. Perl quotemeta + # seems compatible enough for POSIX ERE which git uses + my $re = '^'.quotemeta($v).'$'; + run_die([@x, qw(--replace-all), "$pfx.$k", $v, $re]); } -rename $pi_config_tmp, $pi_config or - die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n"; +# needed for git prior to v2.1.0 +chmod($perm & 07777, $pi_config_tmp); +rename $pi_config_tmp, $pi_config; undef $auto_unlink; # trigger ->DESTROY diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 8b8e1b77..a955cdf6 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -28,6 +28,7 @@ use PublicInbox::Spamcheck::Spamc; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my %opt = (all => 0); GetOptions(\%opt, qw(all help|h)) or die $help; +use PublicInbox::Import; my $train = shift or die $help; if ($train !~ /\A(?:ham|spam|rm)\z/) { @@ -37,10 +38,12 @@ die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm'; my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; +PublicInbox::Import::load_config($pi_cfg); my $err; my $mime = PublicInbox::Eml->new(do{ - defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; - $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + my $data = PublicInbox::IO::read_all \*STDIN; + PublicInbox::Eml::strip_from($data); if ($train ne 'rm') { eval { @@ -64,6 +67,7 @@ sub remove_or_add ($$$$) { $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; $ibx = PublicInbox::InboxWritable->new($ibx); + $ibx->{indexlevel} = $ibx->detect_indexlevel; my $im = $ibx->importer(0); if ($train eq "rm") { @@ -109,12 +113,12 @@ if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { my %seen; while (my ($addr, $ibx) = each %dests) { next unless ref($ibx); # $ibx may be 0 - next if $seen{"$ibx"}++; + next if $seen{0 + $ibx}++; remove_or_add($ibx, $train, $mime, $addr); } my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime); for my $ibx (@$dests) { - next if $seen{"$ibx"}++; + next if $seen{0 + $ibx}++; remove_or_add($ibx, $train, $mime, $ibx->{-primary_address}); } } diff --git a/script/public-inbox-mda b/script/public-inbox-mda index 7e2bee92..b463b07b 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2013-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Mail delivery agent for public-inbox, run from your MTA upon mail delivery @@ -16,8 +16,14 @@ use strict; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my ($ems, $emm, $show_help); my $precheck = 1; +use PublicInbox::Import; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # does this need a CLI switch? GetOptions('precheck!' => \$precheck, 'help|h' => \$show_help) or do { print STDERR $help; exit 1 }; +if ($show_help) { + print $help; + exit; +} my $do_exit = sub { my ($code) = shift; @@ -33,13 +39,13 @@ use PublicInbox::Filter::Base; use PublicInbox::InboxWritable; use PublicInbox::Spamcheck; -# n.b: hopefully we can setup the emergency path without bailing due to -# user error, we really want to setup the emergency destination ASAP +# n.b.: Hopefully we can set up the emergency path without bailing due to +# user error, we really want to set up the emergency destination ASAP # in case there's bugs in our code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); -my $str = do { local $/; <STDIN> }; -$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $str = PublicInbox::IO::read_all \*STDIN; +PublicInbox::Eml::strip_from($str); $ems->prepare(\$str); my $eml = PublicInbox::Eml->new(\$str); my $cfg = PublicInbox::Config->new; @@ -47,6 +53,8 @@ my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default); my $dests = []; +PublicInbox::Import::load_config($cfg, $do_exit); + my $recipient = $ENV{ORIGINAL_RECIPIENT}; if (defined $recipient) { my $ibx = $cfg->lookup($recipient); # first check @@ -55,7 +63,8 @@ if (defined $recipient) { if (!scalar(@$dests)) { $dests = PublicInbox::MDA->inboxes_for_list_id($cfg, $eml); if (!scalar(@$dests) && !defined($recipient)) { - die "ORIGINAL_RECIPIENT not defined in ENV\n"; + warn "ORIGINAL_RECIPIENT not defined in ENV\n"; + $do_exit->(67); # EX_NOUSER } scalar(@$dests) or $do_exit->(67); # EX_NOUSER 5.1.1 user unknown } diff --git a/script/public-inbox-netd b/script/public-inbox-netd new file mode 100755 index 00000000..e8b1ca69 --- /dev/null +++ b/script/public-inbox-netd @@ -0,0 +1,6 @@ +#!/usr/bin/perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run(); diff --git a/script/public-inbox-nntpd b/script/public-inbox-nntpd index 9fb0a8d9..aca27383 100755 --- a/script/public-inbox-nntpd +++ b/script/public-inbox-nntpd @@ -1,15 +1,8 @@ -#!/usr/bin/perl -w -# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone NNTP server for public-inbox. -use strict; -use warnings; +use v5.12; use PublicInbox::Daemon; -use PublicInbox::NNTPdeflate; # loads PublicInbox::NNTP -use PublicInbox::NNTPD; -my $nntpd = PublicInbox::NNTPD->new; -PublicInbox::Daemon::run('0.0.0.0:119', - sub { $nntpd->refresh_groups }, # refresh - sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }, # post_accept - $nntpd); +PublicInbox::Daemon::run('nntp://0.0.0.0:119'); diff --git a/script/public-inbox-pop3d b/script/public-inbox-pop3d new file mode 100755 index 00000000..ec944aee --- /dev/null +++ b/script/public-inbox-pop3d @@ -0,0 +1,8 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Standalone POP3 server for public-inbox. +use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run('pop3://0.0.0.0:110'); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 59c03150..618cfec4 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for purging messages entirely from a public-inbox. Currently @@ -26,14 +26,15 @@ See public-inbox-purge(1) man page for full documentation. EOF my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 }; -GetOptions($opt, @PublicInbox::AdminEdit::OPT) or die $help; +GetOptions($opt, @PublicInbox::AdminEdit::OPT, 'C=s@') or die $help; if ($opt->{help}) { print $help; exit 0 }; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; -$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $data = PublicInbox::IO::read_all \*STDIN; +PublicInbox::Eml::strip_from($data); my $n_purged = 0; foreach my $ibx (@ibxs) { diff --git a/script/public-inbox-watch b/script/public-inbox-watch index 86349d71..9bcd42ed 100755 --- a/script/public-inbox-watch +++ b/script/public-inbox-watch @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> my $help = <<EOF; usage: public-inbox-watch @@ -11,15 +11,15 @@ use strict; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use IO::Handle; # ->autoflush use PublicInbox::Watch; +use PublicInbox::Import; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; use PublicInbox::Config; use PublicInbox::DS; -use PublicInbox::Sigfd; -use PublicInbox::Syscall qw(SFD_NONBLOCK); my $do_scan = 1; GetOptions('scan!' => \$do_scan, # undocumented, testing only 'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 }; if ($show_help) { print $help; exit 0 }; -my $oldset = PublicInbox::DS::block_signals(); +PublicInbox::DS::block_signals(); STDOUT->autoflush(1); STDERR->autoflush(1); local $0 = $0; # local since this script may be eval-ed @@ -29,7 +29,8 @@ my $reload = sub { $watch->quit; $watch = PublicInbox::Watch->new(PublicInbox::Config->new); if ($watch) { - warn("I: reloaded\n"); + $watch->{sig} = $prev->{sig}; # prevent redundant signalfd + warn "# reloaded\n"; } else { warn("E: reloading failed\n"); $watch = $prev; @@ -39,10 +40,10 @@ my $reload = sub { if ($watch) { my $scan = sub { return if !$watch; - warn "I: scanning\n"; + warn "# scanning\n"; $watch->trigger_scan('full'); }; - my $quit = sub { + my $quit = sub { # may be called in IMAP/NNTP children $watch->quit if $watch; $watch = undef; $0 .= ' quitting'; @@ -53,15 +54,9 @@ if ($watch) { CHLD => \&PublicInbox::DS::enqueue_reap, }; $sig->{QUIT} = $sig->{TERM} = $sig->{INT} = $quit; + local @SIG{keys %$sig} = values(%$sig); # for non-signalfd/kqueue # --no-scan is only intended for testing atm, undocumented. PublicInbox::DS::requeue($scan) if $do_scan; - - my $sigfd = PublicInbox::Sigfd->new($sig, SFD_NONBLOCK); - local @SIG{keys %$sig} = values(%$sig) unless $sigfd; - if (!$sigfd) { - PublicInbox::DS::sig_setmask($oldset); - PublicInbox::DS->SetLoopTimeout(1000); - } - $watch->watch($sig, $oldset) while ($watch); + $watch->watch($sig) while ($watch); } diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index 3c99fde8..fac54559 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -1,11 +1,10 @@ #!perl -w -# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use v5.10.1; +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: -usage: public-inbox-xcpdb [options] INBOX_DIR +usage: public-inbox-xcpdb [options] <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR> upgrade or reshard Xapian DB(s) used by public-inbox @@ -26,38 +25,46 @@ index options (see public-inbox-index(1) man page for full description): See public-inbox-xcpdb(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, fsync => 1 }; +my $opt = { quiet => -1, compact => 0, fsync => 1, + -eidx_ok => 1, -cidx_ok => 1 }; GetOptions($opt, qw( fsync|sync! compact|c reshard|R=i max_size|max-size=s batch_size|batch-size=s - sequential_shard|seq-shard|sequential-shard + sequential-shard|seq-shard jobs|j=i quiet|q verbose|v blocksize|b=s no-full|n fuller|F - all help|h)) or die $help; + all C=s@ help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); require PublicInbox::Config; my $cfg = PublicInbox::Config->new; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg) or - die $help; +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg); # we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't # know sizes, only doccounts $opt->{-idx_env} = $idx_env; -if ($opt->{sequential_shard} && ($opt->{jobs} // 1) > 1) { +if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) { warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n"; $opt->{jobs} = 0; } require PublicInbox::InboxWritable; require PublicInbox::Xapcmd; -foreach (@ibxs) { - my $ibx = PublicInbox::InboxWritable->new($_); - # we rely on --no-renumber to keep docids synched for NNTP +# we rely on --no-renumber to keep docids synched for NNTP(artnum) + IMAP(UID) +for my $ibx (@$ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt); } + +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? $ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'cpdb', $opt); +} |