diff options
Diffstat (limited to 'script')
-rwxr-xr-x | script/lei | 144 | ||||
-rwxr-xr-x | script/public-inbox-cindex | 102 | ||||
-rwxr-xr-x | script/public-inbox-clone | 70 | ||||
-rwxr-xr-x | script/public-inbox-compact | 53 | ||||
-rwxr-xr-x | script/public-inbox-convert | 176 | ||||
-rwxr-xr-x | script/public-inbox-edit | 73 | ||||
-rwxr-xr-x | script/public-inbox-extindex | 91 | ||||
-rwxr-xr-x | script/public-inbox-fetch | 39 | ||||
-rwxr-xr-x | script/public-inbox-httpd | 56 | ||||
-rwxr-xr-x | script/public-inbox-imapd | 8 | ||||
-rwxr-xr-x | script/public-inbox-index | 155 | ||||
-rwxr-xr-x | script/public-inbox-init | 240 | ||||
-rwxr-xr-x[-rw-r--r--] | script/public-inbox-learn | 59 | ||||
-rwxr-xr-x | script/public-inbox-mda | 63 | ||||
-rwxr-xr-x | script/public-inbox-netd | 6 | ||||
-rwxr-xr-x | script/public-inbox-nntpd | 15 | ||||
-rwxr-xr-x | script/public-inbox-pop3d | 8 | ||||
-rwxr-xr-x | script/public-inbox-purge | 28 | ||||
-rwxr-xr-x | script/public-inbox-watch | 72 | ||||
-rwxr-xr-x | script/public-inbox-xcpdb | 77 | ||||
-rwxr-xr-x | script/public-inbox.cgi | 9 |
21 files changed, 1164 insertions, 380 deletions
diff --git a/script/lei b/script/lei new file mode 100755 index 00000000..087afc33 --- /dev/null +++ b/script/lei @@ -0,0 +1,144 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use Socket qw(AF_UNIX SOCK_SEQPACKET pack_sockaddr_un); +use PublicInbox::CmdIPC4; +my $narg = 5; +my $sock; +my $recv_cmd = PublicInbox::CmdIPC4->can('recv_cmd4'); +my $send_cmd = PublicInbox::CmdIPC4->can('send_cmd4') // do { + require PublicInbox::Syscall; + $recv_cmd = PublicInbox::Syscall->can('recv_cmd4'); + PublicInbox::Syscall->can('send_cmd4'); +} // do { + my $inline_dir = $ENV{PERL_INLINE_DIRECTORY} //= ( + $ENV{XDG_CACHE_HOME} // + ( ($ENV{HOME} // '/nonexistent').'/.cache' ) + ).'/public-inbox/inline-c'; + if (!-d $inline_dir) { + require File::Path; + File::Path::make_path($inline_dir); + } + require PublicInbox::Spawn; # takes ~50ms even if built *sigh* + $recv_cmd = PublicInbox::Spawn->can('recv_cmd4'); + PublicInbox::Spawn->can('send_cmd4'); +} // die 'please install Inline::C or Socket::MsgHdr'; + +my %pids; +my $sigchld = sub { + my $flags = scalar(@_) ? POSIX::WNOHANG() : 0; + for my $pid (keys %pids) { + delete($pids{$pid}) if waitpid($pid, $flags) == $pid; + } +}; +my @parent; +my $exec_cmd = sub { + my ($fds, $argc, @argv) = @_; + my $parent = $$; + require POSIX; + my @old = (*STDIN{IO}, *STDOUT{IO}, *STDERR{IO}); + my @rdr; + for my $fd (@$fds) { + open(my $newfh, '+<&=', $fd) or die "open +<&=$fd: $!"; + push @rdr, shift(@old), $newfh; + } + my $do_exec = sub { + my @non_std; # ex. 
$op_p from lei_edit_search + while (my ($io, $newfh) = splice(@rdr, 0, 2)) { + my $old_io = !!$io; + open $io, '+<&', $newfh or die "open +<&=: $!"; + push @non_std, $io unless $old_io; + } + if (@non_std) { + require Fcntl; + fcntl($_, Fcntl::F_SETFD(), 0) for @non_std; + } + my %env = map { split(/=/, $_, 2) } splice(@argv, $argc); + @ENV{keys %env} = values %env; + umask 077; + exec(@argv); + warn "exec: @argv: $!\n"; + POSIX::_exit(1); + }; + $SIG{CHLD} = $sigchld; + my $pid = fork // die "fork: $!"; + if ($pid == 0) { + $do_exec->() if $fds->[1]; # git-credential, pager + + # parent backgrounds on MUA + POSIX::setsid() > 0 or die "setsid: $!"; + @parent = ($parent); + return; # continue $recv_cmd in background + } + if ($fds->[1]) { + $pids{$pid} = undef; + } else { + $do_exec->(); # MUA reuses stdout + } +}; + +my $runtime_dir = ($ENV{XDG_RUNTIME_DIR} // '') . '/lei'; +if ($runtime_dir eq '/lei') { + require File::Spec; + $runtime_dir = File::Spec->tmpdir."/lei-$<"; +} +unless (-d $runtime_dir) { + require File::Path; + File::Path::make_path($runtime_dir, { mode => 0700 }); +} +my $path = "$runtime_dir/$narg.seq.sock"; +my $addr = pack_sockaddr_un($path); +socket($sock, AF_UNIX, SOCK_SEQPACKET, 0) or die "socket: $!"; +unless (connect($sock, $addr)) { # start the daemon if not started + local $ENV{PERL5LIB} = join(':', @INC); + open(my $daemon, '-|', $^X, $^W ? ('-w') : (), + qw[-MPublicInbox::LEI -e PublicInbox::LEI::lazy_start(@ARGV)], + $path, $! + 0, $narg) or die "popen: $!"; + while (<$daemon>) { warn $_ } # EOF when STDERR is redirected + close($daemon) or warn <<""; +lei-daemon could not start, exited with \$?=$? + + # try connecting again anyways, unlink+bind may be racy + connect($sock, $addr) or die <<""; +connect($path): $! (after attempted daemon start) + +} +# (Socket::MsgHdr|Inline::C), $sock are all available: +open my $dh, '<', '.' or die "open(.) 
$!"; +my $buf = join("\0", scalar(@ARGV), @ARGV); +while (my ($k, $v) = each %ENV) { $buf .= "\0$k=$v" } +$buf .= "\0\0"; +$send_cmd->($sock, [0, 1, 2, fileno($dh)], $buf, 0) or die "sendmsg: $!"; +$SIG{TSTP} = sub { send($sock, 'STOP', 0); kill 'STOP', $$ }; +$SIG{CONT} = sub { send($sock, 'CONT', 0) }; + +my $x_it_code = 0; +while (1) { + my (@fds) = $recv_cmd->($sock, my $buf, 4096 * 33); + die "recvmsg: $!" if scalar(@fds) == 1 && !defined($fds[0]); + last if $buf eq ''; + if ($buf =~ /\Aexec (.+)\z/) { + $exec_cmd->(\@fds, split(/\0/, $1)); + } elsif ($buf eq '-WINCH') { + kill($buf, @parent); # for MUA + } elsif ($buf eq 'umask') { + send($sock, 'u'.pack('V', umask), 0) or die "send: $!" + } elsif ($buf =~ /\Ax_it ([0-9]+)\z/) { + $x_it_code ||= $1 + 0; + last; + } elsif ($buf =~ /\Achild_error ([0-9]+)\z/) { + $x_it_code ||= $1 + 0; + } elsif ($buf eq 'wait') { + $sigchld->(); + } else { + $sigchld->(); + die $buf; + } +} +$sigchld->(); +if (my $sig = ($x_it_code & 127)) { + kill $sig, $$; + sleep(1) while 1; # no self-pipe/signalfd, here, so we loop +} +exit($x_it_code >> 8); diff --git a/script/public-inbox-cindex b/script/public-inbox-cindex new file mode 100755 index 00000000..dd00623a --- /dev/null +++ b/script/public-inbox-cindex @@ -0,0 +1,102 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-cindex [options] -g GIT_DIR [-g GIT_DIR]... 
+usage: public-inbox-cindex [options] --project-list=FILE -r PROJECT_ROOT + + Create and update search indices for code repos + + -d EXTDIR use EXTDIR instead of GIT_DIR/public-inbox-cindex + --no-fsync speed up indexing, risk corruption on power outage + -L LEVEL `medium', or `full' (default: medium) + --project-list=FILE use a cgit/gitweb-compatible list of projects + --update | -u update previously-indexed code repos with `-d' + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index commit diffs larger than the given size + --prune prune old repos and commits + --reindex reindex previously indexed repos + --verbose | -v increase verbosity (may be repeated) + +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-cindex(1) man page for full documentation. +EOF +my $opt = { fsync => 1, scan => 1 }; # --no-scan is hidden +GetOptions($opt, qw(quiet|q verbose|v+ reindex jobs|j=i fsync|sync! dangerous + indexlevel|index-level|L=s join:s@ + batch_size|batch-size=s max_size|max-size=s + include|I=s@ only=s@ all show:s@ + project-list=s exclude=s@ project-root|r=s + git-dir|g=s@ + sort-parallel=s sort-compress-program=s sort-buffer-size=s + d=s update|u scan! 
prune dry-run|n C=s@ help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); +$SIG{USR1} = 'IGNORE'; # to be overridden in cidx_sync +$SIG{PIPE} = 'IGNORE'; +# require lazily to speed up --help +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); +my $cfg = $opt->{-pi_cfg} = PublicInbox::Config->new; +my $cidx_dir = $opt->{d}; +PublicInbox::Admin::require_or_die('Xapian'); +PublicInbox::Admin::progress_prepare($opt); +my $env = PublicInbox::Admin::index_prepare($opt, $cfg); +%ENV = (%ENV, %$env) if $env; + +my @git_dirs; +require PublicInbox::CodeSearchIdx; # unstable internal API +if (@ARGV) { + my @g = map { "-g $_" } @ARGV; + die <<EOM; +Specify git directories with `-g' (or --git-dir=): @g +Or use --project-list=... and --project-root=... +EOM +} elsif (defined(my $pl = $opt->{'project-list'})) { + my $pfx = $opt->{'project-root'} // die <<EOM; +PROJECT_ROOT required for --project-list +EOM + $opt->{'git-dir'} and die <<EOM; +--project-list does not accept additional --git-dir directories +(@{$opt->{'git-dir'}}) +EOM + open my $fh, '<', $pl or die "open($pl): $!\n"; + chomp(@git_dirs = <$fh>); + $pfx .= '/'; + $pfx =~ tr!/!/!s; + substr($_, 0, 0, $pfx) for @git_dirs; +} elsif (my $gd = $opt->{'git-dir'}) { + @git_dirs = @$gd; +} elsif (grep defined, @$opt{qw(show update prune)}) { +} else { + warn "No --git-dir= nor --project-list= + --project-root= specified\n"; + die $help; +} + +$_ = PublicInbox::Admin::resolve_git_dir($_) for @git_dirs; +if (defined $cidx_dir) { # external index + die "`%' is not allowed in $cidx_dir\n" if $cidx_dir =~ /\%/; + my $cidx = PublicInbox::CodeSearchIdx->new($cidx_dir, $opt); + @{$cidx->{git_dirs}} = @git_dirs; # may be empty + $cidx->cidx_run; +} elsif (!@git_dirs) { + die $help +} else { + die <<EOM if $opt->{update}; +--update requires `-d EXTDIR' 
+EOM + for my $gd (@git_dirs) { + my $cd = "$gd/public-inbox-cindex"; + my $cidx = PublicInbox::CodeSearchIdx->new($cd, { %$opt }); + $cidx->{-cidx_internal} = 1; + @{$cidx->{git_dirs}} = ($gd); + $cidx->cidx_run; + } +} diff --git a/script/public-inbox-clone b/script/public-inbox-clone new file mode 100755 index 00000000..c3e64485 --- /dev/null +++ b/script/public-inbox-clone @@ -0,0 +1,70 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git clone remote public-inboxes +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-clone [OPTIONS] INBOX_URL [INBOX_DIR] + public-inbox-clone [OPTIONS] ROOT_URL [DESTINATION] + + clone remote public-inboxes or grokmirror manifests + +options: + + --epoch=RANGE range of v2 epochs to clone (e.g `2..5', `~0', `~1..') + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + --dry-run | -n show what would be cloned without cloning + --verbose | -v increase verbosity (may be repeated) + --quiet | -q disable progress reporting + -C DIR chdir to specified directory + +See public-inbox-clone(1) man page for --manifest, --remote-manifest, +--objstore, --project-list, --post-update-hook, --include, --exclude, +--prune, --keep-going, --jobs, --inbox-config +EOF + +# cgit calls it `project-list', grokmirror calls it `projectslist', +# support both :/ +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ include|I=s@ exclude=s@ + inbox-config=s inbox-version=i objstore=s manifest=s + remote-manifest=s project-list|projectslist=s post-update-hook=s@ + prune|p keep-going|k exit-code purge + dry-run|n jobs|j=i no-torsocks torsocks=s epoch=s)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Admin; 
# loads Config +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my ($url, $dst, $extra) = @ARGV; +die $help if !defined($url) || defined($extra); +defined($dst) or ($dst) = ($url =~ m!/([^/]+)/?\z!); +index($dst, "\n") >= 0 and die "`\\n' not allowed in `$dst'"; + +# n.b. this is still a truckload of code... +require File::Spec; +require PublicInbox::LEI; +require PublicInbox::LeiExternal; +require PublicInbox::LeiMirror; + +$url = PublicInbox::LeiExternal::ext_canonicalize($url); +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-clone', + 0 => *STDIN{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +open $lei->{1}, '+<&=', 1 or die "dup: $!"; +open $lei->{3}, '.' or die "open . $!"; +my $mrr = bless { + lei => $lei, + src => $url, + dst => File::Spec->canonpath($dst), +}, 'PublicInbox::LeiMirror'; + +$? = 0; +$mrr->do_mirror; +$mrr->can('_wq_done_wait')->($$, $mrr, $lei); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-compact b/script/public-inbox-compact index 5c681466..1062be5a 100755 --- a/script/public-inbox-compact +++ b/script/public-inbox-compact @@ -1,19 +1,44 @@ -#!/usr/bin/perl -w -# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -use strict; -use warnings; +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::InboxWritable; -use PublicInbox::Xapcmd; -use PublicInbox::Admin; +my $opt = { compact => 1, -coarse_lock => 1, + -eidx_ok => 1, -cidx_ok => 1 }; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-compact <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR> + + Compact Xapian DBs in an inbox + +options: + + --all index all configured inboxes + --jobs=NUM control parallelization + +See public-inbox-compact(1) man page 
for full documentation. +EOF +GetOptions($opt, qw(all C=s@ help|h), + # compact options: + qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F), +) or die $help; +if ($opt->{help}) { print $help; exit 0 }; + +require PublicInbox::Admin; PublicInbox::Admin::require_or_die('-index'); -my $usage = "Usage: public-inbox-compact INBOX_DIR\n"; -my $opt = { compact => 1, -coarse_lock => 1 }; -GetOptions($opt, @PublicInbox::Xapcmd::COMPACT_OPT) or - die "bad command-line args\n$usage"; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; -foreach (@ibxs) { - my $ibx = PublicInbox::InboxWritable->new($_); +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::progress_prepare($opt); + +require PublicInbox::InboxWritable; +require PublicInbox::Xapcmd; +my $cfg = PublicInbox::Config->new; +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } +for my $ibx (@$ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'compact', $opt); } +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? 
$ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'compact', $opt); +} diff --git a/script/public-inbox-convert b/script/public-inbox-convert index e13c13f4..713c2881 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -1,106 +1,126 @@ #!/usr/bin/perl -w -# Copyright (C) 2018-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <http://www.gnu.org/licenses/agpl-3.0.txt> use strict; -use warnings; +use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::InboxWritable; -use PublicInbox::Config; -use PublicInbox::Admin; -use PublicInbox::V2Writable; -use PublicInbox::Git; -use PublicInbox::Spawn qw(spawn); -use Cwd 'abs_path'; -use File::Copy 'cp'; # preserves permissions: -my $usage = "Usage: public-inbox-convert OLD NEW\n"; -my $jobs; -my $index = 1; -my %opts = ( - '--jobs|j=i' => \$jobs, - '--index!' => \$index, -); -GetOptions(%opts) or die "bad command-line args\n$usage"; -my $old_dir = shift(@ARGV) or die $usage; -my $new_dir = shift(@ARGV) or die $usage; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-convert [options] OLD NEW + + convert v1 format inboxes to v2 + +options: + + --no-index do not index after conversion + --jobs=NUM set shards (NUM=0) + --verbose | -v increase verbosity (may be repeated) + +index options (see public-inbox-index(1) man page for full description): + + --no-fsync speed up indexing, risk corruption on power outage + -L LEVEL `basic', `medium', or `full' (default: full) + --compact | -c run public-inbox-compact(1) after indexing + --sequential-shard index Xapian shards sequentially for slow storage + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index messages larger than the given size + +See public-inbox-convert(1) man page for full documentation. 
+EOF + +my $opt = { + index => 1, + # index defaults: + quiet => -1, compact => 0, maxsize => undef, fsync => 1, + reindex => 1, # we always reindex +}; +GetOptions($opt, qw(jobs|j=i index! help|h C=s@), + # index options + qw(verbose|v+ rethread compact|c+ fsync|sync! + indexlevel|index-level|L=s max_size|max-size=s + batch_size|batch-size=s + sequential-shard|seq-shard + )) or die $help; +if ($opt->{help}) { print $help; exit 0 }; +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); +my $old_dir = shift(@ARGV) // ''; +my $new_dir = shift(@ARGV) // ''; +die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -my $config = eval { PublicInbox::Config->new }; -$old_dir = abs_path($old_dir); -my $old; -if ($config) { - $config->each_inbox(sub { - $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; - }); -} -unless ($old) { + +require PublicInbox::Config; +require PublicInbox::InboxWritable; + +my $cfg = PublicInbox::Config->new; +my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); +@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", + map { "\t$_->{inboxdir}\n" } @old; +my $old = PublicInbox::InboxWritable->new($old[0]); +if (delete $old->{-unconfigured}) { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . 
"\n"; - $old = { - inboxdir => $old_dir, - name => 'ignored', - address => [ 'old@example.com' ], - }; - $old = PublicInbox::Inbox->new($old); -} -$old = PublicInbox::InboxWritable->new($old); -if ($old->version >= 2) { - die "Only conversion from v1 inboxes is supported\n"; } +die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -$old->{indexlevel} //= PublicInbox::Admin::detect_indexlevel($old); -if ($index) { +my $detected = $old->detect_indexlevel; +$old->{indexlevel} //= $detected; +my $env; +if ($opt->{'index'}) { my $mods = {}; PublicInbox::Admin::scan_ibx_modules($mods, $old); PublicInbox::Admin::require_or_die(keys %$mods); + PublicInbox::Admin::progress_prepare($opt); + $env = PublicInbox::Admin::index_prepare($opt, $cfg); } - +local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = abs_path($new_dir); +$new->{inboxdir} = PublicInbox::Config::rel2abs_collapsed($new_dir); $new->{version} = 2; -$new = PublicInbox::InboxWritable->new($new); +$new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); +$new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; -$old->umask_prepare; sub link_or_copy ($$) { my ($src, $dst) = @_; link($src, $dst) and return; $!{EXDEV} or warn "link $src, $dst failed: $!, trying cp\n"; - cp($src, $dst) or die "cp $src, $dst failed: $!\n"; + require File::Copy; # preserves permissions: + File::Copy::cp($src, $dst) or die "cp $src, $dst failed: $!\n"; } -$old->with_umask(sub { +{ + my $restore = $old->with_umask; my $old_cfg = "$old->{inboxdir}/config"; local $ENV{GIT_CONFIG} = $old_cfg; my $new_cfg = "$new->{inboxdir}/all.git/config"; - $v2w = PublicInbox::V2Writable->new($new, 1); - $v2w->init_inbox($jobs); + $v2w = $new->importer(1); + $v2w->init_inbox(delete $opt->{jobs}); unlink $new_cfg; link_or_copy($old_cfg, $new_cfg); if (my $alt = $new->{altid}) { require PublicInbox::AltId; foreach my $i (0..$#$alt) { my $src = PublicInbox::AltId->new($old, $alt->[$i], 0); - $src->mm_alt or 
next; + $src = $src->mm_alt or next; + $src = $src->{dbh}->sqlite_db_filename; my $dst = PublicInbox::AltId->new($new, $alt->[$i], 1); - $dst = $dst->{filename}; - $src->mm_alt->{dbh}->sqlite_backup_to_file($dst); + $dst->mm_alt->{dbh}->sqlite_backup_from_file($src); } } my $desc = "$old->{inboxdir}/description"; link_or_copy($desc, "$new->{inboxdir}/description") if -e $desc; my $clone = "$old->{inboxdir}/cloneurl"; - if (-e $clone) { - warn <<""; + warn <<"" if -e $clone; $clone may not be valid after migrating to v2, not copying - } -}); +} my $state = ''; my $head = $old->{ref_head} || 'HEAD'; -my ($rd, $pid) = $old->git->popen(qw(fast-export --use-done-feature), $head); -$v2w->idx_init; +my $rd = $old->git->popen(qw(fast-export --use-done-feature), $head); +$v2w->idx_init($opt); my $im = $v2w->importer; -my ($r, $w) = $im->gfi_start; +my $io = $im->gfi_start; my $h = '[0-9a-f]'; my %D; my $last; @@ -110,23 +130,17 @@ while (<$rd>) { } elsif (/^commit /) { $state = 'commit'; } elsif (/^data ([0-9]+)/) { - my $len = $1; - $w->print($_) or $im->wfail; - while ($len) { - my $n = read($rd, my $tmp, $len) or die "read: $!"; - warn "$n != $len\n" if $n != $len; - $len -= $n; - $w->print($tmp) or $im->wfail; - } + print $io $_ or $im->wfail; + print $io PublicInbox::IO::read_all($rd, $1) or $im->wfail; next; } elsif ($state eq 'commit') { if (m{^M 100644 :([0-9]+) (${h}{2}/${h}{38})}o) { my ($mark, $path) = ($1, $2); $D{$path} = $mark; if ($last && $last ne 'm') { - $w->print("D $last\n") or $im->wfail; + print $io "D $last\n" or $im->wfail; } - $w->print("M 100644 :$mark m\n") or $im->wfail; + print $io "M 100644 :$mark m\n" or $im->wfail; $last = 'm'; next; } @@ -134,31 +148,31 @@ while (<$rd>) { my $mark = delete $D{$1}; defined $mark or die "undeleted path: $1\n"; if ($last && $last ne 'd') { - $w->print("D $last\n") or $im->wfail; + print $io "D $last\n" or $im->wfail; } - $w->print("M 100644 :$mark d\n") or $im->wfail; + print $io "M 100644 :$mark d\n" or 
$im->wfail; $last = 'd'; next; } } last if $_ eq "done\n"; - $w->print($_) or $im->wfail; + print $io $_ or $im->wfail; } -$w = $r = undef; -close $rd or die "close fast-export: $!\n"; -waitpid($pid, 0) or die "waitpid failed: $!\n"; -$? == 0 or die "fast-export failed: $?\n"; +$rd->close or die "fast-export: \$?=$? \$!=$!\n"; +$io = undef; $v2w->done; -if (my $mm = $old->mm) { +if (my $old_mm = $old->mm) { $old->cleanup; - $mm->{dbh}->sqlite_backup_to_file("$new_dir/msgmap.sqlite3"); + $old_mm = $old_mm->{dbh}->sqlite_db_filename; # we want to trigger a reindex, not a from scratch index if # we're reusing the msgmap from an existing v1 installation. - $v2w->idx_init; - my $epoch0 = PublicInbox::Git->new($v2w->git_init(0)); + $v2w->idx_init($opt); + $v2w->{mm}->{dbh}->sqlite_backup_from_file($old_mm); + + my $epoch0 = PublicInbox::Git->new($v2w->{mg}->add_epoch(0)); chop(my $cmt = $epoch0->qx(qw(rev-parse --verify), $head)); $v2w->last_epoch_commit(0, $cmt); } -$v2w->index_sync({reindex => 1}) if $index; +$v2w->index_sync($opt) if delete $opt->{'index'}; $v2w->done; diff --git a/script/public-inbox-edit b/script/public-inbox-edit index ae5d8289..88115d7c 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for editing messages in a public-inbox. 
@@ -9,20 +9,36 @@ use warnings; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; use File::Temp 0.19 (); # 0.19 for TMPDIR -use PublicInbox::ContentId qw(content_id); +use PublicInbox::ContentHash qw(content_hash); use PublicInbox::MID qw(mid_clean mids); PublicInbox::Admin::check_require('-index'); -use PublicInbox::MIME; -use PublicInbox::InboxWritable; +use PublicInbox::Eml; +use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::Import; -my $usage = "$0 -m MESSAGE_ID [--all] [INBOX_DIRS]"; +my $help = <<'EOF'; +usage: public-inbox-edit -m MESSAGE-ID [--all] [INBOX_DIRS] + + destructively edit messages in a public inbox + +options: + + --all edit all configured inboxes + -m MESSAGE-ID edit the message with a given Message-ID + -F FILE edit the message matching the contents of FILE + --force forcibly edit even if Message-ID is ambiguous + --raw do not perform "From " line escaping + +See public-inbox-edit(1) man page for full documentation. +EOF + my $opt = { verbose => 1, all => 0, -min_inbox_version => 2, raw => 0 }; -my @opt = qw(mid|m=s file|F=s raw); -GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or - die "bad command-line args\n$usage\n"; +my @opt = qw(mid|m=s file|F=s raw C=s@); +GetOptions($opt, @PublicInbox::AdminEdit::OPT, @opt) or die $help; +if ($opt->{help}) { print $help; exit 0 }; +PublicInbox::Admin::do_chdir(delete $opt->{C}); -my $cfg = eval { PublicInbox::Config->new }; +my $cfg = PublicInbox::Config->new; my $editor = $ENV{MAIL_EDITOR}; # e.g. 
"mutt -f" unless (defined $editor) { my $k = 'publicinbox.mailEditor'; @@ -43,7 +59,7 @@ if (defined $mid && defined $file) { my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $found = {}; # cid => [ [ibx, smsg] [, [ibx, smsg] ] ] +my $found = {}; # chash => [ [ibx, smsg] [, [ibx, smsg] ] ] sub find_mid ($$$) { my ($found, $mid, $ibxs) = @_; @@ -52,10 +68,10 @@ sub find_mid ($$$) { my ($id, $prev); while (my $smsg = $over->next_by_mid($mid, \$id, \$prev)) { my $ref = $ibx->msg_by_smsg($smsg); - my $mime = PublicInbox::MIME->new($ref); - my $cid = content_id($mime); + my $mime = PublicInbox::Eml->new($ref); + my $chash = content_hash($mime); my $tuple = [ $ibx, $smsg ]; - push @{$found->{$cid} ||= []}, $tuple + push @{$found->{$chash} ||= []}, $tuple } PublicInbox::InboxWritable::cleanup($ibx); } @@ -92,13 +108,11 @@ Multiple messages with different content found matching warn "Will edit all of them\n"; } } else { - open my $fh, '<', $file or die "open($file) failed: $!"; - my $orig = do { local $/; <$fh> }; - my $mime = PublicInbox::MIME->new(\$orig); - my $mids = mids($mime->header_obj); + my $eml = eml_from_path($file) or die "open($file) failed: $!"; + my $mids = mids($eml); find_mid($found, $_, \@ibxs) for (@$mids); # populates $found - my $cid = content_id($mime); - my $to_edit = $found->{$cid}; + my $chash = content_hash($eml); + my $to_edit = $found->{$chash}; unless ($to_edit) { my $nr = scalar(keys %$found); if ($nr > 0) { @@ -116,11 +130,11 @@ $mids } exit 1; } - $found = { $cid => $to_edit }; + $found = { $chash => $to_edit }; } my %tmpopt = ( - TEMPLATE => 'public-inbox-edit-XXXXXX', + TEMPLATE => 'public-inbox-edit-XXXX', TMPDIR => 1, SUFFIX => $opt->{raw} ? 
'.eml' : '.mbox', ); @@ -170,11 +184,10 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - my $new_raw = do { local $/; <$new_fh> }; + my $new_raw = PublicInbox::IO::read_all $new_fh; if (!$opt->{raw}) { - # get rid of the From we added - $new_raw =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; + PublicInbox::Eml::strip_from($new_raw); # check if user forgot to purge (in mutt) after editing if ($new_raw =~ /^From /sm) { @@ -206,8 +219,8 @@ W: possible message boundary splitting error $new_raw =~ s/^>(>*From )/$1/gm; } - my $new_mime = PublicInbox::MIME->new(\$new_raw); - my $old_mime = PublicInbox::MIME->new($old_raw); + my $new_mime = PublicInbox::Eml->new(\$new_raw); + my $old_mime = PublicInbox::Eml->new($old_raw); # make sure we don't compare unwanted headers, since mutt adds # Content-Length, Status, and Lines headers: @@ -216,10 +229,10 @@ W: possible message boundary splitting error # allow changing Received: and maybe other headers which can # contain sensitive info. 
- my $nhdr = $new_mime->header_obj; - my $ohdr = $old_mime->header_obj; - if (($nhdr->as_string eq $ohdr->as_string) && - (content_id($new_mime) eq content_id($old_mime))) { + my $nhdr = $new_mime->header_obj->as_string; + my $ohdr = $old_mime->header_obj->as_string; + if (($nhdr eq $ohdr) && + (content_hash($new_mime) eq content_hash($old_mime))) { warn "No change detected to:\n", show_cmd($ibx, $smsg); next unless $opt->{verbose}; diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex new file mode 100755 index 00000000..2e5a5d2c --- /dev/null +++ b/script/public-inbox-extindex @@ -0,0 +1,91 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use v5.10.1; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] + + Create and update external (detached) search indices + + --no-fsync speed up indexing, risk corruption on power outage + --watch run persistently and watch for inbox updates + -L LEVEL `medium', or `full' (default: full) + --all index all configured inboxes + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index messages larger than the given size + --gc perform garbage collection instead of indexing + --dedupe[=MSGID] fix prior deduplication errors (may be repeated) + --reindex index previously indexed inboxes + --fast only reindex unseen/stale messages + --verbose | -v increase verbosity (may be repeated) + --dry-run | -n dry-run on --dedupe + +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-extindex(1) man page for full documentation. 
+EOF +my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; +GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i + fsync|sync! fast dangerous + indexlevel|index-level|L=s max_size|max-size=s + batch_size|batch-size=s + dedupe:s@ gc commit-interval=i watch scan! dry-run|n + multi-pack-index! all C=s@ help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); +local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync +# require lazily to speed up --help +require PublicInbox::Admin; +PublicInbox::Admin::do_chdir(delete $opt->{C}); +my $cfg = PublicInbox::Config->new; +my $eidx_dir = shift(@ARGV); +unless (defined $eidx_dir) { + if ($opt->{all} && $cfg->ALL) { + $eidx_dir = $cfg->ALL->{topdir}; + } else { + die "E: $help"; + } +} +my @ibxs; +if ($opt->{gc}) { + die "E: inbox paths must not be specified with --gc\n" if @ARGV; + for my $sw (qw(all watch dry-run dedupe)) { + die "E: --$sw is not compatible with --gc\n" if $opt->{$sw}; + } +} else { + @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +} +$opt->{'dry-run'} && !$opt->{dedupe} and + die "E: --dry-run only affects --dedupe\n"; +$opt->{fast} && !$opt->{reindex} and + die "E: --fast only affects --reindex\n"; + +PublicInbox::Admin::require_or_die(qw(-search)); +PublicInbox::Config::json() or die "Cpanel::JSON::XS or similar missing\n"; +PublicInbox::Admin::progress_prepare($opt); +my $env = PublicInbox::Admin::index_prepare($opt, $cfg); +local %ENV = (%ENV, %$env) if $env; +require PublicInbox::ExtSearchIdx; +my $eidx = PublicInbox::ExtSearchIdx->new($eidx_dir, $opt); +if ($opt->{gc}) { + $eidx->attach_config($cfg); + $eidx->eidx_gc($opt); +} else { + if ($opt->{all}) { + $eidx->attach_config($cfg); + } else { + $eidx->attach_config($cfg, \@ibxs); + } + if ($opt->{watch}) { + $cfg = undef; # save memory only after SIGHUP 
+ $eidx->eidx_watch($opt); + } else { + $eidx->eidx_sync($opt); + } +} diff --git a/script/public-inbox-fetch b/script/public-inbox-fetch new file mode 100755 index 00000000..6fd15328 --- /dev/null +++ b/script/public-inbox-fetch @@ -0,0 +1,39 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# Wrapper to git fetch remote public-inboxes +use v5.12; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my $opt = {}; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-fetch -C DESTINATION + + fetch remote public-inboxes + +options: + + --torsocks VAL whether or not to wrap git and curl commands with + torsocks (default: `auto') + Must be one of: `auto', `no' or `yes' + -T NAME Name of remote(s) to try (may be repeated) + default: `origin' and `_grokmirror' + --exit-code exit with 127 if no updates + --verbose | -v increase verbosity (may be repeated) + --quiet | -q increase verbosity (may be repeated) + -C DIR chdir to specified directory +EOF +GetOptions($opt, qw(help|h quiet|q verbose|v+ C=s@ c=s@ try-remote|T=s@ + prune|p + no-torsocks torsocks=s exit-code)) or die $help; +if ($opt->{help}) { print $help; exit }; +require PublicInbox::Fetch; # loads Admin +PublicInbox::Admin::do_chdir(delete $opt->{C}); +PublicInbox::Admin::setup_signals(); +$SIG{PIPE} = 'IGNORE'; + +my $lei = bless { + env => \%ENV, opt => $opt, cmd => 'public-inbox-fetch', + 0 => *STDIN{GLOB}, 1 => *STDOUT{GLOB}, 2 => *STDERR{GLOB}, +}, 'PublicInbox::LEI'; +PublicInbox::Fetch->do_fetch($lei, '.'); +exit(($lei->{child_error} // 0) >> 8); diff --git a/script/public-inbox-httpd b/script/public-inbox-httpd index 09da505e..caceae20 100755 --- a/script/public-inbox-httpd +++ b/script/public-inbox-httpd @@ -1,56 +1,8 @@ -#!/usr/bin/perl -w -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors 
<meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone HTTP server for public-inbox. -use strict; +use v5.12; use PublicInbox::Daemon; -BEGIN { - for (qw(Plack::Builder Plack::Util)) { - eval("require $_") or die "E: Plack is required for $0\n"; - } - Plack::Builder->import; - require PublicInbox::HTTP; - require PublicInbox::HTTPD; -} -my %httpds; -my $app; -my $refresh = sub { - if (@ARGV) { - eval { $app = Plack::Util::load_psgi(@ARGV) }; - if ($@) { - die $@, -"$0 runs in /, command-line paths must be absolute\n"; - } - } else { - require PublicInbox::WWW; - my $www = PublicInbox::WWW->new; - $www->preload; - $app = builder { - eval { - enable 'Deflater', - content_type => [ qw( - text/html - text/plain - application/atom+xml - )] - }; - - eval { enable 'ReverseProxy' }; - $@ and warn -"Plack::Middleware::ReverseProxy missing,\n", -"URL generation for redirects may be wrong if behind a reverse proxy\n"; - - enable 'Head'; - sub { $www->call(@_) }; - }; - } -}; - -PublicInbox::Daemon::run('0.0.0.0:8080', $refresh, - sub ($$$) { # post_accept - my ($client, $addr, $srv) = @_; - my $fd = fileno($srv); - my $h = $httpds{$fd} ||= PublicInbox::HTTPD->new($srv, $app); - PublicInbox::HTTP->new($client, $addr, $h), - }); +PublicInbox::Daemon::run('http://0.0.0.0:8080'); diff --git a/script/public-inbox-imapd b/script/public-inbox-imapd new file mode 100755 index 00000000..0c96cdbb --- /dev/null +++ b/script/public-inbox-imapd @@ -0,0 +1,8 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Standalone read-only IMAP server for public-inbox. 
+use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run('imap://0.0.0.0:143'); diff --git a/script/public-inbox-index b/script/public-inbox-index index 2c7c4f13..a13e44bf 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -1,35 +1,156 @@ -#!/usr/bin/perl -w -# Copyright (C) 2015-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) 2015-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# Basic tool to create a Xapian search index for a git repository -# configured for public-inbox. +# Basic tool to create a Xapian search index for a public-inbox. # Usage with libeatmydata <https://www.flamingspork.com/projects/libeatmydata/> # highly recommended: eatmydata public-inbox-index INBOX_DIR use strict; -use warnings; +use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -my $usage = "public-inbox-index INBOX_DIR"; -use PublicInbox::Admin; -PublicInbox::Admin::require_or_die('-index'); +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-index [options] INBOX_DIR + + Create and update per-inbox search indices + +options: + + --no-fsync speed up indexing, risk corruption on power outage + -L LEVEL `basic', `medium', or `full' (default: full) + -E EXTINDEX update extindex (default: `all') + --all index all configured inboxes + --compact | -c run public-inbox-compact(1) after indexing + --sequential-shard index Xapian shards sequentially for slow storage + --jobs=NUM set or disable parallelization (NUM=0) + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index messages larger than the given size + --reindex index previously indexed data (if upgrading) + --since=DATE limit --reindex to changes after DATE + --until=DATE limit --reindex to changes before DATE + --rethread regenerate thread IDs (if upgrading, use sparingly) + --prune prune git storage 
on discontiguous history + --verbose | -v increase verbosity (may be repeated) -my $opt = { quiet => -1 }; -GetOptions($opt, qw(verbose|v+ reindex jobs|j=i prune indexlevel|L=s)) - or die "bad command-line args\n$usage"; -die "--jobs must be positive\n" if defined $opt->{jobs} && $opt->{jobs} <= 0; +BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) +See public-inbox-index(1) man page for full documentation. +EOF +my $opt = { + quiet => -1, compact => 0, max_size => undef, fsync => 1, + 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given +}; +GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune + fsync|sync! xapian_only|xapian-only dangerous + indexlevel|index-level|L=s max_size|max-size=s + batch_size|batch-size=s + since|after=s until|before=s + sequential-shard|seq-shard + multi-pack-index! + no-update-extindex update-extindex|E=s@ + fast-noop|F skip-docdata all C=s@ help|h)) + or die $help; +if ($opt->{help}) { print $help; exit 0 }; +die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; +if ($opt->{xapian_only} && !$opt->{reindex}) { + die "--xapian-only requires --reindex\n"; +} +if ($opt->{reindex} && delete($opt->{'fast-noop'})) { + warn "--fast-noop ignored with --reindex\n"; +} +# require lazily to speed up --help +require PublicInbox::Admin; +PublicInbox::Admin::require_or_die('-index'); +PublicInbox::Admin::do_chdir(delete $opt->{C}); -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV); +my $cfg = PublicInbox::Config->new; # Config is loaded by Admin +$opt->{-use_cwd} = 1; +my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); -unless (@ibxs) { print STDERR "Usage: $usage\n"; exit 1 } +unless (@ibxs) { print STDERR $help; exit 1 } +require PublicInbox::InboxWritable; + +my (@eidx, %eidx_seen); +my $update_extindex = $opt->{'update-extindex'}; +if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { + # extindex 
and normal inboxes may have different owners + push(@$update_extindex, 'all') if -w $ALL->{topdir}; +} +@$update_extindex = () if $opt->{'no-update-extindex'}; +if (scalar @$update_extindex) { + PublicInbox::Admin::require_or_die('-search'); + require PublicInbox::ExtSearchIdx; +} +for my $ei_name (@$update_extindex) { + my $es = $cfg->lookup_ei($ei_name); + my $topdir; + if (!$es && -d $ei_name) { # allow dirname or config section name + $topdir = $ei_name; + } elsif ($es) { + $topdir = $es->{topdir}; + } else { + die "extindex `$ei_name' not configured or found\n"; + } + my $o = { %$opt }; + delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic'; + $eidx_seen{$topdir} //= + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o)); +} my $mods = {}; +my @eidx_unconfigured; foreach my $ibx (@ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); + # detect_indexlevel may also set $ibx->{-skip_docdata} + my $detected = $ibx->detect_indexlevel; # XXX: users can shoot themselves in the foot, with opt->{indexlevel} - $ibx->{indexlevel} //= $opt->{indexlevel} // - PublicInbox::Admin::detect_indexlevel($ibx); + $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 
+ 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); + if (@eidx && $ibx->{-unconfigured}) { + push @eidx_unconfigured, " $ibx->{inboxdir}\n"; + } } +warn <<EOF if @eidx_unconfigured; +The following inboxes are unconfigured and will not be updated in +@$update_extindex:\n@eidx_unconfigured +EOF + +$opt->{compact} = 0 if !$mods->{'Xapian'}; # (or old Search::Xapian) PublicInbox::Admin::require_or_die(keys %$mods); +my $env = PublicInbox::Admin::index_prepare($opt, $cfg); +local %ENV = (%ENV, %$env) if $env; +PublicInbox::Xapcmd::check_compact() if $opt->{compact}; PublicInbox::Admin::progress_prepare($opt); -PublicInbox::Admin::index_inbox($_, undef, $opt) for @ibxs; +for my $ibx (@ibxs) { + if ($opt->{compact} >= 2) { + PublicInbox::Xapcmd::run($ibx, 'compact', $opt->{compact_opt}); + } + $ibx->{-no_fsync} = 1 if !$opt->{fsync}; + $ibx->{-dangerous} = 1 if $opt->{dangerous}; + $ibx->{-skip_docdata} //= $opt->{'skip-docdata'}; + + my $ibx_opt = $opt; + if (defined(my $s = $ibx->{lc('indexSequentialShard')})) { + defined(my $v = $cfg->git_bool($s)) or die <<EOL; +publicInbox.$ibx->{name}.indexSequentialShard not boolean +EOL + $ibx_opt = { %$opt, 'sequential-shard' => $v }; + } + my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); + last if $ibx_opt->{quit}; + if (my $copt = $opt->{compact_opt}) { + local $copt->{jobs} = 0 if $ibx_opt->{'sequential-shard'}; + PublicInbox::Xapcmd::run($ibx, 'compact', $copt); + } + last if $ibx_opt->{quit}; + next if $ibx->{-unconfigured} || !$nidx; + for my $eidx (@eidx) { + $eidx->attach_inbox($ibx); + } +} +my $pr = $opt->{-progress}; +for my $eidx (@eidx) { + $pr->("indexing $eidx->{topdir} ...\n") if $pr; + $eidx->eidx_sync($opt); + last if $opt->{quit}; +} diff --git a/script/public-inbox-init b/script/public-inbox-init index 10d3ad45..cf6443f7 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -1,58 +1,115 @@ -#!/usr/bin/perl -w -# Copyright (C) 2014-2020 all contributors 
<meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# -# Initializes a public-inbox, basically a wrapper for git-init(1) use strict; -use warnings; -sub usage { - print STDERR <<EOF; -Usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..] -EOF - exit 1; -} +use v5.10.1; use Getopt::Long qw/:config gnu_getopt no_ignore_case auto_abbrev/; -use PublicInbox::Admin; -PublicInbox::Admin::require_or_die('-base'); -use PublicInbox::Config; -use PublicInbox::InboxWritable; -use PublicInbox::Import; -use File::Temp qw/tempfile/; -use PublicInbox::Lock; -use File::Basename qw/dirname/; -use File::Path qw/mkpath/; +use autodie qw(open chmod close rename); use Fcntl qw(:DEFAULT); -use Cwd qw/abs_path/; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-init NAME INBOX_DIR HTTP_URL ADDRESS [ADDRESS..] + + Initialize a public-inbox + +required arguments: + + NAME the name of the inbox + INBOX_DIR pathname the inbox + HTTP_URL HTTP (or HTTPS) URL + ADDRESS email address(es), may be specified multiple times + +options: + + -V2 use scalable public-inbox-v2-format(5) + -L LEVEL index level `basic', `medium', or `full' (default: full) + --ng NEWSGROUP set NNTP newsgroup name + -c KEY=VALUE set additional config option(s) + --skip-artnum=NUM NNTP article numbers to skip + --skip-epoch=NUM epochs to skip (-V2 only) + -j JOBS number of indexing jobs (-V2 only), (default: 4) + +See public-inbox-init(1) man page for full documentation. 
+EOF -my $version = undef; -my $indexlevel = undef; -my $skip_epoch; +require PublicInbox::Admin; +PublicInbox::Admin::require_or_die('-base'); + +my ($version, $indexlevel, $skip_epoch, $skip_artnum, $jobs, $show_help); +my $skip_docdata; +my $ng = ''; +my (@c_extra, @chdir); my %opts = ( 'V|version=i' => \$version, - 'L|indexlevel=s' => \$indexlevel, + 'L|index-level|indexlevel=s' => \$indexlevel, 'S|skip|skip-epoch=i' => \$skip_epoch, + 'skip-artnum=i' => \$skip_artnum, + 'j|jobs=i' => \$jobs, + 'ng|newsgroup=s' => \$ng, + 'skip-docdata' => \$skip_docdata, + 'help|h' => \$show_help, + 'c=s@' => \@c_extra, + 'C=s@' => \@chdir, ); -GetOptions(%opts) or usage(); -PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; -my $name = shift @ARGV or usage(); -my $inboxdir = shift @ARGV or usage(); -my $http_url = shift @ARGV or usage(); +my $usage_cb = sub { + print STDERR $help; + exit 1; +}; +GetOptions(%opts) or $usage_cb->(); +if ($show_help) { print $help; exit 0 }; +my $name = shift @ARGV or $usage_cb->(); +my $inboxdir = shift @ARGV or $usage_cb->(); +my $http_url = shift @ARGV or $usage_cb->(); my (@address) = @ARGV; -@address or usage(); -my %seen; +@address or $usage_cb->(); +PublicInbox::Admin::do_chdir(\@chdir); + +@c_extra = map { + my ($k, $v) = split(/=/, $_, 2); + defined($v) or die "Usage: -c KEY=VALUE\n"; + $k =~ /\A[a-z]+\z/i or die "$k contains invalid characters\n"; + $k = lc($k); + if ($k eq 'newsgroup') { + die "newsgroup already set ($ng)\n" if $ng ne ''; + $ng = $v; + (); + } elsif ($k eq 'address') { + push @address, $v; # for conflict checking + (); + } elsif ($k =~ /\A(?:inboxdir|mainrepo)\z/) { + die "$k not allowed via -c $_\n" + } elsif ($k eq 'indexlevel') { + defined($indexlevel) and + die "indexlevel already set ($indexlevel)\n"; + $indexlevel = $v; + (); + } else { + $_ + } +} @c_extra; + +PublicInbox::Admin::indexlevel_ok_or_die($indexlevel) if defined $indexlevel; + +$ng =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! 
and + die "--newsgroup `$ng' is not valid\n"; +($ng =~ m!\A\.! || $ng =~ m!\.\z!) and + die "--newsgroup `$ng' must not start or end with `.'\n"; +require PublicInbox::Config; my $pi_config = PublicInbox::Config->default_file; -my $dir = dirname($pi_config); -mkpath($dir); # will croak on fatal errors +my ($dir) = ($pi_config =~ m!(.*?/)[^/]+\z!); +require File::Path; +File::Path::mkpath($dir); # will croak on fatal errors # first, we grab a flock to prevent simultaneous public-inbox-init # processes from trampling over each other, or exiting with 255 on # O_EXCL failure below. This gets unlocked automatically on exit: +require PublicInbox::Lock; my $lock_obj = { lock_path => "$pi_config.flock" }; PublicInbox::Lock::lock_acquire($lock_obj); # git-config will operate on this (and rename on success): -my ($fh, $pi_config_tmp) = tempfile('pi-init-XXXXXXXX', DIR => $dir); +require File::Temp; +my $fh = File::Temp->new(TEMPLATE => 'pi-init-XXXX', DIR => $dir); # Now, we grab another lock to use git-config(1) locking, so it won't # wait on the lock, unlike some of our internal flock()-based locks. 
@@ -64,22 +121,18 @@ sysopen($lockfh, $lockfile, O_RDWR|O_CREAT|O_EXCL) or do { warn "could not open config file: $lockfile: $!\n"; exit(255); }; -my $auto_unlink = UnlinkMe->new($lockfile); -my $perm; +require PublicInbox::OnDestroy; +my $auto_unlink = PublicInbox::OnDestroy::on_destroy(sub { unlink $lockfile }); +my $perm = 0644 & ~umask; +my %seen; if (-e $pi_config) { - open(my $oh, '<', $pi_config) or die "unable to read $pi_config: $!\n"; - my @st = stat($oh); + require PublicInbox::IO; + open(my $oh, '<', $pi_config); + my @st = stat($oh) or die "(f)stat failed on $pi_config: $!\n"; $perm = $st[2]; - defined $perm or die "(f)stat failed on $pi_config: $!\n"; - chmod($perm & 07777, $fh) or - die "(f)chmod failed on future $pi_config: $!\n"; - my $old; - { - local $/; - $old = <$oh>; - } - print $fh $old or die "failed to write: $!\n"; - close $oh or die "failed to close $pi_config: $!\n"; + chmod($perm & 07777, $fh); + print $fh PublicInbox::IO::read_all($oh); + close $oh; # yes, this conflict checking is racy if multiple instances of this # script are run by the same $PI_DIR @@ -103,36 +156,35 @@ if (-e $pi_config) { exit(1) if $conflict; my $ibx = $cfg->lookup_name($name); - if ($ibx) { - if (!defined($indexlevel) && $ibx->{indexlevel}) { - $indexlevel = $ibx->{indexlevel}; - } - } + $indexlevel //= $ibx->{indexlevel} if $ibx; } -close $fh or die "failed to close $pi_config_tmp: $!\n"; +my $pi_config_tmp = $fh->filename; +close($fh); my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -$inboxdir = abs_path($inboxdir); +$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir); +die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; + if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; } elsif ($version != 2) { - die "$inboxdir is a -V2 repo, -V$version specified\n" + die "$inboxdir is a -V2 inbox, -V$version specified\n" } } elsif (-d "$inboxdir/objects") { if (!defined $version) { 
$version = 1; } elsif ($version != 1) { - die "$inboxdir is a -V1 repo, -V$version specified\n" + die "$inboxdir is a -V1 inbox, -V$version specified\n" } } $version = 1 unless defined $version; if ($version == 1 && defined $skip_epoch) { - die "--skip-epoch is only supported for -V2 repos\n"; + die "--skip-epoch is only supported for -V2 inboxes\n"; } my $ibx = PublicInbox::Inbox->new({ @@ -144,42 +196,56 @@ my $ibx = PublicInbox::Inbox->new({ }); my $creat_opt = {}; -PublicInbox::InboxWritable->new($ibx, $creat_opt)->init_inbox(0, $skip_epoch); - -# needed for git prior to v2.1.0 -umask(0077) if defined $perm; - -foreach my $addr (@address) { - next if $seen{lc($addr)}; - PublicInbox::Import::run_die([@x, "--add", "$pfx.address", $addr]); +if (defined $jobs) { + die "--jobs is only supported for -V2 inboxes\n" if $version == 1; + die "--jobs=$jobs must be >= 1\n" if $jobs <= 0; + $creat_opt->{nproc} = $jobs; } -PublicInbox::Import::run_die([@x, "$pfx.url", $http_url]); -PublicInbox::Import::run_die([@x, "$pfx.inboxdir", $inboxdir]); -if (defined($indexlevel)) { - PublicInbox::Import::run_die([@x, "$pfx.indexlevel", $indexlevel]); +require PublicInbox::InboxWritable; +$ibx = PublicInbox::InboxWritable->new($ibx, $creat_opt); +if ($skip_docdata) { + $ibx->{indexlevel} //= 'full'; # ensure init_inbox writes xdb + $ibx->{indexlevel} eq 'basic' and + die "--skip-docdata ignored with --indexlevel=basic\n"; + $ibx->{-skip_docdata} = $skip_docdata; } +$ibx->init_inbox(0, $skip_epoch, $skip_artnum); -# needed for git prior to v2.1.0 -if (defined $perm) { - chmod($perm & 07777, $pi_config_tmp) or - die "(f)chmod failed on future $pi_config: $!\n"; +my $f = "$inboxdir/description"; +if (sysopen $fh, $f, O_CREAT|O_EXCL|O_WRONLY) { + print $fh "public inbox for $address[0]\n"; + close $fh; } -rename $pi_config_tmp, $pi_config or - die "failed to rename `$pi_config_tmp' to `$pi_config': $!\n"; -$auto_unlink->DESTROY; +# needed for git prior to v2.1.0 +umask(0077); -package 
UnlinkMe; -use strict; +require PublicInbox::Spawn; +PublicInbox::Spawn->import(qw(run_die)); -sub new { - my ($klass, $file) = @_; - bless { file => $file }, $klass; +foreach my $addr (@address) { + next if $seen{lc($addr)}; + run_die([@x, "--add", "$pfx.address", $addr]); } +run_die([@x, "$pfx.url", $http_url]); +run_die([@x, "$pfx.inboxdir", $inboxdir]); -sub DESTROY { - my $f = delete($_[0]->{file}); - unlink($f) if defined($f); +if (defined($indexlevel)) { + run_die([@x, "$pfx.indexlevel", $indexlevel]); +} +run_die([@x, "$pfx.newsgroup", $ng]) if $ng ne ''; + +for my $kv (@c_extra) { + my ($k, $v) = split(/=/, $kv, 2); + # git 2.30+ has --fixed-value for idempotent invocations, + # but that's too new to depend on in 2021. Perl quotemeta + # seems compatible enough for POSIX ERE which git uses + my $re = '^'.quotemeta($v).'$'; + run_die([@x, qw(--replace-all), "$pfx.$k", $v, $re]); } -1; + +# needed for git prior to v2.1.0 +chmod($perm & 07777, $pi_config_tmp); +rename $pi_config_tmp, $pi_config; +undef $auto_unlink; # trigger ->DESTROY diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 0d6c989b..a955cdf6 100644..100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -1,29 +1,49 @@ #!/usr/bin/perl -w -# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for training spam (via SpamAssassin) and removing messages from a # public-inbox -my $usage = "$0 <spam|ham|rm> </path/to/message"; +my $help = <<EOF; +usage: public-inbox-learn [OPTIONS] [spam|ham|rm] </path/to/RFC2822_message + +required action argument: + + spam unindex the message and train as spam + rm remove the message without training as spam + ham index the message (based on To:/Cc: headers) and train as ham + +options: + + --all scan all inboxes on `rm' + +See public-inbox-learn(1) man page for full 
documentation. +EOF use strict; -use warnings; use PublicInbox::Config; use PublicInbox::InboxWritable; -use PublicInbox::MIME; +use PublicInbox::Eml; use PublicInbox::Address; use PublicInbox::Spamcheck::Spamc; -my $train = shift or die "usage: $usage\n"; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my %opt = (all => 0); +GetOptions(\%opt, qw(all help|h)) or die $help; +use PublicInbox::Import; + +my $train = shift or die $help; if ($train !~ /\A(?:ham|spam|rm)\z/) { - die "`$train' not recognized.\nusage: $usage\n"; + die "`$train' not recognized.\n$help"; } +die "--all only works with `rm'\n" if $opt{all} && $train ne 'rm'; my $spamc = PublicInbox::Spamcheck::Spamc->new; -my $pi_config = PublicInbox::Config->new; +my $pi_cfg = PublicInbox::Config->new; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; +PublicInbox::Import::load_config($pi_cfg); my $err; -my $mime = PublicInbox::MIME->new(eval { - local $/; - my $data = scalar <STDIN>; - $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $mime = PublicInbox::Eml->new(do{ + my $data = PublicInbox::IO::read_all \*STDIN; + PublicInbox::Eml::strip_from($data); if ($train ne 'rm') { eval { @@ -36,7 +56,7 @@ my $mime = PublicInbox::MIME->new(eval { }; $err = $@; } - $data + \$data }); sub remove_or_add ($$$$) { @@ -47,6 +67,7 @@ sub remove_or_add ($$$$) { $ibx->{name} = $ENV{GIT_COMMITTER_NAME} // $ibx->{name}; $ibx->{-primary_address} = $ENV{GIT_COMMITTER_EMAIL} // $addr; $ibx = PublicInbox::InboxWritable->new($ibx); + $ibx->{indexlevel} = $ibx->detect_indexlevel; my $im = $ibx->importer(0); if ($train eq "rm") { @@ -68,12 +89,12 @@ sub remove_or_add ($$$$) { } # spam is removed from all known inboxes since it is often Bcc:-ed -if ($train eq 'spam') { - $pi_config->each_inbox(sub { +if ($train eq 'spam' || ($train eq 'rm' && $opt{all})) { + $pi_cfg->each_inbox(sub { my ($ibx) = @_; $ibx = PublicInbox::InboxWritable->new($ibx); my $im = $ibx->importer(0); - $im->remove($mime, 'spam'); + 
$im->remove($mime, $train); $im->done; }); } else { @@ -84,7 +105,7 @@ if ($train eq 'spam') { for ($mime->header('Cc'), $mime->header('To')) { foreach my $addr (PublicInbox::Address::emails($_)) { $addr = lc($addr); - $dests{$addr} //= $pi_config->lookup($addr) // 0; + $dests{$addr} //= $pi_cfg->lookup($addr) // 0; } } @@ -92,12 +113,12 @@ if ($train eq 'spam') { my %seen; while (my ($addr, $ibx) = each %dests) { next unless ref($ibx); # $ibx may be 0 - next if $seen{"$ibx"}++; + next if $seen{0 + $ibx}++; remove_or_add($ibx, $train, $mime, $addr); } - my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_config, $mime); + my $dests = PublicInbox::MDA->inboxes_for_list_id($pi_cfg, $mime); for my $ibx (@$dests) { - next if !$seen{"$ibx"}++; + next if $seen{0 + $ibx}++; remove_or_add($ibx, $train, $mime, $ibx->{-primary_address}); } } diff --git a/script/public-inbox-mda b/script/public-inbox-mda index f37c7492..b463b07b 100755 --- a/script/public-inbox-mda +++ b/script/public-inbox-mda @@ -1,13 +1,29 @@ #!/usr/bin/perl -w -# Copyright (C) 2013-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Mail delivery agent for public-inbox, run from your MTA upon mail delivery +my $help = <<EOF; +usage: public-inbox-mda [OPTIONS] </path/to/RFC2822_message + +options: + + --no-precheck skip internal checks for spam messages + +See public-inbox-mda(1) man page for full documentation. +EOF use strict; -use warnings; -my $usage = 'public-inbox-mda [OPTIONS] < rfc2822_message'; -my $precheck = grep(/\A--no-precheck\z/, @ARGV) ? 0 : 1; -my ($ems, $emm); +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +my ($ems, $emm, $show_help); +my $precheck = 1; +use PublicInbox::Import; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; # does this need a CLI switch? +GetOptions('precheck!' 
=> \$precheck, 'help|h' => \$show_help) or + do { print STDERR $help; exit 1 }; +if ($show_help) { + print $help; + exit; +} my $do_exit = sub { my ($code) = shift; @@ -15,8 +31,7 @@ my $do_exit = sub { exit $code; }; -use Email::Simple; -use PublicInbox::MIME; +use PublicInbox::Eml; use PublicInbox::MDA; use PublicInbox::Config; use PublicInbox::Emergency; @@ -24,29 +39,32 @@ use PublicInbox::Filter::Base; use PublicInbox::InboxWritable; use PublicInbox::Spamcheck; -# n.b: hopefully we can setup the emergency path without bailing due to -# user error, we really want to setup the emergency destination ASAP +# n.b.: Hopefully we can set up the emergency path without bailing due to +# user error, we really want to set up the emergency destination ASAP # in case there's bugs in our code or user error. my $emergency = $ENV{PI_EMERGENCY} || "$ENV{HOME}/.public-inbox/emergency/"; $ems = PublicInbox::Emergency->new($emergency); -my $str = eval { local $/; <STDIN> }; -$str =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $str = PublicInbox::IO::read_all \*STDIN; +PublicInbox::Eml::strip_from($str); $ems->prepare(\$str); -my $simple = Email::Simple->new(\$str); -my $config = PublicInbox::Config->new; +my $eml = PublicInbox::Eml->new(\$str); +my $cfg = PublicInbox::Config->new; my $key = 'publicinboxmda.spamcheck'; my $default = 'PublicInbox::Spamcheck::Spamc'; -my $spamc = PublicInbox::Spamcheck::get($config, $key, $default); +my $spamc = PublicInbox::Spamcheck::get($cfg, $key, $default); my $dests = []; +PublicInbox::Import::load_config($cfg, $do_exit); + my $recipient = $ENV{ORIGINAL_RECIPIENT}; if (defined $recipient) { - my $ibx = $config->lookup($recipient); # first check + my $ibx = $cfg->lookup($recipient); # first check push @$dests, $ibx if $ibx; } if (!scalar(@$dests)) { - $dests = PublicInbox::MDA->inboxes_for_list_id($config, $simple); + $dests = PublicInbox::MDA->inboxes_for_list_id($cfg, $eml); if (!scalar(@$dests) && !defined($recipient)) { - die "ORIGINAL_RECIPIENT 
not defined in ENV\n"; + warn "ORIGINAL_RECIPIENT not defined in ENV\n"; + $do_exit->(67); # EX_NOUSER } scalar(@$dests) or $do_exit->(67); # EX_NOUSER 5.1.1 user unknown } @@ -61,7 +79,7 @@ my $err; 0; # pre-check, MDA has stricter rules than an importer might; } elsif ($precheck) { - !!PublicInbox::MDA->precheck($simple, $ibx->{address}); + !!PublicInbox::MDA->precheck($eml, $ibx->{address}); } else { 1; } @@ -69,7 +87,7 @@ my $err; $do_exit->(67) if $err && scalar(@$dests) == 0; -$simple = undef; +$eml = undef; my $spam_ok; if ($spamc) { $str = ''; @@ -101,9 +119,10 @@ my @rejects; for my $ibx (@$dests) { mda_filter_adjust($ibx); my $filter = $ibx->filter; - my $mime = PublicInbox::MIME->new($str); + my $mime = PublicInbox::Eml->new($str); my $ret = $filter->delivery($mime); - if (ref($ret) && $ret->isa('Email::MIME')) { # filter altered message + if (ref($ret) && ($ret->isa('PublicInbox::Eml') || + $ret->isa('Email::MIME'))) { # filter altered message $mime = $ret; } elsif ($ret == PublicInbox::Filter::Base::IGNORE) { next; # nothing, keep looping @@ -119,7 +138,7 @@ for my $ibx (@$dests) { # destination succeeds $emm->abort; } else { # v1-only - my $mid = $mime->header_obj->header_raw('Message-ID'); + my $mid = $mime->header_raw('Message-ID'); # this message is similar to what ssoma-mda shows: print STDERR "CONFLICT: Message-ID: $mid exists\n"; } diff --git a/script/public-inbox-netd b/script/public-inbox-netd new file mode 100755 index 00000000..e8b1ca69 --- /dev/null +++ b/script/public-inbox-netd @@ -0,0 +1,6 @@ +#!/usr/bin/perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run(); diff --git a/script/public-inbox-nntpd b/script/public-inbox-nntpd index ce42de2d..aca27383 100755 --- a/script/public-inbox-nntpd +++ b/script/public-inbox-nntpd @@ -1,15 +1,8 @@ -#!/usr/bin/perl -w -# Copyright (C) 2015-2020 all 
contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Standalone NNTP server for public-inbox. -use strict; -use warnings; +use v5.12; use PublicInbox::Daemon; -use PublicInbox::NNTP; # need to call import -use PublicInbox::NNTPD; -my $nntpd = PublicInbox::NNTPD->new; -PublicInbox::Daemon::run('0.0.0.0:119', - sub { $nntpd->refresh_groups }, # refresh - sub ($$$) { PublicInbox::NNTP->new($_[0], $nntpd) }, # post_accept - $nntpd); +PublicInbox::Daemon::run('nntp://0.0.0.0:119'); diff --git a/script/public-inbox-pop3d b/script/public-inbox-pop3d new file mode 100755 index 00000000..ec944aee --- /dev/null +++ b/script/public-inbox-pop3d @@ -0,0 +1,8 @@ +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +# +# Standalone POP3 server for public-inbox. +use v5.12; +use PublicInbox::Daemon; +PublicInbox::Daemon::run('pop3://0.0.0.0:110'); diff --git a/script/public-inbox-purge b/script/public-inbox-purge index c9b69c3d..618cfec4 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # # Used for purging messages entirely from a public-inbox. 
Currently @@ -10,23 +10,35 @@ use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); use PublicInbox::AdminEdit; PublicInbox::Admin::check_require('-index'); use PublicInbox::Filter::Base qw(REJECT); -use PublicInbox::MIME; +use PublicInbox::Eml; require PublicInbox::V2Writable; -my $usage = "$0 [--all] [INBOX_DIRS] </path/to/message"; +my $help = <<EOF; +usage: public-inbox-purge [--all] [INBOX_DIRS] </path/to/message + + erase message entirely from an inbox (including history) + +options: + + --all purge from all configured inboxes + +See public-inbox-purge(1) man page for full documentation. +EOF + my $opt = { verbose => 1, all => 0, -min_inbox_version => 2 }; -GetOptions($opt, @PublicInbox::AdminEdit::OPT) or - die "bad command-line args\n$usage\n"; +GetOptions($opt, @PublicInbox::AdminEdit::OPT, 'C=s@') or die $help; +if ($opt->{help}) { print $help; exit 0 }; +PublicInbox::Admin::do_chdir(delete $opt->{C}); my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; scalar <STDIN> }; -$data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; +my $data = PublicInbox::IO::read_all \*STDIN; +PublicInbox::Eml::strip_from($data); my $n_purged = 0; foreach my $ibx (@ibxs) { - my $mime = PublicInbox::MIME->new($data); + my $mime = PublicInbox::Eml->new($data); my $v2w = PublicInbox::V2Writable->new($ibx, 0); my $commits = $v2w->purge($mime) || []; diff --git a/script/public-inbox-watch b/script/public-inbox-watch index 645abeda..9bcd42ed 100755 --- a/script/public-inbox-watch +++ b/script/public-inbox-watch @@ -1,26 +1,62 @@ #!/usr/bin/perl -w -# Copyright (C) 2016-2020 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +my $help = <<EOF; +usage: public-inbox-watch + +See public-inbox-watch(1) man page for full documentation. 
+EOF + use strict; -use warnings; -use PublicInbox::WatchMaildir; +use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); +use IO::Handle; # ->autoflush +use PublicInbox::Watch; +use PublicInbox::Import; +local $PublicInbox::Import::DROP_UNIQUE_UNSUB; use PublicInbox::Config; -my ($config, $watch_md); +use PublicInbox::DS; +my $do_scan = 1; +GetOptions('scan!' => \$do_scan, # undocumented, testing only + 'help|h' => \(my $show_help)) or do { print STDERR $help; exit 1 }; +if ($show_help) { print $help; exit 0 }; +PublicInbox::DS::block_signals(); +STDOUT->autoflush(1); +STDERR->autoflush(1); +local $0 = $0; # local since this script may be eval-ed +my $watch = PublicInbox::Watch->new(PublicInbox::Config->new); my $reload = sub { - $config = PublicInbox::Config->new; - $watch_md->quit if $watch_md; - $watch_md = PublicInbox::WatchMaildir->new($config); + my $prev = $watch or return; # SIGQUIT issued + $watch->quit; + $watch = PublicInbox::Watch->new(PublicInbox::Config->new); + if ($watch) { + $watch->{sig} = $prev->{sig}; # prevent redundant signalfd + warn "# reloaded\n"; + } else { + warn("E: reloading failed\n"); + $watch = $prev; + } }; -$reload->(); -if ($watch_md) { - my $scan = sub { $watch_md->trigger_scan('full') if $watch_md }; - $SIG{HUP} = $reload; - $SIG{USR1} = $scan; - $SIG{ALRM} = sub { $SIG{ALRM} = 'DEFAULT'; $scan->() }; - $SIG{QUIT} = $SIG{TERM} = $SIG{INT} = sub { - $watch_md->quit if $watch_md; - $watch_md = undef; + +if ($watch) { + my $scan = sub { + return if !$watch; + warn "# scanning\n"; + $watch->trigger_scan('full'); }; - alarm(1); - $watch_md->watch while ($watch_md); + my $quit = sub { # may be called in IMAP/NNTP children + $watch->quit if $watch; + $watch = undef; + $0 .= ' quitting'; + }; + my $sig = { + HUP => $reload, + USR1 => $scan, + CHLD => \&PublicInbox::DS::enqueue_reap, + }; + $sig->{QUIT} = $sig->{TERM} = $sig->{INT} = $quit; + local @SIG{keys %$sig} = values(%$sig); # for non-signalfd/kqueue + + # --no-scan 
is only intended for testing atm, undocumented. + PublicInbox::DS::requeue($scan) if $do_scan; + $watch->watch($sig) while ($watch); } diff --git a/script/public-inbox-xcpdb b/script/public-inbox-xcpdb index 2b9f032c..fac54559 100755 --- a/script/public-inbox-xcpdb +++ b/script/public-inbox-xcpdb @@ -1,19 +1,70 @@ -#!/usr/bin/perl -w -# Copyright (C) 2019-2020 all contributors <meta@public-inbox.org> +#!perl -w +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> -# xcpdb: Xapian copy database, a wrapper around Xapian's copydatabase(1) +use v5.12; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); -use PublicInbox::InboxWritable; -use PublicInbox::Xapcmd; +my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: +usage: public-inbox-xcpdb [options] <INBOX_DIR|EXTINDEX_DIR|CINDEX_DIR> + + upgrade or reshard Xapian DB(s) used by public-inbox + +options: + + --compact | -c run public-inbox-compact(1) after indexing + --all copy all configured inboxes + --reshard=NUM change the number of shards + --jobs=NUM limit parallelism to JOBS count + --verbose | -v increase verbosity (may be repeated) + --sequential-shard copy+index Xapian shards sequentially (for slow HDD) + +index options (see public-inbox-index(1) man page for full description): + + --no-fsync speed up indexing, risk corruption on power outage + --batch-size=BYTES flush changes to OS after a given number of bytes + --max-size=BYTES do not index messages larger than the given size + +See public-inbox-xcpdb(1) man page for full documentation. +EOF +my $opt = { quiet => -1, compact => 0, fsync => 1, + -eidx_ok => 1, -cidx_ok => 1 }; +GetOptions($opt, qw( + fsync|sync! 
compact|c reshard|R=i + max_size|max-size=s batch_size|batch-size=s + sequential-shard|seq-shard + jobs|j=i quiet|q verbose|v + blocksize|b=s no-full|n fuller|F + all C=s@ help|h)) or die $help; +if ($opt->{help}) { print $help; exit 0 }; + use PublicInbox::Admin; PublicInbox::Admin::require_or_die('-search'); -my $usage = "Usage: public-inbox-xcpdb [--compact] INBOX_DIR\n"; -my $opt = {}; -my @opt = (qw(compact reshard|R=i), @PublicInbox::Xapcmd::COMPACT_OPT); -GetOptions($opt, @opt) or die "bad command-line args\n$usage"; -my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV) or die $usage; -foreach (@ibxs) { - my $ibx = PublicInbox::InboxWritable->new($_); - # we rely on --no-renumber to keep docids synched to NNTP +PublicInbox::Admin::do_chdir(delete $opt->{C}); + +require PublicInbox::Config; +my $cfg = PublicInbox::Config->new; +my ($ibxs, $eidxs, $cidxs) = + PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); +unless (@$ibxs || @$eidxs || @$cidxs) { print STDERR $help; exit 1 } +my $idx_env = PublicInbox::Admin::index_prepare($opt, $cfg); + +# we only set XAPIAN_FLUSH_THRESHOLD for index, since cpdb doesn't +# know sizes, only doccounts +$opt->{-idx_env} = $idx_env; + +if ($opt->{'sequential-shard'} && ($opt->{jobs} // 1) > 1) { + warn "W: --jobs=$opt->{jobs} ignored with --sequential-shard\n"; + $opt->{jobs} = 0; +} + +require PublicInbox::InboxWritable; +require PublicInbox::Xapcmd; +# we rely on --no-renumber to keep docids synched for NNTP(artnum) + IMAP(UID) +for my $ibx (@$ibxs) { + $ibx = PublicInbox::InboxWritable->new($ibx); PublicInbox::Xapcmd::run($ibx, 'cpdb', $opt); } + +for my $ibxish (@$eidxs, @$cidxs) { + my $restore = $ibxish->can('prep_umask') ? 
$ibxish->prep_umask : undef; + PublicInbox::Xapcmd::run($ibxish, 'cpdb', $opt); +} diff --git a/script/public-inbox.cgi b/script/public-inbox.cgi index c766483a..3a430d5b 100755 --- a/script/public-inbox.cgi +++ b/script/public-inbox.cgi @@ -1,5 +1,5 @@ #!/usr/bin/perl -w -# Copyright (C) 2014-2020 all contributors <meta@public-inbox.org> +# Copyright (C) 2014-2021 all contributors <meta@public-inbox.org> # License: AGPL-3.0+ or later <https://www.gnu.org/licenses/agpl-3.0.txt> # # Enables using PublicInbox::WWW as a CGI script @@ -13,14 +13,7 @@ BEGIN { PublicInbox::WWW->preload if $ENV{MOD_PERL}; } my $www = PublicInbox::WWW->new; -my $have_deflater = eval { require Plack::Middleware::Deflater; 1 }; my $app = builder { - if ($have_deflater) { - enable 'Deflater', - content_type => [ 'text/html', 'text/plain', - 'application/atom+xml' ]; - } - # Enable to ensure redirects and Atom feed URLs are generated # properly when running behind a reverse proxy server which # sets the X-Forwarded-Proto request header. |