diff options
author | Eric Wong <e@80x24.org> | 2020-12-31 13:24:36 +0000 |
---|---|---|
committer | Eric Wong <e@80x24.org> | 2020-12-31 13:24:36 +0000 |
commit | 0c8106d44f317175e122744b43407bf067183175 (patch) | |
tree | f59296dc3c63c8ca1dd8445821f43777333ba30e /script | |
parent | 9d29ceda4eb8c9973749d74b928416f5c3cc78f8 (diff) | |
parent | 0f461dcd3317f44670e2fc50346f87ff41e80127 (diff) | |
download | public-inbox-0c8106d44f317175e122744b43407bf067183175.tar.gz |
* origin/master: (58 commits) ds: flatten + reuse @events, epoll_wait style fixes ds: simplify EventLoop implementation check defined return value for localized slurp errors import: check for git->qx errors, clearer return values git: qx: avoid extra "local" for scalar context case search: remove {mset} option for ->mset method search: remove pointless {relevance} setting miscsearch: take reopen from Search and use it extsearch: unconditionally reopen on access extindex: allow using --all without EXTINDEX_DIR extindex: add undocumented --no-scan switch extindex: enable autoflush on STDOUT/STDERR extindex: various --watch signal handling fixes extindex: --watch for inotify-based updates eml: fix undefined vars on <Perl 5.28 t/config: test --get-urlmatch for git <2.26 default to CORE::warn in $SIG{__WARN__} handlers inbox: name variable for values loop iterator inboxidle: avoid needless syscalls on refresh inboxidle: clue users into resolving ENOSPC from inotify ...
Diffstat (limited to 'script')
-rwxr-xr-x | script/public-inbox-convert | 28 | ||||
-rwxr-xr-x | script/public-inbox-edit | 3 | ||||
-rw-r--r-- | script/public-inbox-extindex | 38 | ||||
-rwxr-xr-x | script/public-inbox-index | 60 | ||||
-rwxr-xr-x | script/public-inbox-init | 16 | ||||
-rwxr-xr-x | script/public-inbox-learn | 3 | ||||
-rwxr-xr-x | script/public-inbox-purge | 2 |
7 files changed, 99 insertions, 51 deletions
diff --git a/script/public-inbox-convert b/script/public-inbox-convert index b61c743f..e6ee6529 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -47,34 +47,21 @@ die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -require Cwd; -Cwd->import('abs_path'); +require PublicInbox::Admin; require PublicInbox::Config; require PublicInbox::InboxWritable; -my $abs = abs_path($old_dir); -die "failed to resolve $old_dir: $!\n" if (!defined($abs)); - my $cfg = PublicInbox::Config->new; -my $old; -$cfg->each_inbox(sub { - $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; -}); -if ($old) { - $old = PublicInbox::InboxWritable->new($old); -} else { +my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); +@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", + map { "\t$_->{inboxdir}\n" } @old; +my $old = PublicInbox::InboxWritable->new($old[0]); +if (delete $old->{-unconfigured}) { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; - $old = PublicInbox::InboxWritable->new({ - inboxdir => $old_dir, - name => 'ignored', - -primary_address => 'old@example.com', - address => [ 'old@example.com' ], - }); } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -require File::Spec; require PublicInbox::Admin; my $detected = PublicInbox::Admin::detect_indexlevel($old); $old->{indexlevel} //= $detected; @@ -88,12 +75,11 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = File::Spec->canonpath($new_dir); +$new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; -$old->umask_prepare; sub link_or_copy ($$) { my ($src, $dst) = @_; diff --git a/script/public-inbox-edit b/script/public-inbox-edit index a70614fc..81f023bc 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -183,7 +183,8 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - my $new_raw = do { local $/; <$new_fh> }; + defined(my $new_raw = do { local $/; <$new_fh> }) or die + "read $edit_fn: $!\n"; if (!$opt->{raw}) { # get rid of the From we added diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 17ad59fa..5f27988f 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -6,11 +6,12 @@ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: -usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR] +usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] Create and update external (detached) search indices --no-fsync speed up indexing, risk corruption on power outage + --watch run persistently and watch for inbox updates -L LEVEL `medium', or `full' (default: full) --all index all configured inboxes --jobs=NUM set or disable parallelization (NUM=0) @@ -22,26 +23,36 @@ usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR] BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-extindex(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - gc + gc commit-interval=i watch scan! all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; - -# require lazily to speed up --help -my $eidx_dir = shift(@ARGV) // die "E: $help"; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync +# require lazily to speed up --help require PublicInbox::Admin; my $cfg = PublicInbox::Config->new; +my $eidx_dir = shift(@ARGV); +unless (defined $eidx_dir) { + if ($opt->{all} && $cfg->ALL) { + $eidx_dir = $cfg->ALL->{topdir}; + } else { + die "E: $help"; + } +} my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; - die "E: --all not compatible --gc\n" if $opt->{all}; + die "E: --all not compatible with --gc\n" if $opt->{all}; + die "E: --watch is not compatible with --gc\n" if $opt->{watch}; } else { @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); } @@ -56,6 +67,15 @@ if ($opt->{gc}) { $eidx->attach_config($cfg); $eidx->eidx_gc($opt); } else { - $eidx->attach_inbox($_) for @ibxs; - $eidx->eidx_sync($opt); + if ($opt->{all}) { + $eidx->attach_config($cfg); + } else { + $eidx->attach_inbox($_) for @ibxs; + } + if ($opt->{watch}) { + $cfg = undef; # save memory only after SIGHUP + $eidx->eidx_watch($opt); + } else { + $eidx->eidx_sync($opt); + } } diff --git a/script/public-inbox-index b/script/public-inbox-index index 8a61817c..0fdfddc0 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -17,7 +17,7 @@ options: --no-fsync speed up indexing, risk corruption on power outage -L LEVEL `basic', `medium', or `full' (default: full) - -E EIDX update EIDX (e.g. `all') + -E EXTINDEX update extindex (default: `all') --all index all configured inboxes --compact | -c run public-inbox-compact(1) after indexing --sequential-shard index Xapian shards sequentially for slow storage @@ -32,19 +32,26 @@ options: BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-index(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { + quiet => -1, compact => 0, max_size => undef, fsync => 1, + 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given +}; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! xapian_only|xapian-only indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard - skip-docdata all help|h)) + no-update-extindex update-extindex|E=s@ + fast-noop|F skip-docdata all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{xapian_only} && !$opt->{reindex}) { die "--xapian-only requires --reindex\n"; } +if ($opt->{reindex} && delete($opt->{'fast-noop'})) { + warn "--fast-noop ignored with --reindex\n"; +} # require lazily to speed up --help require PublicInbox::Admin; @@ -56,7 +63,34 @@ my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } +my (@eidx, %eidx_seen); +my $update_extindex = $opt->{'update-extindex'}; +if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { + # extindex and normal inboxes may have different owners + push(@$update_extindex, 'all') if -w $ALL->{topdir}; +} +@$update_extindex = () if $opt->{'no-update-extindex'}; +if (scalar @$update_extindex) { + PublicInbox::Admin::require_or_die('-search'); + require PublicInbox::ExtSearchIdx; +} +for my $ei_name (@$update_extindex) { + my $es = $cfg->lookup_ei($ei_name); + my $topdir; + if (!$es && -d $ei_name) { # allow dirname or config section name + $topdir = $ei_name; + } elsif ($es) { + $topdir = $es->{topdir}; + } else { + die "extindex `$ei_name' not configured or found\n"; + } + my $o = { %$opt }; + delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic'; + $eidx_seen{$topdir} //= + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o)); +} my $mods = {}; +my @eidx_unconfigured; foreach my $ibx (@ibxs) { # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = PublicInbox::Admin::detect_indexlevel($ibx); @@ -64,7 +98,14 @@ foreach my $ibx (@ibxs) { $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); + if (@eidx && $ibx->{-unconfigured}) { + push @eidx_unconfigured, " $ibx->{inboxdir}\n"; + } } +warn <<EOF if @eidx_unconfigured; +The following inboxes are unconfigured and will not be updated in +@$update_extindex:\n@eidx_unconfigured +EOF # "Search::Xapian" includes SWIG "Xapian", too: $opt->{compact} = 0 if !$mods->{'Search::Xapian'}; @@ -90,10 +131,21 @@ publicInbox.$ibx->{name}.indexSequentialShard not boolean EOL $ibx_opt = { %$opt, sequential_shard => $v }; } - PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); + my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); last if $ibx_opt->{quit}; if (my $copt = $opt->{compact_opt}) { local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } + last if $ibx_opt->{quit}; + next if $ibx->{-unconfigured} || !$nidx; + for my $eidx (@eidx) { + $eidx->attach_inbox($ibx); + } +} +my $pr = $opt->{-progress}; +for my $eidx (@eidx) { + $pr->("indexing $eidx->{topdir} ...\n") if $pr; + $eidx->eidx_sync($opt); + last if $opt->{quit}; } diff --git a/script/public-inbox-init b/script/public-inbox-init index c775eb31..7ac77830 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -100,11 +100,7 @@ if (-e $pi_config) { defined $perm or die "(f)stat failed on $pi_config: $!\n"; chmod($perm & 07777, $fh) or die "(f)chmod failed on future $pi_config: $!\n"; - my $old; - { - local $/; - $old = <$oh>; - } + defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n"; print $fh $old or die "failed to write: $!\n"; close $oh or die "failed to close $pi_config: $!\n"; @@ -138,10 +134,9 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -require File::Spec; -$inboxdir = File::Spec->canonpath($inboxdir); +$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir); +die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; @@ -186,11 +181,6 @@ if ($skip_docdata) { $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); -require Cwd; -my $tmp = Cwd::abs_path($inboxdir); -defined($tmp) or die "failed to resolve $inboxdir: $!\n"; -$inboxdir = $tmp; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; # needed for git prior to v2.1.0 umask(0077) if defined $perm; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 9352c8ff..1731a4ba 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -39,8 +39,7 @@ my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; my $err; my $mime = PublicInbox::Eml->new(do{ - local $/; - my $data = <STDIN>; + defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 7bca11ea..52f1f18a 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -32,7 +32,7 @@ if ($opt->{help}) { print $help; exit 0 }; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; <STDIN> }; +defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; |