diff options
Diffstat (limited to 'lib/PublicInbox/Admin.pm')
-rw-r--r-- | lib/PublicInbox/Admin.pm | 157 |
1 files changed, 104 insertions, 53 deletions
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index b21fb241..a1b1fc07 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -1,15 +1,15 @@ -# Copyright (C) 2019-2021 all contributors <meta@public-inbox.org> +# Copyright (C) all contributors <meta@public-inbox.org> # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> # common stuff for administrative command-line tools # Unstable internal API package PublicInbox::Admin; -use strict; +use v5.12; use parent qw(Exporter); -our @EXPORT_OK = qw(setup_signals); +our @EXPORT_OK = qw(setup_signals fmt_localtime); use PublicInbox::Config; use PublicInbox::Inbox; -use PublicInbox::Spawn qw(popen_rd); +use PublicInbox::Spawn qw(run_qx); use PublicInbox::Eml; *rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed; @@ -28,13 +28,12 @@ sub setup_signals { }; } -sub resolve_inboxdir { - my ($cd, $ver) = @_; +sub resolve_any_idxdir ($$) { + my ($cd, $lock_bn) = @_; my $try = $cd // '.'; my $root_dev_ino; - while (1) { # favor v2, first - if (-f "$try/inbox.lock") { - $$ver = 2 if $ver; + while (1) { + if (-f "$try/$lock_bn") { # inbox.lock, ei.lock, cidx.lock return rel2abs_collapsed($try); } elsif (-d $try) { my @try = stat _; @@ -42,47 +41,55 @@ sub resolve_inboxdir { my @root = stat('/') or die "stat /: $!\n"; "$root[0]\0$root[1]"; }; - last if "$try[0]\0$try[1]" eq $root_dev_ino; + return undef if "$try[0]\0$try[1]" eq $root_dev_ino; $try .= '/..'; # continue, cd up } else { die "`$try' is not a directory\n"; } } - # try v1 bare git dirs - my $cmd = [ qw(git rev-parse --git-dir) ]; - my $fh = popen_rd($cmd, undef, {-C => $cd}); - my $dir = do { local $/; <$fh> }; - close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; - chomp $dir; - $$ver = 1 if $ver; - rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); } -# for unconfigured inboxes -sub detect_indexlevel ($) { - my ($ibx) = @_; +sub resolve_eidxdir ($) { resolve_any_idxdir($_[0], 'ei.lock') } +sub resolve_cidxdir ($) { resolve_any_idxdir($_[0], 'cidx.lock') } - my $over = $ibx->over; - my $srch = $ibx->search; - delete @$ibx{qw(over search)}; # don't leave open FDs lying around +sub resolve_inboxdir { + my ($cd, $ver) = @_; + my $dir; + if (defined($dir = resolve_any_idxdir($cd, 'inbox.lock'))) { # try v2 + $$ver = 2 if $ver; + } elsif (defined($dir = resolve_git_dir($cd))) { # try v1 + $$ver = 1 if $ver; + } # else: not an inbox at all + $dir; +} - # brand new or never before indexed inboxes default to full - return 'full' unless $over; - my $l = 'basic'; - return $l unless $srch; - if (my $xdb = $srch->xdb) { - $l = 'full'; - my $m = $xdb->get_metadata('indexlevel'); - if ($m eq 'medium') { - $l = $m; - } elsif ($m ne '') { - warn <<""; -$ibx->{inboxdir} has unexpected indexlevel in Xapian: $m +sub valid_pwd { + my $pwd = $ENV{PWD} // return; + my @st_pwd = stat $pwd or return; + my @st_cwd = stat '.' or die "stat(.): $!"; + "@st_pwd[1,0]" eq "@st_cwd[1,0]" ? $pwd : undef; +} - } - $ibx->{-skip_docdata} = 1 if $xdb->get_metadata('skip_docdata'); +sub resolve_git_dir { + my ($cd) = @_; # cd may be `undef' for cwd + # try v1 bare git dirs + my $pwd = valid_pwd(); + my $env; + defined($pwd) && substr($cd // '/', 0, 1) ne '/' and + $env->{PWD} = "$pwd/$cd"; + my $cmd = [ qw(git rev-parse --git-dir) ]; + my $dir = run_qx($cmd, $env, { -C => $cd }); + die "error in @$cmd (cwd:${\($cd // '.')}): $?\n" if $?; + chomp $dir; + # --absolute-git-dir requires git v2.13.0+, and we want to + # respect symlinks when $ENV{PWD} if $ENV{PWD} ne abs_path('.') + # since we store absolute GIT_DIR paths in cindex. + if (substr($dir, 0, 1) ne '/') { + substr($cd // '/', 0, 1) eq '/' or + $cd = File::Spec->rel2abs($cd, $pwd); + $dir = rel2abs_collapsed($dir, $cd); } - $l; + $dir; } sub unconfigured_ibx ($$) { @@ -107,11 +114,34 @@ sub resolve_inboxes ($;$$) { $cfg or die "--all specified, but $cfgfile not readable\n"; @$argv and die "--all specified, but directories specified\n"; } - + my (@old, @ibxs, @eidx, @cidx); + if ($opt->{-cidx_ok}) { + require PublicInbox::CodeSearchIdx; + @$argv = grep { + if (defined(my $d = resolve_cidxdir($_))) { + push @cidx, PublicInbox::CodeSearchIdx->new( + $d, $opt); + undef; + } else { + 1; + } + } @$argv; + } + if ($opt->{-eidx_ok}) { + require PublicInbox::ExtSearchIdx; + @$argv = grep { + if (defined(my $ei = resolve_eidxdir($_))) { + $ei = PublicInbox::ExtSearchIdx->new($ei, $opt); + push @eidx, $ei; + undef; + } else { + 1; + } + } @$argv; + } my $min_ver = $opt->{-min_inbox_version} || 0; # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames, # pathnames are not unique due to symlinks and bind mounts - my (@old, @ibxs); if ($opt->{all}) { $cfg->each_inbox(sub { my ($ibx) = @_; @@ -121,6 +151,7 @@ sub resolve_inboxes ($;$$) { warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; } }); + # TODO: no way to configure cindex in config file, yet } else { # directories specified on the command-line my @dirs = @$argv; push @dirs, '.' if !@dirs && $opt->{-use_cwd}; @@ -161,22 +192,22 @@ sub resolve_inboxes ($;$$) { die "-V$min_ver inboxes not supported by $0\n\t", join("\n\t", @old), "\n"; } - @ibxs; + ($opt->{-eidx_ok} || $opt->{-cidx_ok}) ? (\@ibxs, \@eidx, \@cidx) + : @ibxs; } -# TODO: make Devel::Peek optional, only used for daemon -my @base_mod = qw(Devel::Peek); +my @base_mod = (); my @over_mod = qw(DBD::SQLite DBI); my %mod_groups = ( -index => [ @base_mod, @over_mod ], -base => \@base_mod, - -search => [ @base_mod, @over_mod, 'Search::Xapian' ], + -search => [ @base_mod, @over_mod, 'Xapian' ], ); sub scan_ibx_modules ($$) { my ($mods, $ibx) = @_; if (!$ibx->{indexlevel} || $ibx->{indexlevel} ne 'basic') { - $mods->{'Search::Xapian'} = 1; + $mods->{'Xapian'} = 1; } else { $mods->{$_} = 1 foreach @over_mod; } @@ -188,10 +219,10 @@ sub check_require { while (my $mod = shift @mods) { if (my $groups = $mod_groups{$mod}) { push @mods, @$groups; - } elsif ($mod eq 'Search::Xapian') { + } elsif ($mod eq 'Xapian') { require PublicInbox::Search; PublicInbox::Search::load_xapian() or - $err->{'Search::Xapian || Xapian'} = $@; + $err->{'Xapian || Search::Xapian'} = $@; } else { eval "require $mod"; $err->{$mod} = $@ if $@; @@ -240,14 +271,14 @@ sub index_inbox { if (my $pr = $opt->{-progress}) { $pr->("indexing $ibx->{inboxdir} ...\n"); } - local %SIG = %SIG; + local @SIG{keys %SIG} = values %SIG; setup_signals(\&index_terminate, $ibx); my $idx = { current_info => $ibx->{inboxdir} }; local $SIG{__WARN__} = sub { return if PublicInbox::Eml::warn_ignore(@_); warn($idx->{current_info}, ': ', @_); }; - if (ref($ibx) && $ibx->version == 2) { + if ($ibx->version == 2) { eval { require PublicInbox::V2Writable }; die "v2 requirements not met: $@\n" if $@; $ibx->{-creat_opt}->{nproc} = $jobs; @@ -287,7 +318,7 @@ sub progress_prepare ($;$) { } else { $opt->{verbose} ||= 1; $dst //= *STDERR{GLOB}; - $opt->{-progress} = sub { print $dst @_ }; + $opt->{-progress} = sub { print $dst '# ', @_ }; } } @@ -327,15 +358,35 @@ sub index_prepare ($$) { $opt->{batch_size} and $env = { XAPIAN_FLUSH_THRESHOLD => '4294967295' }; - for my $k (qw(sequential_shard)) { + for my $k (qw(sequential-shard)) { my $git_key = "publicInbox.index".ucfirst($k); - $git_key =~ s/_([a-z])/\U$1/g; + $git_key =~ s/-([a-z])/\U$1/g; defined(my $s = $opt->{$k} // $cfg->{lc($git_key)}) or next; defined(my $v = $cfg->git_bool($s)) or die "`$git_key=$s' not boolean\n"; $opt->{$k} = $v; } + for my $k (qw(since until)) { + my $v = $opt->{$k} // next; + $opt->{reindex} or die "--$k=$v requires --reindex\n"; + } $env; } +sub do_chdir ($) { + my $chdir = $_[0] // return; + for my $d (@$chdir) { + next if $d eq ''; # same as git(1) + chdir $d or die "cd $d: $!"; + } +} + +sub fmt_localtime ($) { + require POSIX; + my @lt = localtime $_[0]; + my (undef, $M, $H, $d, $m, $Y) = @lt; + sprintf('%u-%02u-%02u % 2u:%02u ', $Y + 1900, $m + 1, $d, $H, $M) + .POSIX::strftime('%z', @lt); +} + 1; |