From 9fcce78e40b0a7c61797be2aff6c5afeb474568e Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sun, 20 Dec 2020 06:30:11 +0000 Subject: script/public-inbox-*: favor caller-provided pathnames We'll try to avoid calling Cwd::abs_path and use File::Spec->rel2abs instead, since abs_path will resolve symlinks the user specified on the command-line. Unfortunately, ->rel2abs still leaves "/.." and "/../" uncollapsed, so we still need to fall back to Cwd::abs_path in those cases. While we are at it, we'll also resolve inboxdir from deep inside v2 directories instead of misdetecting them as v1 bare git repos. In any case, stop matching directories by name and instead rely on the unique combination of st_dev + st_ino on stat() as we started doing in the extindex code. --- lib/PublicInbox/Admin.pm | 102 +++++++++++++++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 34 deletions(-) (limited to 'lib/PublicInbox/Admin.pm') diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 3977d812..ea82133a 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -6,15 +6,15 @@ package PublicInbox::Admin; use strict; use parent qw(Exporter); -use Cwd qw(abs_path); -use POSIX (); our @EXPORT_OK = qw(setup_signals); use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); +use File::Spec (); sub setup_signals { my ($cb, $arg) = @_; # optional + require POSIX; # we call exit() here instead of _exit() so DESTROY methods # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap) @@ -27,21 +27,43 @@ sub setup_signals { }; } +# abs_path resolves symlinks, so we want to avoid it if rel2abs +# is sufficient and doesn't leave "/.." or "/../" +sub rel2abs_collapsed ($) { + my $p = File::Spec->rel2abs($_[0]); + return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; # likely + require Cwd; + Cwd::abs_path($p); +} + sub resolve_inboxdir { my ($cd, $ver) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - $$ver = 2 if $ver; - return abs_path($prefix); + my $try = $cd // '.'; + my $root_dev_ino; + while (1) { # favor v2, first + if (-f "$try/inbox.lock") { + $$ver = 2 if $ver; + return rel2abs_collapsed($try); + } elsif (-d $try) { + my @try = stat _; + $root_dev_ino //= do { + my @root = stat('/') or die "stat /: $!\n"; + "$root[0]\0$root[1]"; + }; + last if "$try[0]\0$try[1]" eq $root_dev_ino; + $try .= '/..'; # continue, cd up + } else { + die "`$try' is not a directory\n"; + } } + # try v1 bare git dirs my $cmd = [ qw(git rev-parse --git-dir) ]; my $fh = popen_rd($cmd, undef, {-C => $cd}); my $dir = do { local $/; <$fh> }; - close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n"; + close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; chomp $dir; $$ver = 1 if $ver; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); + rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); } # for unconfigured inboxes @@ -78,8 +100,8 @@ sub unconfigured_ibx ($$) { name => $name, address => [ "$name\@example.com" ], inboxdir => $dir, - # TODO: consumers may want to warn on this: - #-unconfigured => 1, + # consumers (-convert) warn on this: + -unconfigured => 1, }); } @@ -95,41 +117,53 @@ sub resolve_inboxes ($;$$) { } my $min_ver = $opt->{-min_inbox_version} || 0; + # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames, + # pathnames are not unique due to symlinks and bind mounts my (@old, @ibxs); - my %dir2ibx; - my $all = $opt->{all} ? [] : undef; - if ($cfg) { + if ($opt->{all}) { $cfg->each_inbox(sub { my ($ibx) = @_; - my $path = abs_path($ibx->{inboxdir}); - if (defined($path)) { - $dir2ibx{$path} = $ibx; - push @$all, $ibx if $all; + if (-e $ibx->{inboxdir}) { + push(@ibxs, $ibx) if $ibx->version >= $min_ver; } else { - warn <{name} $ibx->{inboxdir}: $! -EOF + warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; } }); - } - if ($all) { - @$all = grep { $_->version >= $min_ver } @$all; - @ibxs = @$all; } else { # directories specified on the command-line - my $i = 0; my @dirs = @$argv; push @dirs, '.' if !@dirs && $opt->{-use_cwd}; - foreach (@dirs) { - my $v; - my $dir = resolve_inboxdir($_, \$v); - if ($v < $min_ver) { + my %s2i; # "st_dev\0st_ino" => array index + for (my $i = 0; $i <= $#dirs; $i++) { + my $dir = $dirs[$i]; + my @st = stat($dir) or die "stat($dir): $!\n"; + $dir = resolve_inboxdir($dir, \(my $ver)); + if ($ver >= $min_ver) { + $s2i{"$st[0]\0$st[1]"} //= $i; + } else { push @old, $dir; - next; } - my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i); - $i++; - push @ibxs, $ibx; } + my $done = \'done'; + eval { + $cfg->each_inbox(sub { + my ($ibx) = @_; + return if $ibx->version < $min_ver; + my $dir = $ibx->{inboxdir}; + if (my @s = stat $dir) { + my $i = delete($s2i{"$s[0]\0$s[1]"}) + // return; + $ibxs[$i] = $ibx; + die $done if !keys(%s2i); + } else { + warn "W: $ibx->{name} $dir: $!\n"; + } + }); + }; + die $@ if $@ && $@ ne $done; + for my $i (sort { $a <=> $b } values %s2i) { + $ibxs[$i] = unconfigured_ibx($dirs[$i], $i); + } + @ibxs = grep { defined } @ibxs; # duplicates are undef } if (@old) { die "-V$min_ver inboxes not supported by $0\n\t", -- cgit v1.2.3-24-ge0c7 From 3e9888ed30b7fe092b03789d19a8020d4bc0fb39 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Dec 2020 07:51:20 +0000 Subject: use rel2abs_collapsed when loading Inbox objects We need to canonicalize paths for inboxes which do not have a newsgroup defined, otherwise ->eidx_key matches can fail in unexpected ways. --- lib/PublicInbox/Admin.pm | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'lib/PublicInbox/Admin.pm') diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index ea82133a..c972fb68 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -10,7 +10,7 @@ our @EXPORT_OK = qw(setup_signals); use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); -use File::Spec (); +*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed; sub setup_signals { my ($cb, $arg) = @_; # optional @@ -27,15 +27,6 @@ sub setup_signals { }; } -# abs_path resolves symlinks, so we want to avoid it if rel2abs -# is sufficient and doesn't leave "/.." or "/../" -sub rel2abs_collapsed ($) { - my $p = File::Spec->rel2abs($_[0]); - return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; # likely - require Cwd; - Cwd::abs_path($p); -} - sub resolve_inboxdir { my ($cd, $ver) = @_; my $try = $cd // '.'; -- cgit v1.2.3-24-ge0c7 From 949e8b4a65a2dbb99d8923ebb4715a8724ca8bf2 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 22 Dec 2020 06:01:44 +0000 Subject: admin: resolve inboxes to absolute paths for index Some of my ancient v1-only scripts called public-inbox-index to operate on GIT_DIR: GIT_DIR=/path/to/foo.git public-inbox-index This change ensures they keep working, otherwise "." will be passed to the --git-dir= switch of git(1) because that's the default directory if no inboxes are specified on the command-line. Fixes: 9fcce78e40b0a7c6 ("script/public-inbox-*: favor caller-provided pathnames") --- lib/PublicInbox/Admin.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox/Admin.pm') diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index c972fb68..9a86d206 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -127,7 +127,7 @@ sub resolve_inboxes ($;$$) { for (my $i = 0; $i <= $#dirs; $i++) { my $dir = $dirs[$i]; my @st = stat($dir) or die "stat($dir): $!\n"; - $dir = resolve_inboxdir($dir, \(my $ver)); + $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver)); if ($ver >= $min_ver) { $s2i{"$st[0]\0$st[1]"} //= $i; } else { -- cgit v1.2.3-24-ge0c7 From fb4dd7fdeeed8478cda9b7e63e56564da8cbdacf Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:12 +0000 Subject: index: do not attach inbox to extindex unless updated We'll count the number of log changes (regardless of index or unindex) and only attach inboxes to ExtSearchIdx objects when they get new work. We'll also reduce lock bouncing and only update external indices after all per-inbox indexing is done. This also updates existing v2 indexing/unindexing callers to be more consistent and ensures unindex log entries update per-inbox last commit information. --- lib/PublicInbox/Admin.pm | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/PublicInbox/Admin.pm') diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 9a86d206..b468108e 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -271,6 +271,7 @@ EOM $idx = PublicInbox::SearchIdx->new($ibx, 1); } $idx->index_sync($opt); + $idx->{nidx} // 0; # returns number processed } sub progress_prepare ($) { -- cgit v1.2.3-24-ge0c7 From 0b018bebe7d8ee807ab07b570cf33669da4875b0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 01:44:36 +0000 Subject: default to CORE::warn in $SIG{__WARN__} handlers As with CORE::die and $SIG{__DIE__}, it turns out CORE::warn is safe to use inside $SIG{__WARN__} handlers without triggering infinite recursion. So fall back to reusing CORE::warn instead of creating a new sub. --- lib/PublicInbox/Admin.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox/Admin.pm') diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index b468108e..d414e4e2 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -241,7 +241,7 @@ sub index_inbox { } local %SIG = %SIG; setup_signals(\&index_terminate, $ibx); - my $warn_cb = $SIG{__WARN__} // sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} // \&CORE::warn; my $idx = { current_info => $ibx->{inboxdir} }; my $warn_ignore = PublicInbox::InboxWritable->can('warn_ignore'); local $SIG{__WARN__} = sub { -- cgit v1.2.3-24-ge0c7