diff options
55 files changed, 851 insertions, 416 deletions
diff --git a/Documentation/mknews.perl b/Documentation/mknews.perl index d87c2609..a11dd5f0 100755 --- a/Documentation/mknews.perl +++ b/Documentation/mknews.perl @@ -119,10 +119,10 @@ sub html_start { } sub html_end { - print $out <<EOF or die; - git clone $PublicInbox::WwwStream::CODE_URL -</pre></body></html> -EOF + for (@$PublicInbox::WwwStream::CODE_URL) { + print $out " git clone $_\n" or die; + } + print $out "</pre></body></html>\n" or die; } sub atom_start { diff --git a/Documentation/public-inbox-index.pod b/Documentation/public-inbox-index.pod index 0848e860..2d5df930 100644 --- a/Documentation/public-inbox-index.pod +++ b/Documentation/public-inbox-index.pod @@ -162,6 +162,23 @@ See L<public-inbox-init(1)/--skip-docdata> for description and caveats. Available in public-inbox 1.6.0+. +=item --update-extindex=EXTINDEX, -E + +Update the given external index (L<public-inbox-extindex-format(5)>. +Either the configured section name (e.g. C<all>) or a directory name +may be specified. + +Defaults to C<all> if C<[extindex "all"]> is configured, +otherwise no external indices are updated. + +May be specified multiple times in rare cases where multiple +external indices are configured. + +=item --no-update-extindex + +Do not update the C<all> external index by default. This negates +all uses of C<-E> / C<--update-extindex=> on the command-line. + =back =head1 FILES @@ -297,4 +314,4 @@ License: AGPL-3.0+ L<https://www.gnu.org/licenses/agpl-3.0.txt> =head1 SEE ALSO -L<Search::Xapian>, L<DBD::SQLite> +L<Search::Xapian>, L<DBD::SQLite>, L<public-inbox-extindex-format(5)> diff --git a/Documentation/public-inbox-v1-format.pod b/Documentation/public-inbox-v1-format.pod index e5b1dd06..da19d2c9 100644 --- a/Documentation/public-inbox-v1-format.pod +++ b/Documentation/public-inbox-v1-format.pod @@ -2,7 +2,7 @@ =head1 NAME -public-inbox v1 git repository and tree description (aka "ssoma") +public-inbox-v1-format - git repository and tree description (aka "ssoma") =head1 DESCRIPTION diff --git a/Documentation/public-inbox-v2-format.pod b/Documentation/public-inbox-v2-format.pod index d6282cb4..3c89f13e 100644 --- a/Documentation/public-inbox-v2-format.pod +++ b/Documentation/public-inbox-v2-format.pod @@ -2,7 +2,7 @@ =head1 NAME -public-inbox v2 format description +public-inbox-v2-format - structure of public inbox v2 archives =head1 DESCRIPTION @@ -408,6 +408,7 @@ t/x-unknown-alpine.eml t/xcpdb-reshard.t xt/cmp-msgstr.t xt/cmp-msgview.t +xt/create-many-inboxes.t xt/eml_check_limits.t xt/git-http-backend.t xt/git_async_cmp.t @@ -94,6 +94,7 @@ AGPL source code is available via git: git clone https://public-inbox.org/public-inbox.git git clone https://repo.or.cz/public-inbox.git + torsocks git clone http://ou63pmih66umazou.onion/public-inbox.git torsocks git clone http://hjrcffqmbrq6wope.onion/public-inbox See below for contact info. diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm index 3977d812..d414e4e2 100644 --- a/lib/PublicInbox/Admin.pm +++ b/lib/PublicInbox/Admin.pm @@ -6,15 +6,15 @@ package PublicInbox::Admin; use strict; use parent qw(Exporter); -use Cwd qw(abs_path); -use POSIX (); our @EXPORT_OK = qw(setup_signals); use PublicInbox::Config; use PublicInbox::Inbox; use PublicInbox::Spawn qw(popen_rd); +*rel2abs_collapsed = \&PublicInbox::Config::rel2abs_collapsed; sub setup_signals { my ($cb, $arg) = @_; # optional + require POSIX; # we call exit() here instead of _exit() so DESTROY methods # get called (e.g. File::Temp::Dir and PublicInbox::Msgmap) @@ -29,19 +29,32 @@ sub setup_signals { sub resolve_inboxdir { my ($cd, $ver) = @_; - my $prefix = defined $cd ? $cd : './'; - if (-d $prefix && -f "$prefix/inbox.lock") { # v2 - $$ver = 2 if $ver; - return abs_path($prefix); + my $try = $cd // '.'; + my $root_dev_ino; + while (1) { # favor v2, first + if (-f "$try/inbox.lock") { + $$ver = 2 if $ver; + return rel2abs_collapsed($try); + } elsif (-d $try) { + my @try = stat _; + $root_dev_ino //= do { + my @root = stat('/') or die "stat /: $!\n"; + "$root[0]\0$root[1]"; + }; + last if "$try[0]\0$try[1]" eq $root_dev_ino; + $try .= '/..'; # continue, cd up + } else { + die "`$try' is not a directory\n"; + } } + # try v1 bare git dirs my $cmd = [ qw(git rev-parse --git-dir) ]; my $fh = popen_rd($cmd, undef, {-C => $cd}); my $dir = do { local $/; <$fh> }; - close $fh or die "error in ".join(' ', @$cmd)." (cwd:$cd): $!\n"; + close $fh or die "error in @$cmd (cwd:${\($cd // '.')}): $!\n"; chomp $dir; $$ver = 1 if $ver; - return abs_path($cd) if ($dir eq '.' && defined $cd); - abs_path($dir); + rel2abs_collapsed($dir eq '.' ? ($cd // $dir) : $dir); } # for unconfigured inboxes @@ -78,8 +91,8 @@ sub unconfigured_ibx ($$) { name => $name, address => [ "$name\@example.com" ], inboxdir => $dir, - # TODO: consumers may want to warn on this: - #-unconfigured => 1, + # consumers (-convert) warn on this: + -unconfigured => 1, }); } @@ -95,41 +108,53 @@ sub resolve_inboxes ($;$$) { } my $min_ver = $opt->{-min_inbox_version} || 0; + # lookup inboxes by st_dev + st_ino instead of {inboxdir} pathnames, + # pathnames are not unique due to symlinks and bind mounts my (@old, @ibxs); - my %dir2ibx; - my $all = $opt->{all} ? [] : undef; - if ($cfg) { + if ($opt->{all}) { $cfg->each_inbox(sub { my ($ibx) = @_; - my $path = abs_path($ibx->{inboxdir}); - if (defined($path)) { - $dir2ibx{$path} = $ibx; - push @$all, $ibx if $all; + if (-e $ibx->{inboxdir}) { + push(@ibxs, $ibx) if $ibx->version >= $min_ver; } else { - warn <<EOF; -W: $ibx->{name} $ibx->{inboxdir}: $! -EOF + warn "W: $ibx->{name} $ibx->{inboxdir}: $!\n"; } }); - } - if ($all) { - @$all = grep { $_->version >= $min_ver } @$all; - @ibxs = @$all; } else { # directories specified on the command-line - my $i = 0; my @dirs = @$argv; push @dirs, '.' if !@dirs && $opt->{-use_cwd}; - foreach (@dirs) { - my $v; - my $dir = resolve_inboxdir($_, \$v); - if ($v < $min_ver) { + my %s2i; # "st_dev\0st_ino" => array index + for (my $i = 0; $i <= $#dirs; $i++) { + my $dir = $dirs[$i]; + my @st = stat($dir) or die "stat($dir): $!\n"; + $dir = $dirs[$i] = resolve_inboxdir($dir, \(my $ver)); + if ($ver >= $min_ver) { + $s2i{"$st[0]\0$st[1]"} //= $i; + } else { push @old, $dir; - next; } - my $ibx = $dir2ibx{$dir} ||= unconfigured_ibx($dir, $i); - $i++; - push @ibxs, $ibx; } + my $done = \'done'; + eval { + $cfg->each_inbox(sub { + my ($ibx) = @_; + return if $ibx->version < $min_ver; + my $dir = $ibx->{inboxdir}; + if (my @s = stat $dir) { + my $i = delete($s2i{"$s[0]\0$s[1]"}) + // return; + $ibxs[$i] = $ibx; + die $done if !keys(%s2i); + } else { + warn "W: $ibx->{name} $dir: $!\n"; + } + }); + }; + die $@ if $@ && $@ ne $done; + for my $i (sort { $a <=> $b } values %s2i) { + $ibxs[$i] = unconfigured_ibx($dirs[$i], $i); + } + @ibxs = grep { defined } @ibxs; # duplicates are undef } if (@old) { die "-V$min_ver inboxes not supported by $0\n\t", @@ -216,7 +241,7 @@ sub index_inbox { } local %SIG = %SIG; setup_signals(\&index_terminate, $ibx); - my $warn_cb = $SIG{__WARN__} // sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} // \&CORE::warn; my $idx = { current_info => $ibx->{inboxdir} }; my $warn_ignore = PublicInbox::InboxWritable->can('warn_ignore'); local $SIG{__WARN__} = sub { @@ -246,6 +271,7 @@ EOM $idx = PublicInbox::SearchIdx->new($ibx, 1); } $idx->index_sync($opt); + $idx->{nidx} // 0; # returns number processed } sub progress_prepare ($) { diff --git a/lib/PublicInbox/Config.pm b/lib/PublicInbox/Config.pm index cafd9c3b..21f2161a 100644 --- a/lib/PublicInbox/Config.pm +++ b/lib/PublicInbox/Config.pm @@ -132,20 +132,16 @@ sub default_file { sub config_fh_parse ($$$) { my ($fh, $rs, $fs) = @_; - my %rv; - my (%section_seen, @section_order); + my (%rv, %seen, @section_order, $line, $k, $v, $section, $cur, $i); local $/ = $rs; - while (defined(my $line = <$fh>)) { - chomp $line; - my ($k, $v) = split($fs, $line, 2); - my ($section) = ($k =~ /\A(\S+)\.[^\.]+\z/); - unless (defined $section_seen{$section}) { - $section_seen{$section} = 1; - push @section_order, $section; - } - - my $cur = $rv{$k}; - if (defined $cur) { + while (defined($line = <$fh>)) { # perf critical with giant configs + $i = index($line, $fs); + $k = substr($line, 0, $i); + $v = substr($line, $i + 1, -1); # chop off $fs + $section = substr($k, 0, rindex($k, '.')); + $seen{$section} //= push(@section_order, $section); + + if (defined($cur = $rv{$k})) { if (ref($cur) eq "ARRAY") { push @$cur, $v; } else { @@ -163,11 +159,10 @@ sub config_fh_parse ($$$) { sub git_config_dump { my ($file) = @_; return {} unless -e $file; - my @cmd = (qw/git config -z -l --includes/, "--file=$file"); - my $cmd = join(' ', @cmd); - my $fh = popen_rd(\@cmd); + my $cmd = [ qw(git config -z -l --includes), "--file=$file" ]; + my $fh = popen_rd($cmd); my $rv = config_fh_parse($fh, "\0", "\n"); - close $fh or die "failed to close ($cmd) pipe: $?"; + close $fh or die "failed to close (@$cmd) pipe: $?"; $rv; } @@ -369,6 +364,16 @@ sub git_bool { } } +# abs_path resolves symlinks, so we want to avoid it if rel2abs +# is sufficient and doesn't leave "/.." or "/../" +sub rel2abs_collapsed { + require File::Spec; + my $p = File::Spec->rel2abs($_[-1]); + return $p if substr($p, -3, 3) ne '/..' && index($p, '/../') < 0; + require Cwd; + Cwd::abs_path($p); +} + sub _fill { my ($self, $pfx) = @_; my $ibx = {}; @@ -391,10 +396,10 @@ EOF } } - # backwards compatibility: - $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"}; - if (($ibx->{inboxdir} // '') =~ /\n/s) { - warn "E: `$ibx->{inboxdir}' must not contain `\\n'\n"; + # "mainrepo" is backwards compatibility: + my $dir = $ibx->{inboxdir} //= $self->{"$pfx.mainrepo"} // return; + if (index($dir, "\n") >= 0) { + warn "E: `$dir' must not contain `\\n'\n"; return; } foreach my $k (qw(obfuscate)) { @@ -415,10 +420,7 @@ EOF } } - return unless defined($ibx->{inboxdir}); - my $name = $pfx; - $name =~ s/\Apublicinbox\.//; - + my $name = substr($pfx, length('publicinbox.')); if (!valid_inbox_name($name)) { warn "invalid inbox name: '$name'\n"; return; @@ -438,7 +440,7 @@ EOF $self->{-by_list_id}->{lc($list_id)} = $ibx; } } - if (my $ngname = $ibx->{newsgroup}) { + if (defined(my $ngname = $ibx->{newsgroup})) { if (ref($ngname)) { delete $ibx->{newsgroup}; warn 'multiple newsgroups not supported: '. @@ -447,7 +449,8 @@ EOF # wildmat-exact and RFC 3501 (IMAP) ATOM-CHAR. # Leave out a few chars likely to cause problems or conflicts: # '|', '<', '>', ';', '#', '$', '&', - } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]!) { + } elsif ($ngname =~ m![^A-Za-z0-9/_\.\-\~\@\+\=:]! || + $ngname eq '') { delete $ibx->{newsgroup}; warn "newsgroup name invalid: `$ngname'\n"; } else { @@ -456,6 +459,13 @@ EOF $self->{-by_newsgroup}->{$ngname} = $ibx; } } + unless (defined $ibx->{newsgroup}) { # for ->eidx_key + my $abs = rel2abs_collapsed($dir); + if ($abs ne $dir) { + warn "W: `$dir' canonicalized to `$abs'\n"; + $ibx->{inboxdir} = $abs; + } + } $self->{-by_name}->{$name} = $ibx; if ($ibx->{obfuscate}) { $ibx->{-no_obfuscate} = $self->{-no_obfuscate}; diff --git a/lib/PublicInbox/DS.pm b/lib/PublicInbox/DS.pm index a02b3bb7..97a6f6ef 100644 --- a/lib/PublicInbox/DS.pm +++ b/lib/PublicInbox/DS.pm @@ -50,7 +50,6 @@ our ( $PostLoopCallback, # subref to call at the end of each loop, if defined (global) $LoopTimeout, # timeout of event loop in milliseconds - $DoneInit, # if we've done the one-time module init yet @Timers, # timers $in_loop, ); @@ -75,12 +74,9 @@ sub Reset { @Timers = (); $PostLoopCallback = undef; - $DoneInit = 0; $_io = undef; # closes real $Epoll FD $Epoll = undef; # may call DSKQXS::DESTROY - - *EventLoop = *FirstTimeEventLoop; } =head2 C<< CLASS->SetLoopTimeout( $timeout ) >> @@ -91,9 +87,7 @@ A timeout of 0 (zero) means poll forever. A timeout of -1 means poll and return immediately. =cut -sub SetLoopTimeout { - return $LoopTimeout = $_[1] + 0; -} +sub SetLoopTimeout { $LoopTimeout = $_[1] + 0 } =head2 C<< PublicInbox::DS::add_timer( $seconds, $coderef, $arg) >> @@ -137,14 +131,13 @@ sub set_cloexec ($) { fcntl($_io, F_SETFD, $fl | FD_CLOEXEC); } +# caller sets return value to $Epoll sub _InitPoller { - return if $DoneInit; - $DoneInit = 1; - if (PublicInbox::Syscall::epoll_defined()) { - $Epoll = epoll_create(); - set_cloexec($Epoll) if (defined($Epoll) && $Epoll >= 0); + my $fd = epoll_create(); + set_cloexec($fd) if (defined($fd) && $fd >= 0); + $fd; } else { my $cls; for (qw(DSKQXS DSPoll)) { @@ -152,9 +145,8 @@ sub _InitPoller last if eval "require $cls"; } $cls->import(qw(epoll_ctl epoll_wait)); - $Epoll = $cls->new; + $cls->new; } - *EventLoop = *EpollEventLoop; } =head2 C<< CLASS->EventLoop() >> @@ -163,13 +155,6 @@ Start processing IO events. In most daemon programs this never exits. See C<PostLoopCallback> below for how to exit the loop. =cut -sub FirstTimeEventLoop { - my $class = shift; - - _InitPoller(); - - EventLoop($class); -} sub now () { clock_gettime(CLOCK_MONOTONIC) } @@ -213,12 +198,7 @@ sub RunTimers { my $timeout = int(($Timers[0][0] - $now) * 1000) + 1; # -1 is an infinite timeout, so prefer a real timeout - return $timeout if $LoopTimeout == -1; - - # otherwise pick the lower of our regular timeout and time until - # the next timer - return $LoopTimeout if $LoopTimeout < $timeout; - return $timeout; + ($LoopTimeout < 0 || $LoopTimeout >= $timeout) ? $timeout : $LoopTimeout; } # We can't use waitpid(-1) safely here since it can hit ``, system(), @@ -271,21 +251,21 @@ sub PostEventLoop () { $PostLoopCallback ? $PostLoopCallback->(\%DescriptorMap) : 1; } -sub EpollEventLoop { +sub EventLoop { + $Epoll //= _InitPoller(); local $in_loop = 1; + my @events; do { - my @events; - my $i; my $timeout = RunTimers(); # get up to 1000 events - my $evcount = epoll_wait($Epoll, 1000, $timeout, \@events); - for ($i=0; $i<$evcount; $i++) { + epoll_wait($Epoll, 1000, $timeout, \@events); + for my $fd (@events) { # it's possible epoll_wait returned many events, including some at the end # that ones in the front triggered unregister-interest actions. if we # can't find the %sock entry, it's because we're no longer interested # in that event. - $DescriptorMap{$events[$i]->[0]}->event_step; + $DescriptorMap{$fd}->event_step; } } while (PostEventLoop()); _run_later(); @@ -330,8 +310,7 @@ sub new { $self->{sock} = $sock; my $fd = fileno($sock); - _InitPoller(); - + $Epoll //= _InitPoller(); retry: if (epoll_ctl($Epoll, EPOLL_CTL_ADD, $fd, $ev)) { if ($! == EINVAL && ($ev & EPOLLEXCLUSIVE)) { diff --git a/lib/PublicInbox/DSKQXS.pm b/lib/PublicInbox/DSKQXS.pm index d1d3fe60..aa2c9168 100644 --- a/lib/PublicInbox/DSKQXS.pm +++ b/lib/PublicInbox/DSKQXS.pm @@ -134,7 +134,7 @@ sub epoll_wait { } } # caller only cares for $events[$i]->[0] - scalar(@$events); + $_ = $_->[0] for @$events; } # kqueue is close-on-fork (not exec), so we must not close it diff --git a/lib/PublicInbox/DSPoll.pm b/lib/PublicInbox/DSPoll.pm index 1d9b51d9..a218f695 100644 --- a/lib/PublicInbox/DSPoll.pm +++ b/lib/PublicInbox/DSPoll.pm @@ -45,14 +45,13 @@ sub epoll_wait { my $fd = $pset[$i++]; my $revents = $pset[$i++] or next; delete($self->{$fd}) if $self->{$fd} & EPOLLONESHOT; - push @$events, [ $fd ]; + push @$events, $fd; } my $nevents = scalar @$events; if ($n != $nevents) { warn "BUG? poll() returned $n, but got $nevents"; } } - $n; } 1; diff --git a/lib/PublicInbox/Daemon.pm b/lib/PublicInbox/Daemon.pm index 6b92b60d..bdf1dc45 100644 --- a/lib/PublicInbox/Daemon.pm +++ b/lib/PublicInbox/Daemon.pm @@ -13,7 +13,6 @@ use IO::Socket; use POSIX qw(WNOHANG :signal_h); use Socket qw(IPPROTO_TCP SOL_SOCKET); sub SO_ACCEPTFILTER () { 0x1000 } -use Cwd qw/abs_path/; STDOUT->autoflush(1); STDERR->autoflush(1); use PublicInbox::DS qw(now); @@ -204,10 +203,11 @@ sub check_absolute ($$) { sub daemonize () { if ($daemonize) { + require Cwd; foreach my $i (0..$#ARGV) { my $arg = $ARGV[$i]; next unless -e $arg; - $ARGV[$i] = abs_path($arg); + $ARGV[$i] = Cwd::abs_path($arg); } check_absolute('stdout', $stdout); check_absolute('stderr', $stderr); @@ -369,14 +369,12 @@ sub inherit ($) { foreach my $fd (3..$end) { my $s = IO::Handle->new_from_fd($fd, 'r'); if (my $k = sockname($s)) { - if ($s->blocking) { - $s->blocking(0); - warn <<""; + my $prev_was_blocking = $s->blocking(0); + warn <<"" if $prev_was_blocking; Inherited socket (fd=$fd) is blocking, making it non-blocking. Set 'NonBlocking = true' in the systemd.service unit to avoid stalled processes when multiple service instances start. - } $listener_names->{$k} = $s; push @rv, $s; } else { @@ -423,11 +421,8 @@ sub upgrade { # $_[0] = signal name or number (unused) } sub kill_workers ($) { - my ($s) = @_; - - while (my ($pid, $id) = each %pids) { - kill $s, $pid; - } + my ($sig) = @_; + kill $sig, keys(%pids); } sub upgrade_aborted ($) { diff --git a/lib/PublicInbox/Eml.pm b/lib/PublicInbox/Eml.pm index 571edc5c..4d3fffc0 100644 --- a/lib/PublicInbox/Eml.pm +++ b/lib/PublicInbox/Eml.pm @@ -378,7 +378,9 @@ sub header_str_set { header_set($self, $name, @vals); } -sub mhdr_decode ($) { eval { $MIME_Header->decode($_[0]) } // $_[0] } +sub mhdr_decode ($) { + eval { $MIME_Header->decode($_[0], Encode::FB_DEFAULT) } // $_[0]; +} sub filename { my $dis = header_raw($_[0], 'Content-Disposition'); diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm index 6a173f67..4df885ab 100644 --- a/lib/PublicInbox/ExtMsg.pm +++ b/lib/PublicInbox/ExtMsg.pm @@ -33,7 +33,7 @@ sub search_partial ($$) { my ($ibx, $mid) = @_; return if length($mid) < $MIN_PARTIAL_LEN; my $srch = $ibx->search or return; # NOT ->isrch, we already try ->ALL - my $opt = { limit => PARTIAL_MAX, mset => 2 }; + my $opt = { limit => PARTIAL_MAX, relevance => -1 }; my @try = ("m:$mid*"); my $chop = $mid; if ($chop =~ s/(\W+)(\w*)\z//) { diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm index 7ce950bc..2bcdece6 100644 --- a/lib/PublicInbox/ExtSearch.pm +++ b/lib/PublicInbox/ExtSearch.pm @@ -9,7 +9,6 @@ use strict; use v5.10.1; use PublicInbox::Over; use PublicInbox::Inbox; -use File::Spec (); use PublicInbox::MiscSearch; use DBI qw(:sql_types); # SQL_BLOB @@ -18,7 +17,6 @@ use parent qw(PublicInbox::Search); sub new { my ($class, $topdir) = @_; - $topdir = File::Spec->canonpath($topdir); bless { topdir => $topdir, # xpfx => 'ei15' @@ -31,8 +29,6 @@ sub misc { $self->{misc} //= PublicInbox::MiscSearch->new("$self->{xpfx}/misc"); } -sub search { $_[0] } # self - # same as per-inbox ->over, for now... sub over { my ($self) = @_; @@ -122,6 +118,6 @@ no warnings 'once'; *recent = \&PublicInbox::Inbox::recent; *max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef -*isrch = *search; +*isrch = *search = \&PublicInbox::Search::reopen; 1; diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 56896056..a2d70205 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -30,13 +30,11 @@ use PublicInbox::V2Writable; use PublicInbox::InboxWritable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; -use File::Spec; use PublicInbox::DS qw(now); use DBI qw(:sql_types); # SQL_BLOB sub new { my (undef, $dir, $opt) = @_; - $dir = File::Spec->canonpath($dir); my $l = $opt->{indexlevel} // 'full'; $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and die "invalid indexlevel=$l\n"; @@ -56,28 +54,14 @@ sub new { }, __PACKAGE__; $self->{shards} = $self->count_shards || nproc_shards($opt->{creat}); my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3"); - $oidx->{-no_fsync} = 1 if $opt->{-no_fsync}; + $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync}; $self->{oidx} = $oidx; $self } sub attach_inbox { my ($self, $ibx) = @_; - my $key = $ibx->eidx_key; - if (!$ibx->over || !$ibx->mm) { - warn "W: skipping $key (unindexed)\n"; - return; - } - if (!defined($ibx->uidvalidity)) { - warn "W: skipping $key (no UIDVALIDITY)\n"; - return; - } - my $ibxdir = File::Spec->canonpath($ibx->{inboxdir}); - if ($ibxdir ne $ibx->{inboxdir}) { - warn "W: `$ibx->{inboxdir}' canonicalized to `$ibxdir'\n"; - $ibx->{inboxdir} = $ibxdir; - } - $self->{ibx_map}->{$key} //= do { + $self->{ibx_map}->{$ibx->eidx_key} //= do { push @{$self->{ibx_list}}, $ibx; $ibx; } @@ -281,29 +265,36 @@ sub last_commits { $heads; } +sub _ibx_index_reject ($) { + my ($ibx) = @_; + $ibx->mm // return 'unindexed, no msgmap.sqlite3'; + $ibx->uidvalidity // return 'no UIDVALIDITY'; + $ibx->over // return 'unindexed, no over.sqlite3'; + undef; +} + sub _sync_inbox ($$$) { my ($self, $sync, $ibx) = @_; + my $ekey = $ibx->eidx_key; + if (defined(my $err = _ibx_index_reject($ibx))) { + return "W: skipping $ekey ($err)"; + } $sync->{ibx} = $ibx; $sync->{nr} = \(my $nr = 0); my $v = $ibx->version; - my $ekey = $ibx->eidx_key; if ($v == 2) { $sync->{epoch_max} = $ibx->max_git_epoch // return; sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable } elsif ($v == 1) { my $uv = $ibx->uidvalidity; my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv"); - my $head = $ibx->mm->last_commit; - unless (defined $head) { - warn "E: $ibx->{inboxdir} is not indexed\n"; - return; - } + my $head = $ibx->mm->last_commit // + return "E: $ibx->{inboxdir} is not indexed"; my $stk = prepare_stack($sync, $lc ? "$lc..$head" : $head); my $unit = { stack => $stk, git => $ibx->git }; push @{$sync->{todo}}, $unit; } else { - warn "E: $ekey unsupported inbox version (v$v)\n"; - return; + return "E: $ekey unsupported inbox version (v$v)"; } for my $unit (@{delete($sync->{todo}) // []}) { last if $sync->{quit}; @@ -311,6 +302,7 @@ sub _sync_inbox ($$$) { } $self->{midx}->index_ibx($ibx) unless $sync->{quit}; $ibx->git->cleanup; # done with this inbox, now + undef; } sub gc_unref_doc ($$$$) { @@ -401,6 +393,32 @@ sub _ibx_for ($$$) { $self->{ibx_list}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped" } +sub _fd_constrained ($) { + my ($self) = @_; + $self->{-fd_constrained} //= do { + my $soft; + if (eval { require BSD::Resource; 1 }) { + my $NOFILE = BSD::Resource::RLIMIT_NOFILE(); + ($soft, undef) = BSD::Resource::getrlimit($NOFILE); + } else { + chomp($soft = `sh -c 'ulimit -n'`); + } + if (defined($soft)) { + my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate + my $ret = $want > $soft; + if ($ret) { + warn <<EOF; +RLIMIT_NOFILE=$soft insufficient (want: $want), will close DB handles early +EOF + } + $ret; + } else { + warn "Unable to determine RLIMIT_NOFILE: $@\n"; + 1; + } + }; +} + sub _reindex_finalize ($$$) { my ($req, $smsg, $eml) = @_; my $sync = $req->{sync}; @@ -437,11 +455,16 @@ sub _reindex_finalize ($$$) { my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty"; $x->{num} = delete($x->{xnum}) // die '{xnum} unset'; $ibx = _ibx_for($self, $sync, $x); - my $e = $ibx->over->get_art($x->{num}); - $e->{blob} eq $x->{blob} or die <<EOF; + if (my $over = $ibx->over) { + my $e = $over->get_art($x->{num}); + $e->{blob} eq $x->{blob} or die <<EOF; $x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num}); EOF - push @todo, $ibx, $e; + push @todo, $ibx, $e; + $over->dbh_close if _fd_constrained($self); + } else { + die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n"; + } } undef $by_chash; while (my ($ibx, $e) = splice(@todo, 0, 2)) { @@ -607,7 +630,7 @@ sub eidxq_process ($$) { # for reindexing my $dbh = $self->{oidx}->dbh; my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return; ${$sync->{nr}} = 0; - $sync->{-regen_fmt} = "%u/$tot\n"; + local $sync->{-regen_fmt} = "%u/$tot\n"; my $pr = $sync->{-opt}->{-progress}; if ($pr) { my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq'); @@ -686,7 +709,8 @@ sub _reindex_check_unseen ($$$) { my $msgs; my $pr = $sync->{-opt}->{-progress}; my $ekey = $ibx->eidx_key; - $sync->{-regen_fmt} = "$ekey checking unseen %u/".$ibx->over->max."\n"; + local $sync->{-regen_fmt} = + "$ekey checking unseen %u/".$ibx->over->max."\n"; ${$sync->{nr}} = 0; while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) { @@ -729,7 +753,7 @@ sub _reindex_check_stale ($$$) { my $pr = $sync->{-opt}->{-progress}; my $fetching; my $ekey = $ibx->eidx_key; - $sync->{-regen_fmt} = + local $sync->{-regen_fmt} = "$ekey check stale/missing %u/".$ibx->over->max."\n"; ${$sync->{nr}} = 0; do { @@ -787,9 +811,14 @@ DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ? sub _reindex_inbox ($$$) { my ($self, $sync, $ibx) = @_; - local $self->{current_info} = $ibx->eidx_key; - _reindex_check_unseen($self, $sync, $ibx); - _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit}; + my $ekey = $ibx->eidx_key; + local $self->{current_info} = $ekey; + if (defined(my $err = _ibx_index_reject($ibx))) { + warn "W: cannot reindex $ekey ($err)\n"; + } else { + _reindex_check_unseen($self, $sync, $ibx); + _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit}; + } delete @$ibx{qw(over mm search git)}; # won't need these for a bit } @@ -810,10 +839,17 @@ sub eidx_reindex { eidxq_process($self, $sync) unless $sync->{quit}; } +sub sync_inbox { + my ($self, $sync, $ibx) = @_; + my $err = _sync_inbox($self, $sync, $ibx); + delete @$ibx{qw(mm over)}; + warn $err, "\n" if defined($err); +} + sub eidx_sync { # main entry point my ($self, $opt) = @_; - my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} || \&CORE::warn; local $self->{current_info} = ''; local $SIG{__WARN__} = sub { $warn_cb->($self->{current_info}, ': ', @_); @@ -840,20 +876,23 @@ sub eidx_sync { # main entry point $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key); } if (delete($opt->{reindex})) { - $sync->{checkpoint_unlocks} = 1; + local $sync->{checkpoint_unlocks} = 1; eidx_reindex($self, $sync); } # don't use $_ here, it'll get clobbered by reindex_checkpoint - for my $ibx (@{$self->{ibx_list}}) { - last if $sync->{quit}; - _sync_inbox($self, $sync, $ibx); + if ($opt->{scan} // 1) { + for my $ibx (@{$self->{ibx_list}}) { + last if $sync->{quit}; + sync_inbox($self, $sync, $ibx); + } } $self->{oidx}->rethread_done($opt) unless $sync->{quit}; eidxq_process($self, $sync) unless $sync->{quit}; eidxq_release($self); - PublicInbox::V2Writable::done($self); + done($self); + $sync; # for eidx_watch } sub update_last_commit { # overrides V2Writable @@ -963,16 +1002,125 @@ sub idx_init { # similar to V2Writable PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o); } $self->parallel_init($self->{indexlevel}); - $self->umask_prepare; $self->with_umask(\&_idx_init, $self, $opt); $self->{oidx}->begin_lazy; $self->{oidx}->eidx_prep; $self->{midx}->begin_txn; } +sub _watch_commit { # PublicInbox::DS::add_timer callback + my ($self) = @_; + delete $self->{-commit_timer}; + eidxq_process($self, $self->{-watch_sync}); + eidxq_release($self); + delete local $self->{-watch_sync}->{-regen_fmt}; + reindex_checkpoint($self, $self->{-watch_sync}); + + # call event_step => done unless commit_timer is armed + PublicInbox::DS::requeue($self); +} + +sub on_inbox_unlock { # called by PublicInbox::InboxIdle + my ($self, $ibx) = @_; + my $opt = $self->{-watch_sync}->{-opt}; + my $pr = $opt->{-progress}; + my $ekey = $ibx->eidx_key; + local $0 = "sync $ekey"; + $pr->("indexing $ekey\n") if $pr; + $self->idx_init($opt); + sync_inbox($self, $self->{-watch_sync}, $ibx); + $self->{-commit_timer} //= PublicInbox::DS::add_timer( + $opt->{'commit-interval'} // 10, + \&_watch_commit, $self); +} + +sub eidx_reload { # -extindex --watch SIGHUP handler + my ($self, $idler) = @_; + if ($self->{cfg}) { + my $pr = $self->{-watch_sync}->{-opt}->{-progress}; + $pr->('reloading ...') if $pr; + delete $self->{-resync_queue}; + @{$self->{ibx_list}} = (); + %{$self->{ibx_map}} = (); + delete $self->{-watch_sync}->{id2pos}; + my $cfg = PublicInbox::Config->new; + attach_config($self, $cfg); + $idler->refresh($cfg); + $pr->(" done\n") if $pr; + } else { + warn "reload not supported without --all\n"; + } +} + +sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler + my ($self) = @_; + $self->{-resync_queue} //= [ @{$self->{ibx_list}} ]; + PublicInbox::DS::requeue($self); # trigger our ->event_step +} + +sub event_step { # PublicInbox::DS::requeue callback + my ($self) = @_; + if (my $resync_queue = $self->{-resync_queue}) { + if (my $ibx = shift(@$resync_queue)) { + on_inbox_unlock($self, $ibx); + PublicInbox::DS::requeue($self); + } else { + delete $self->{-resync_queue}; + _watch_commit($self); + } + } else { + done($self) unless $self->{-commit_timer}; + } +} + +sub eidx_watch { # public-inbox-extindex --watch main loop + my ($self, $opt) = @_; + local %SIG = %SIG; + for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) { + $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" }; + } + require PublicInbox::InboxIdle; + require PublicInbox::DS; + require PublicInbox::Syscall; + require PublicInbox::Sigfd; + my $idler = PublicInbox::InboxIdle->new($self->{cfg}); + if (!$self->{cfg}) { + $idler->watch_inbox($_) for @{$self->{ibx_list}}; + } + $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}}; + my $pr = $opt->{-progress}; + $pr->("performing initial scan ...\n") if $pr; + my $sync = eidx_sync($self, $opt); # initial sync + return if $sync->{quit}; + my $oldset = PublicInbox::Sigfd::block_signals(); + local $self->{current_info} = ''; + my $cb = $SIG{__WARN__} || \&CORE::warn; + local $SIG{__WARN__} = sub { $cb->($self->{current_info}, ': ', @_) }; + my $sig = { + HUP => sub { eidx_reload($self, $idler) }, + USR1 => sub { eidx_resync_start($self) }, + TSTP => sub { kill('STOP', $$) }, + }; + my $quit = PublicInbox::SearchIdx::quit_cb($sync); + $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit; + my $sigfd = PublicInbox::Sigfd->new($sig, + $PublicInbox::Syscall::SFD_NONBLOCK); + %SIG = (%SIG, %$sig) if !$sigfd; + local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock + if (!$sigfd) { + # wake up every second to accept signals if we don't + # have signalfd or IO::KQueue: + PublicInbox::Sigfd::sig_setmask($oldset); + PublicInbox::DS->SetLoopTimeout(1000); + } + PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} }); + $pr->("initial scan complete, entering event loop\n") if $pr; + PublicInbox::DS->EventLoop; # calls InboxIdle->event_step + done($self); +} + no warnings 'once'; *done = \&PublicInbox::V2Writable::done; -*umask_prepare = \&PublicInbox::InboxWritable::umask_prepare; *with_umask = \&PublicInbox::InboxWritable::with_umask; *parallel_init = \&PublicInbox::V2Writable::parallel_init; *nproc_shards = \&PublicInbox::V2Writable::nproc_shards; diff --git a/lib/PublicInbox/Gcf2.pm b/lib/PublicInbox/Gcf2.pm index 041dffe7..fe6afef2 100644 --- a/lib/PublicInbox/Gcf2.pm +++ b/lib/PublicInbox/Gcf2.pm @@ -35,7 +35,7 @@ BEGIN { if (open(my $fh, '<', $f)) { chomp($l, $c); local $/; - $c_src = <$fh>; + defined($c_src = <$fh>) or die "read $f: $!\n"; $CFG{LIBS} = $l; $CFG{CCFLAGSEX} = $c; last; diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm index 08406925..73dc7d3e 100644 --- a/lib/PublicInbox/Git.pm +++ b/lib/PublicInbox/Git.pm @@ -362,10 +362,8 @@ sub popen { sub qx { my ($self, @cmd) = @_; my $fh = $self->popen(@cmd); - local $/ = "\n"; - return <$fh> if wantarray; - local $/; - <$fh> + local $/ = wantarray ? "\n" : undef; + <$fh>; } # check_async and cat_async may trigger the other, so ensure they're diff --git a/lib/PublicInbox/IMAP.pm b/lib/PublicInbox/IMAP.pm index a3a10bde..2af5ab0c 100644 --- a/lib/PublicInbox/IMAP.pm +++ b/lib/PublicInbox/IMAP.pm @@ -1136,7 +1136,7 @@ sub search_common { my $srch = $self->{ibx}->isrch or return "$tag BAD search not available for mailbox\r\n"; my $opt = { - mset => 2, + relevance => -1, limit => UID_SLICE, uid_range => $range_info }; diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm index b7be4c46..079afc5f 100644 --- a/lib/PublicInbox/Import.pm +++ b/lib/PublicInbox/Import.pm @@ -48,7 +48,7 @@ sub gfi_start { return ($self->{in}, $self->{out}) if $self->{pid}; - my (@ret, $out_r, $out_w); + my ($in_r, $pid, $out_r, $out_w); pipe($out_r, $out_w) or die "pipe failed: $!"; $self->lock_acquire; @@ -56,27 +56,28 @@ sub gfi_start { my ($git, $ref) = @$self{qw(git ref)}; local $/ = "\n"; chomp($self->{tip} = $git->qx(qw(rev-parse --revs-only), $ref)); + die "fatal: rev-parse --revs-only $ref: \$?=$?" if $?; if ($self->{path_type} ne '2/38' && $self->{tip}) { local $/ = "\0"; my @t = $git->qx(qw(ls-tree -r -z --name-only), $ref); + die "fatal: ls-tree -r -z --name-only $ref: \$?=$?" if $?; chomp @t; $self->{-tree} = { map { $_ => 1 } @t }; } my @cmd = ('git', "--git-dir=$git->{git_dir}", qw(fast-import --quiet --done --date-format=raw)); - my ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r }); + ($in_r, $pid) = popen_rd(\@cmd, undef, { 0 => $out_r }); $out_w->autoflush(1); $self->{in} = $in_r; $self->{out} = $out_w; $self->{pid} = $pid; $self->{nchg} = 0; - @ret = ($in_r, $out_w); }; if ($@) { $self->lock_release; die $@; } - @ret; + ($in_r, $out_w); } sub wfail () { die "write to fast-import failed: $!" } diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm index 8a3a0194..af6380a7 100644 --- a/lib/PublicInbox/Inbox.pm +++ b/lib/PublicInbox/Inbox.pm @@ -109,10 +109,6 @@ sub new { delete $opts->{feedmax}; } $opts->{nntpserver} ||= $pi_cfg->{'publicinbox.nntpserver'}; - my $dir = $opts->{inboxdir}; - if (defined $dir && -f "$dir/inbox.lock") { - $opts->{version} = 2; - } # allow any combination of multi-line or comma-delimited hide entries my $hide = {}; @@ -125,7 +121,9 @@ sub new { bless $opts, $class; } -sub version { $_[0]->{version} // 1 } +sub version { + $_[0]->{version} //= -f "$_[0]->{inboxdir}/inbox.lock" ? 2 : 1 +} sub git_epoch { my ($self, $epoch) = @_; # v2-only, callers always supply $epoch @@ -134,7 +132,7 @@ sub git_epoch { return unless -d $git_dir; my $g = PublicInbox::Git->new($git_dir); $g->{-httpbackend_limiter} = $self->{-httpbackend_limiter}; - # no cleanup needed, we never cat-file off this, only clone + # caller must manually cleanup when done $g; }; } @@ -212,12 +210,9 @@ sub over { sub try_cat { my ($path) = @_; - my $rv = ''; - if (open(my $fh, '<', $path)) { - local $/; - $rv = <$fh>; - } - $rv; + open(my $fh, '<', $path) or return ''; + local $/; + <$fh> // ''; } sub cat_desc ($) { @@ -416,8 +411,8 @@ sub on_unlock { my ($self) = @_; check_inodes($self); my $subs = $self->{unlock_subs} or return; - for (values %$subs) { - eval { $_->on_inbox_unlock($self) }; + for my $obj (values %$subs) { + eval { $obj->on_inbox_unlock($self) }; warn "E: $@ ($self->{inboxdir})\n" if $@; } } diff --git a/lib/PublicInbox/InboxIdle.pm b/lib/PublicInbox/InboxIdle.pm index 2737bbbd..35aed696 100644 --- a/lib/PublicInbox/InboxIdle.pm +++ b/lib/PublicInbox/InboxIdle.pm @@ -7,7 +7,6 @@ package PublicInbox::InboxIdle; use strict; use parent qw(PublicInbox::DS); -use Cwd qw(abs_path); use PublicInbox::Syscall qw(EPOLLIN EPOLLET); my $IN_MODIFY = 0x02; # match Linux inotify my $ino_cls; @@ -22,28 +21,38 @@ require PublicInbox::In2Tie if $ino_cls; sub in2_arm ($$) { # PublicInbox::Config::each_inbox callback my ($ibx, $self) = @_; - my $dir = abs_path($ibx->{inboxdir}); - if (!defined($dir)) { - warn "W: $ibx->{inboxdir} not watched: $!\n"; - return; - } + my $dir = $ibx->{inboxdir}; my $inot = $self->{inot}; my $cur = $self->{pathmap}->{$dir} //= []; + my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock'); # transfer old subscriptions to the current inbox, cancel the old watch - if (my $old_ibx = $cur->[0]) { + my $old_ibx = $cur->[0]; + $cur->[0] = $ibx; + if ($old_ibx) { $ibx->{unlock_subs} and die "BUG: $dir->{unlock_subs} should not exist"; $ibx->{unlock_subs} = $old_ibx->{unlock_subs}; + + # Linux::Inotify2::Watch::name matches if watches are the + # same, no point in replacing a watch of the same name + if ($cur->[1]->name eq $lock) { + $self->{on_unlock}->{$lock} = $ibx; + return; + } + # rare, name changed (v1 inbox converted to v2) $cur->[1]->cancel; # Linux::Inotify2::Watch::cancel } - $cur->[0] = $ibx; - my $lock = "$dir/".($ibx->version >= 2 ? 'inbox.lock' : 'ssoma.lock'); if (my $w = $cur->[1] = $inot->watch($lock, $IN_MODIFY)) { $self->{on_unlock}->{$w->name} = $ibx; } else { warn "E: ".ref($inot)."->watch($lock, IN_MODIFY) failed: $!\n"; + if ($!{ENOSPC} && $^O eq 'linux') { + warn <<""; +I: consider increasing /proc/sys/fs/inotify/max_user_watches + + } } # TODO: detect deleted packs (and possibly other files) @@ -54,6 +63,9 @@ sub refresh { $pi_cfg->each_inbox(\&in2_arm, $self); } +# internal API for ease-of-use +sub watch_inbox { in2_arm($_[1], $_[0]) }; + sub new { my ($class, $pi_cfg) = @_; my $self = bless {}, $class; @@ -69,7 +81,7 @@ sub new { $self->{inot} = $inot; $self->{pathmap} = {}; # inboxdir => [ ibx, watch1, watch2, watch3...] $self->{on_unlock} = {}; # lock path => ibx - refresh($self, $pi_cfg); + refresh($self, $pi_cfg) if $pi_cfg; PublicInbox::FakeInotify::poll_once($self) if !$ino_cls; $self; } @@ -80,7 +92,8 @@ sub event_step { my @events = $self->{inot}->read; # Linux::Inotify2::read my $on_unlock = $self->{on_unlock}; for my $ev (@events) { - if (my $ibx = $on_unlock->{$ev->fullname}) { + my $fn = $ev->fullname // next; # cancelled + if (my $ibx = $on_unlock->{$fn}) { $ibx->on_unlock; } } diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm index c0e88f3d..b1d5caf5 100644 --- a/lib/PublicInbox/InboxWritable.pm +++ b/lib/PublicInbox/InboxWritable.pm @@ -46,12 +46,13 @@ sub _init_v1 { require PublicInbox::Msgmap; my $sidx = PublicInbox::SearchIdx->new($self, 1); # just create $sidx->begin_txn_lazy; + my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1); if (defined $skip_artnum) { - my $mm = PublicInbox::Msgmap->new($self->{inboxdir}, 1); $mm->{dbh}->begin_work; $mm->skip_artnum($skip_artnum); $mm->{dbh}->commit; } + undef $mm; # ->created_at set $sidx->commit_txn_lazy; } else { open my $fh, '>>', "$self->{inboxdir}/ssoma.lock" or @@ -64,7 +65,6 @@ sub init_inbox { if ($self->version == 1) { my $dir = assert_usable_dir($self); PublicInbox::Import::init_bare($dir); - $self->umask_prepare; $self->with_umask(\&_init_v1, $self, $skip_artnum); } else { my $v2w = importer($self); @@ -259,7 +259,7 @@ sub _umask_for { sub with_umask { my ($self, $cb, @arg) = @_; - my $old = umask $self->{umask}; + my $old = umask($self->{umask} //= umask_prepare($self)); my $rv = eval { $cb->(@arg) }; my $err = $@; umask $old; @@ -270,8 +270,7 @@ sub with_umask { sub umask_prepare { my ($self) = @_; my $perm = _git_config_perm($self); - my $umask = _umask_for($perm); - $self->{umask} = $umask; + _umask_for($perm); } sub cleanup ($) { @@ -293,7 +292,7 @@ sub warn_ignore { # this expects to be RHS in this assignment: "local $SIG{__WARN__} = ..." sub warn_ignore_cb { - my $cb = $SIG{__WARN__} // sub { print STDERR @_ }; + my $cb = $SIG{__WARN__} // \&CORE::warn; sub { return if warn_ignore(@_); $cb->(@_); diff --git a/lib/PublicInbox/Isearch.pm b/lib/PublicInbox/Isearch.pm index 8a1f257a..7ca2f9e4 100644 --- a/lib/PublicInbox/Isearch.pm +++ b/lib/PublicInbox/Isearch.pm @@ -61,7 +61,7 @@ sub mset_to_artnums { my $docids = PublicInbox::Search::mset_to_artnums($self->{es}, $mset); my $ibx_id = $self->{-ibx_id} //= _ibx_id($self); my $qmarks = join(',', map { '?' } @$docids); - if ($opt && ($opt->{mset} // 0) == 2) { # opt->{mset} = 2 was used + if ($opt && ($opt->{relevance} // 0) == -1) { # -1 => ENQ_ASCENDING my $range = ''; my @r; if (my $r = $opt->{uid_range}) { @@ -89,7 +89,7 @@ SELECT docid,xnum FROM xref3 WHERE ibx_id = ? AND docid IN ($qmarks) } if (scalar keys %order) { warn "W: $self->{es}->{topdir} #", - join(', #', sort keys %order), + join(', ', sort { $a <=> $b } keys %order), " not mapped to `$self->{eidx_key}'\n"; warn "W: $self->{es}->{topdir} may need to be reindexed\n"; @xnums = grep { defined } @xnums; @@ -113,7 +113,7 @@ sub mset_to_smsg { } if (scalar keys %order) { warn "W: $ibx->{inboxdir} #", - join(', #', sort keys %order), + join(', ', sort { $a <=> $b } keys %order), " no longer valid\n"; warn "W: $self->{es}->{topdir} may need to be reindexed\n"; } diff --git a/lib/PublicInbox/ManifestJsGz.pm b/lib/PublicInbox/ManifestJsGz.pm index 33df020a..37ee63d0 100644 --- a/lib/PublicInbox/ManifestJsGz.pm +++ b/lib/PublicInbox/ManifestJsGz.pm @@ -99,4 +99,11 @@ sub psgi_triple { 'Content-Length', bytes::length($out) ], [ $out ] ] } +sub per_inbox { + my ($ctx) = @_; + # only one inbox, slow is probably OK + slow_manifest_add($ctx, $ctx->{ibx}); + psgi_triple($ctx); +} + 1; diff --git a/lib/PublicInbox/Mbox.pm b/lib/PublicInbox/Mbox.pm index c8e4b406..83fa7d8a 100644 --- a/lib/PublicInbox/Mbox.pm +++ b/lib/PublicInbox/Mbox.pm @@ -235,7 +235,7 @@ sub mbox_all { my $over = $ctx->{ibx}->over or return PublicInbox::WWW::need($ctx, 'Overview'); - my $qopts = $ctx->{qopts} = { mset => 2 }; # order by docid + my $qopts = $ctx->{qopts} = { relevance => -1 }; # ORDER BY docid ASC $qopts->{thread} = 1 if $q->{t}; my $mset = $srch->mset($q_string, $qopts); $qopts->{offset} = $mset->size or diff --git a/lib/PublicInbox/MiscIdx.pm b/lib/PublicInbox/MiscIdx.pm index 64591d05..a04dd1c5 100644 --- a/lib/PublicInbox/MiscIdx.pm +++ b/lib/PublicInbox/MiscIdx.pm @@ -21,6 +21,7 @@ use Carp qw(croak); use File::Path (); use PublicInbox::MiscSearch; use PublicInbox::Config; +my $json; sub new { my ($class, $eidx) = @_; @@ -30,6 +31,7 @@ sub new { nodatacow_dir($mi_dir); my $flags = $PublicInbox::SearchIdx::DB_CREATE_OR_OPEN; $flags |= $PublicInbox::SearchIdx::DB_NO_SYNC if $eidx->{-no_fsync}; + $json //= PublicInbox::Config::json(); bless { mi_dir => $mi_dir, flags => $flags, @@ -91,17 +93,27 @@ EOF $xdb->delete_document($_) for @drop; # just in case my $doc = $PublicInbox::Search::X{Document}->new; + term_generator($self)->set_document($doc); - # allow sorting by modified + # allow sorting by modified and uidvalidity (created at) add_val($doc, $PublicInbox::MiscSearch::MODIFIED, $ibx->modified); + add_val($doc, $PublicInbox::MiscSearch::UIDVALIDITY, $ibx->uidvalidity); - $doc->add_boolean_term('Q'.$eidx_key); - $doc->add_boolean_term('T'.'inbox'); - term_generator($self)->set_document($doc); + $doc->add_boolean_term('Q'.$eidx_key); # uniQue id + $doc->add_boolean_term('T'.'inbox'); # Type + + if (defined($ibx->{newsgroup}) && $ibx->nntp_usable) { + $doc->add_boolean_term('T'.'newsgroup'); # additional Type + } + + # force reread from disk, {description} could be loaded from {misc} + delete $ibx->{description}; + my $desc = $ibx->description; # description = S/Subject (or title) # address = A/Author - index_text($self, $ibx->description, 1, 'S'); + index_text($self, $desc, 1, 'S'); + index_text($self, $ibx->{name}, 1, 'XNAME'); my %map = ( address => 'A', listid => 'XLISTID', @@ -113,10 +125,8 @@ EOF index_text($self, $v, 1, $pfx); } } - index_text($self, $ibx->{name}, 1, 'XNAME'); my $data = {}; if (defined(my $max = $ibx->max_git_epoch)) { # v2 - my $desc = $ibx->description; my $pfx = "/$ibx->{name}/git/"; for my $epoch (0..$max) { my $git = $ibx->git_epoch($epoch) or return; @@ -130,7 +140,7 @@ EOF $ent->{git_dir} = $ibx->{inboxdir}; $data->{"/$ibx->{name}"} = $ent; } - $doc->set_data(PublicInbox::Config::json()->encode($data)); + $doc->set_data($json->encode($data)); if (defined $docid) { $xdb->replace_document($docid, $doc); } else { diff --git a/lib/PublicInbox/MiscSearch.pm b/lib/PublicInbox/MiscSearch.pm index f2e31443..6683d564 100644 --- a/lib/PublicInbox/MiscSearch.pm +++ b/lib/PublicInbox/MiscSearch.pm @@ -5,10 +5,12 @@ package PublicInbox::MiscSearch; use strict; use v5.10.1; -use PublicInbox::Search qw(retry_reopen); +use PublicInbox::Search qw(retry_reopen int_val); +my $json; # Xapian value columns: our $MODIFIED = 0; +our $UIDVALIDITY = 1; # (created time) # avoid conflicting with message Search::prob_prefix for UI/UX reasons my %PROB_PREFIX = ( @@ -23,6 +25,8 @@ my %PROB_PREFIX = ( sub new { my ($class, $dir) = @_; + PublicInbox::Search::load_xapian(); + $json //= PublicInbox::Config::json(); bless { xdb => $PublicInbox::Search::X{Database}->new($dir) }, $class; @@ -69,6 +73,7 @@ sub misc_enquire_once { # retry_reopen callback sub mset { my ($self, $qs, $opt) = @_; $opt ||= {}; + reopen($self); my $qp = $self->{qp} //= mi_qp_new($self); $qs = 'type:inbox' if $qs eq ''; my $qr = $qp->parse_query($qs, $PublicInbox::Search::QP_FLAGS); @@ -119,11 +124,13 @@ sub newsgroup_matches { sub ibx_data_once { my ($self, $ibx) = @_; my $xdb = $self->{xdb}; - my $eidx_key = $ibx->eidx_key; # may be {inboxdir}, so private - my $head = $xdb->postlist_begin('Q'.$eidx_key); - my $tail = $xdb->postlist_end('Q'.$eidx_key); + my $term = 'Q'.$ibx->eidx_key; # may be {inboxdir}, so private + my $head = $xdb->postlist_begin($term); + my $tail = $xdb->postlist_end($term); if ($head != $tail) { my $doc = $xdb->get_document($head->get_docid); + $ibx->{uidvalidity} //= int_val($doc, $UIDVALIDITY); + $ibx->{-modified} = int_val($doc, $MODIFIED); $doc->get_data; } else { undef; @@ -135,4 +142,50 @@ sub inbox_data { retry_reopen($self, \&ibx_data_once, $ibx); } +sub ibx_cache_load { + my ($doc, $cache) = @_; + my $end = $doc->termlist_end; + my $cur = $doc->termlist_begin; + $cur->skip_to('Q'); + return if $cur == $end; + my $eidx_key = $cur->get_termname; + $eidx_key =~ s/\AQ// or return; # expired + my $ce = $cache->{$eidx_key} = {}; + $ce->{uidvalidity} = int_val($doc, $UIDVALIDITY); + $ce->{-modified} = int_val($doc, $MODIFIED); + $ce->{description} = do { + # extract description from manifest.js.gz epoch description + my $d; + my $data = $json->decode($doc->get_data); + for (values %$data) { + $d = $_->{description} // next; + $d =~ s/ \[epoch [0-9]+\]\z// or next; + last; + } + $d; + } +} + +sub _nntpd_cache_load { # retry_reopen callback + my ($self) = @_; + my $opt = { limit => $self->{xdb}->get_doccount * 10, relevance => -1 }; + my $mset = mset($self, 'type:newsgroup type:inbox', $opt); + my $cache = {}; + for my $it ($mset->items) { + ibx_cache_load($it->get_document, $cache); + } + $cache +} + +# returns { newsgroup => $cache_entry } mapping, $cache_entry contains +# anything which may trigger seeks at startup, currently: description, +# -modified, and uidvalidity. +sub nntpd_cache_load { + my ($self) = @_; + retry_reopen($self, \&_nntpd_cache_load); +} + +no warnings 'once'; +*reopen = \&PublicInbox::Search::reopen; + 1; diff --git a/lib/PublicInbox/NNTPD.pm b/lib/PublicInbox/NNTPD.pm index 7f9a1d58..6907a03c 100644 --- a/lib/PublicInbox/NNTPD.pm +++ b/lib/PublicInbox/NNTPD.pm @@ -36,10 +36,12 @@ sub refresh_groups { my ($self, $sig) = @_; my $pi_cfg = $sig ? PublicInbox::Config->new : $self->{pi_cfg}; my $groups = $pi_cfg->{-by_newsgroup}; # filled during each_inbox + my $cache = eval { $pi_cfg->ALL->misc->nntpd_cache_load } // {}; $pi_cfg->each_inbox(sub { my ($ibx) = @_; my $ngname = $ibx->{newsgroup} // return; - if ($ibx->nntp_usable) { + my $ce = $cache->{$ngname}; + if (($ce and (%$ibx = (%$ibx, %$ce))) || $ibx->nntp_usable) { # only valid if msgmap and over works # preload to avoid fragmentation: $ibx->description; diff --git a/lib/PublicInbox/OverIdx.pm b/lib/PublicInbox/OverIdx.pm index c8630ddb..bc2e3ef4 100644 --- a/lib/PublicInbox/OverIdx.pm +++ b/lib/PublicInbox/OverIdx.pm @@ -473,10 +473,14 @@ sub dbh_close { sub create { my ($self) = @_; - unless (-r $self->{filename}) { + my $fn = $self->{filename} // do { + Carp::confess('BUG: no {filename}') unless $self->{dbh}; + return; + }; + unless (-r $fn) { require File::Path; require File::Basename; - File::Path::mkpath(File::Basename::dirname($self->{filename})); + File::Path::mkpath(File::Basename::dirname($fn)); } # create the DB: PublicInbox::Over::dbh($self); diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm index bca2036c..58653c9e 100644 --- a/lib/PublicInbox/Search.pm +++ b/lib/PublicInbox/Search.pm @@ -6,7 +6,7 @@ package PublicInbox::Search; use strict; use parent qw(Exporter); -our @EXPORT_OK = qw(retry_reopen); +our @EXPORT_OK = qw(retry_reopen int_val); use List::Util qw(max); # values for searching, changing the numeric value breaks @@ -58,7 +58,11 @@ our $QP_FLAGS; our %X = map { $_ => 0 } qw(BoolWeight Database Enquire QueryParser Stem Query); our $Xap; # 'Search::Xapian' or 'Xapian' our $NVRP; # '$Xap::'.('NumberValueRangeProcessor' or 'NumberRangeProcessor') -our $ENQ_ASCENDING; + +# ENQ_DESCENDING and ENQ_ASCENDING weren't in SWIG Xapian.pm prior to 1.4.16, +# let's hope the ABI is stable +our $ENQ_DESCENDING = 0; +our $ENQ_ASCENDING = 1; sub load_xapian () { return 1 if defined $Xap; @@ -84,13 +88,8 @@ sub load_xapian () { 'NumberRangeProcessor' : 'NumberValueRangeProcessor'); $X{$_} = $Xap.'::'.$_ for (keys %X); - # ENQ_ASCENDING doesn't seem exported by SWIG Xapian.pm, - # so lets hope this part of the ABI is stable because it's - # just an integer: - $ENQ_ASCENDING = $x eq 'Xapian' ? - 1 : Search::Xapian::ENQ_ASCENDING(); - *sortable_serialise = $x.'::sortable_serialise'; + *sortable_unserialise = $x.'::sortable_unserialise'; # n.b. FLAG_PURE_NOT is expensive not suitable for a public # website as it could become a denial-of-service vector # FLAG_PHRASE also seems to cause performance problems chert @@ -266,7 +265,6 @@ sub mset { $opts ||= {}; my $qp = $self->{qp} //= qparse_new($self); my $query = $qp->parse_query($query_string, $self->{qp_flags}); - $opts->{relevance} = 1 unless exists $opts->{relevance}; _do_enquire($self, $query, $opts); } @@ -324,13 +322,17 @@ sub _enquire_once { # retry_reopen callback $enquire->set_query($query); $opts ||= {}; my $desc = !$opts->{asc}; - if (($opts->{mset} || 0) == 2) { # mset == 2: ORDER BY docid/UID + my $rel = $opts->{relevance} // 0; + if ($rel == -1) { # ORDER BY docid/UID + $enquire->set_weighting_scheme($X{BoolWeight}->new); $enquire->set_docid_order($ENQ_ASCENDING); + } elsif ($rel == 0) { + $enquire->set_sort_by_value_then_relevance(TS, $desc); + } elsif ($rel == -2) { $enquire->set_weighting_scheme($X{BoolWeight}->new); - } elsif ($opts->{relevance}) { + $enquire->set_docid_order($ENQ_DESCENDING); + } else { # rel > 0 $enquire->set_sort_by_relevance_then_value(TS, $desc); - } else { - $enquire->set_sort_by_value_then_relevance(TS, $desc); } # `mairix -t / --threads' or JMAP collapseThreads @@ -416,4 +418,10 @@ sub help { \@ret; } +sub int_val ($$) { + my ($doc, $col) = @_; + my $val = $doc->get_value($col) or return; # undefined is '' in Xapian + sortable_unserialise($val) + 0; # PV => IV conversion +} + 1; diff --git a/lib/PublicInbox/SearchIdx.pm b/lib/PublicInbox/SearchIdx.pm index 7e2843e9..95f4234c 100644 --- a/lib/PublicInbox/SearchIdx.pm +++ b/lib/PublicInbox/SearchIdx.pm @@ -64,7 +64,6 @@ sub new { $self->{-set_skip_docdata_once} = 1; $self->{-skip_docdata} = 1; } - $ibx->umask_prepare; if ($version == 1) { $self->{lock_path} = "$inboxdir/ssoma.lock"; my $dir = $self->xdir; @@ -103,7 +102,6 @@ sub load_xapian_writable () { } eval 'require '.$X->{WritableDatabase} or die; *sortable_serialise = $xap.'::sortable_serialise'; - *sortable_unserialise = $xap.'::sortable_unserialise'; $DB_CREATE_OR_OPEN = eval($xap.'::DB_CREATE_OR_OPEN()'); $DB_OPEN = eval($xap.'::DB_OPEN()'); my $ver = (eval($xap.'::major_version()') << 16) | @@ -539,17 +537,12 @@ sub remove_keywords { $self->{xdb}->replace_document($docid, $doc) if $replace; } -sub get_val ($$) { - my ($doc, $col) = @_; - sortable_unserialise($doc->get_value($col)); -} - sub smsg_from_doc ($) { my ($doc) = @_; my $data = $doc->get_data or return; my $smsg = bless {}, 'PublicInbox::Smsg'; - $smsg->{ts} = get_val($doc, PublicInbox::Search::TS()); - my $dt = get_val($doc, PublicInbox::Search::DT()); + $smsg->{ts} = int_val($doc, PublicInbox::Search::TS()); + my $dt = int_val($doc, PublicInbox::Search::DT()); my ($yyyy, $mon, $dd, $hh, $mm, $ss) = unpack('A4A2A2A2A2A2', $dt); $smsg->{ds} = timegm($ss, $mm, $hh, $dd, $mon - 1, $yyyy); $smsg->load_from_data($data); @@ -660,6 +653,7 @@ sub index_both { # git->cat_async callback $smsg->{num} = index_mm($self, $eml, $oid, $sync) or die "E: could not generate NNTP article number for $oid"; add_message($self, $eml, $smsg, $sync); + ++$self->{nidx}; my $cur_cmt = $sync->{cur_cmt} // die 'BUG: {cur_cmt} missing'; ${$sync->{latest_cmt}} = $cur_cmt; } @@ -674,6 +668,7 @@ sub unindex_both { # git->cat_async callback if (defined(my $cur_cmt = $sync->{cur_cmt})) { ${$sync->{latest_cmt}} = $cur_cmt; } + ++$self->{nidx}; } sub with_umask { diff --git a/lib/PublicInbox/Syscall.pm b/lib/PublicInbox/Syscall.pm index e4f00a2a..c403f78a 100644 --- a/lib/PublicInbox/Syscall.pm +++ b/lib/PublicInbox/Syscall.pm @@ -227,38 +227,46 @@ sub epoll_ctl_mod8 { our $epoll_wait_events; our $epoll_wait_size = 0; sub epoll_wait_mod4 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 12 x $epoll_wait_size; - } - my $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - for (0..$ct-1) { - @{$_[3]->[$_]}[1,0] = unpack("LL", substr($epoll_wait_events, 12*$_, 8)); - } - return $ct; + my ($epfd, $maxevents, $timeout_msec, $events) = @_; + # resize our static buffer if maxevents bigger than we've ever done + if ($maxevents > $epoll_wait_size) { + $epoll_wait_size = $maxevents; + vec($epoll_wait_events, $maxevents * 12 * 8 - 1, 1) = 0; + } + @$events = (); + my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events, + $maxevents, $timeout_msec); + for (0..$ct - 1) { + # 12-byte struct epoll_event + # 4 bytes uint32_t events mask (skipped, useless to us) + # 8 bytes: epoll_data_t union (first 4 bytes are the fd) + # So we skip the first 4 bytes and take the middle 4: + $events->[$_] = unpack('L', substr($epoll_wait_events, + 12 * $_ + 4, 4)); + } } sub epoll_wait_mod8 { - # resize our static buffer if requested size is bigger than we've ever done - if ($_[1] > $epoll_wait_size) { - $epoll_wait_size = $_[1]; - $epoll_wait_events = "\0" x 16 x $epoll_wait_size; - } - my $ct; - if ($no_deprecated) { - $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0, undef); - } else { - $ct = syscall($SYS_epoll_wait, $_[0]+0, $epoll_wait_events, $_[1]+0, $_[2]+0); - } - for (0..$ct-1) { - # 16 byte epoll_event structs, with format: - # 4 byte mask [idx 1] - # 4 byte padding (we put it into idx 2, useless) - # 8 byte data (first 4 bytes are fd, into idx 0) - @{$_[3]->[$_]}[1,2,0] = unpack("LLL", substr($epoll_wait_events, 16*$_, 12)); - } - return $ct; + my ($epfd, $maxevents, $timeout_msec, $events) = @_; + + # resize our static buffer if maxevents bigger than we've ever done + if ($maxevents > $epoll_wait_size) { + $epoll_wait_size = $maxevents; + vec($epoll_wait_events, $maxevents * 16 * 8 - 1, 1) = 0; + } + @$events = (); + my $ct = syscall($SYS_epoll_wait, $epfd, $epoll_wait_events, + $maxevents, $timeout_msec, + $no_deprecated ? undef : ()); + for (0..$ct - 1) { + # 16-byte struct epoll_event + # 4 bytes uint32_t events mask (skipped, useless to us) + # 4 bytes padding (skipped, useless) + # 8 bytes epoll_data_t union (first 4 bytes are the fd) + # So skip the first 8 bytes, take 4, and ignore the last 4: + $events->[$_] = unpack('L', substr($epoll_wait_events, + 16 * $_ + 8, 4)); + } } sub signalfd ($$$) { diff --git a/lib/PublicInbox/Unsubscribe.pm b/lib/PublicInbox/Unsubscribe.pm index b0d3220c..ae0b0679 100644 --- a/lib/PublicInbox/Unsubscribe.pm +++ b/lib/PublicInbox/Unsubscribe.pm @@ -12,7 +12,8 @@ use warnings; use Crypt::CBC; use Plack::Util; use MIME::Base64 qw(decode_base64url); -my $CODE_URL = 'https://public-inbox.org/public-inbox.git'; +my @CODE_URL = qw(http://ou63pmih66umazou.onion/public-inbox.git + https://public-inbox.org/public-inbox.git); my @CT_HTML = ('Content-Type', 'text/html; charset=UTF-8'); sub new { @@ -38,13 +39,15 @@ sub new { my $unsubscribe = $opt{unsubscribe} or die "`unsubscribe' callback not given\n"; + my $code_url = $opt{code_url} || \@CODE_URL; + $code_url = [ $code_url ] if ref($code_url) ne 'ARRAY'; bless { pi_cfg => $opt{pi_config}, # PublicInbox::Config owner_email => $opt{owner_email}, cipher => $cipher, unsubscribe => $unsubscribe, contact => qq(<a\nhref="mailto:$e">$e</a>), - code_url => $opt{code_url} || $CODE_URL, + code_url => $code_url, confirm => $opt{confirm}, }, $class; } @@ -138,7 +141,7 @@ sub r { "<html><head><title>$title</title></head><body><pre>". join("\n", "<b>$title</b>\n", @body) . '</pre><hr>'. "<pre>This page is available under AGPL-3.0+\n" . - "git clone $self->{code_url}\n" . + join('', map { "git clone $_\n" } @{$self->{code_url}}) . qq(Email $self->{contact} if you have any questions). '</pre></body></html>' ] ]; diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 7d41b0f6..edb8ba57 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -91,8 +91,6 @@ sub new { die "$dir does not exist\n"; } } - $v2ibx->umask_prepare; - my $xpfx = "$dir/xap" . PublicInbox::Search::SCHEMA_VERSION; my $self = { ibx => $v2ibx, @@ -314,7 +312,6 @@ sub idx_init { $ibx->git->cleanup; parallel_init($self, $ibx->{indexlevel}); - $ibx->umask_prepare; $ibx->with_umask(\&_idx_init, $self, $opt); } @@ -876,7 +873,7 @@ sub reindex_checkpoint ($$) { $self->done; # release lock } - if (my $pr = $sync->{-opt}->{-progress}) { + if (my $pr = $sync->{-regen_fmt} ? $sync->{-opt}->{-progress} : undef) { $pr->(sprintf($sync->{-regen_fmt}, ${$sync->{nr}})); } @@ -888,12 +885,22 @@ sub reindex_checkpoint ($$) { $mm_tmp->atfork_parent if $mm_tmp; } +sub index_finalize ($$) { + my ($arg, $index) = @_; + ++$arg->{self}->{nidx}; + if (defined(my $cur = $arg->{cur_cmt})) { + ${$arg->{latest_cmt}} = $cur; + } elsif ($index) { + die 'BUG: {cur_cmt} missing'; + } # else { unindexing @leftovers doesn't set {cur_cmt} +} + sub index_oid { # cat_async callback my ($bref, $oid, $type, $size, $arg) = @_; - return if is_bad_blob($oid, $type, $size, $arg->{oid}); + is_bad_blob($oid, $type, $size, $arg->{oid}) and + return index_finalize($arg, 1); # size == 0 purged returns here my $self = $arg->{self}; local $self->{current_info} = "$self->{current_info} $oid"; - return if $size == 0; # purged my ($num, $mid0); my $eml = PublicInbox::Eml->new($$bref); my $mids = mids($eml); @@ -964,7 +971,7 @@ sub index_oid { # cat_async callback if (do_idx($self, $bref, $eml, $smsg)) { ${$arg->{need_checkpoint}} = 1; } - ${$arg->{latest_cmt}} = $arg->{cur_cmt} // die 'BUG: {cur_cmt} missing'; + index_finalize($arg, 1); } # only update last_commit for $i on reindex iff newer than current @@ -1091,12 +1098,14 @@ sub sync_prepare ($$) { -d $git_dir or next; # missing epochs are fine my $git = PublicInbox::Git->new($git_dir); my $unit = { git => $git, epoch => $i }; + my $tip; if ($reindex_heads) { - $head = $reindex_heads->[$i] or next; + $tip = $head = $reindex_heads->[$i] or next; + } else { + $tip = $git->qx(qw(rev-parse -q --verify), $head); + next if $?; # new repo + chomp $tip; } - chomp(my $tip = $git->qx(qw(rev-parse -q --verify), $head)); - next if $?; # new repo - my $range = log_range($sync, $unit, $tip) or next; # can't use 'rev-list --count' if we use --diff-filter $pr->("$pfx $i.git counting $range ... ") if $pr; @@ -1154,11 +1163,12 @@ sub unindex_oid_aux ($$$) { } sub unindex_oid ($$;$) { # git->cat_async callback - my ($bref, $oid, $type, $size, $sync) = @_; - return if is_bad_blob($oid, $type, $size, $sync->{oid}); - my $self = $sync->{self}; + my ($bref, $oid, $type, $size, $arg) = @_; + is_bad_blob($oid, $type, $size, $arg->{oid}) and + return index_finalize($arg, 0); + my $self = $arg->{self}; local $self->{current_info} = "$self->{current_info} $oid"; - my $unindexed = $sync->{in_unindex} ? $sync->{unindexed} : undef; + my $unindexed = $arg->{in_unindex} ? $arg->{unindexed} : undef; my $mm = $self->{mm}; my $mids = mids(PublicInbox::Eml->new($bref)); undef $$bref; @@ -1183,6 +1193,7 @@ sub unindex_oid ($$;$) { # git->cat_async callback } unindex_oid_aux($self, $oid, $mid); } + index_finalize($arg, 0); } sub git { $_[0]->{ibx}->git } @@ -1348,11 +1359,19 @@ sub index_sync { $opt //= {}; return xapian_only($self, $opt) if $opt->{xapian_only}; - my $pr = $opt->{-progress}; my $epoch_max; - my $latest = $self->{ibx}->git_dir_latest(\$epoch_max); - return unless defined $latest; + my $latest = $self->{ibx}->git_dir_latest(\$epoch_max) // return; + if ($opt->{'fast-noop'}) { # nanosecond (st_ctim) comparison + use Time::HiRes qw(stat); + if (my @mm = stat("$self->{ibx}->{inboxdir}/msgmap.sqlite3")) { + my $c = $mm[10]; # 10 = ctime (nsec NV) + my @hd = stat("$latest/refs/heads"); + my @pr = stat("$latest/packed-refs"); + return if $c > ($hd[10] // 0) && $c > ($pr[10] // 0); + } + } + my $pr = $opt->{-progress}; my $seq = $opt->{sequential_shard}; my $art_beg; # the NNTP article number we start xapian_only at my $idxlevel = $self->{ibx}->{indexlevel}; diff --git a/lib/PublicInbox/WWW.pm b/lib/PublicInbox/WWW.pm index a33d25ab..52630ae3 100644 --- a/lib/PublicInbox/WWW.pm +++ b/lib/PublicInbox/WWW.pm @@ -505,7 +505,7 @@ sub get_inbox_manifest ($$$) { my $r404 = invalid_inbox($ctx, $inbox); return $r404 if $r404; require PublicInbox::ManifestJsGz; - PublicInbox::ManifestJsGz->response($ctx); + PublicInbox::ManifestJsGz::per_inbox($ctx); } sub get_attach { diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm index e1246096..bc296e01 100644 --- a/lib/PublicInbox/Watch.pm +++ b/lib/PublicInbox/Watch.pm @@ -217,7 +217,7 @@ sub _try_path { warn "unmappable dir: $1\n"; return; } - my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} || \&CORE::warn; local $SIG{__WARN__} = sub { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; $warn_cb->($pfx, "path: $path\n", @_); @@ -467,7 +467,7 @@ sub imap_fetch_all ($$$) { my $key = $req; $key =~ s/\.PEEK//; my ($uids, $batch); - my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} || \&CORE::warn; local $SIG{__WARN__} = sub { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; $batch //= '?'; @@ -929,7 +929,7 @@ sub nntp_fetch_all ($$$) { $beg = $l_art + 1; warn "I: $url fetching ARTICLE $beg..$end\n"; - my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} || \&CORE::warn; my ($err, $art); local $SIG{__WARN__} = sub { my $pfx = ($_[0] // '') =~ /^([A-Z]: )/g ? $1 : ''; diff --git a/lib/PublicInbox/WwwListing.pm b/lib/PublicInbox/WwwListing.pm index fce0e530..4b3f1674 100644 --- a/lib/PublicInbox/WwwListing.pm +++ b/lib/PublicInbox/WwwListing.pm @@ -69,6 +69,9 @@ sub hide_key { 'www' } sub response { my ($class, $ctx) = @_; bless $ctx, $class; + if (my $ALL = $ctx->{www}->{pi_cfg}->ALL) { + $ALL->misc->reopen; + } my $re = $ctx->url_regexp or return $ctx->psgi_triple; my $iter = PublicInbox::ConfigIter->new($ctx->{www}->{pi_cfg}, \&list_match_i, $re, $ctx); diff --git a/lib/PublicInbox/WwwStream.pm b/lib/PublicInbox/WwwStream.pm index 66e34a12..958251a3 100644 --- a/lib/PublicInbox/WwwStream.pm +++ b/lib/PublicInbox/WwwStream.pm @@ -12,7 +12,8 @@ our @EXPORT_OK = qw(html_oneshot); use bytes (); # length use PublicInbox::Hval qw(ascii_html prurl ts2str); our $TOR_URL = 'https://www.torproject.org/'; -our $CODE_URL = 'https://public-inbox.org/public-inbox.git'; +our $CODE_URL = [ qw(http://ou63pmih66umazou.onion/public-inbox.git + https://public-inbox.org/public-inbox.git) ]; sub base_url ($) { my $ctx = shift; @@ -78,22 +79,24 @@ sub html_top ($) { sub coderepos ($) { my ($ctx) = @_; - my $ibx = $ctx->{ibx}; + my $cr = $ctx->{ibx}->{coderepo} // return (); + my $cfg = $ctx->{www}->{pi_cfg}; + my $upfx = ($ctx->{-upfx} // ''). '../'; my @ret; - if (defined(my $cr = $ibx->{coderepo})) { - my $cfg = $ctx->{www}->{pi_cfg}; - my $env = $ctx->{env}; - for my $cr_name (@$cr) { - my $urls = $cfg->{"coderepo.$cr_name.cgiturl"}; - if ($urls) { - $ret[0] //= <<EOF; + for my $cr_name (@$cr) { + my $urls = $cfg->{"coderepo.$cr_name.cgiturl"} // next; + $ret[0] //= <<EOF; code repositories for the project(s) associated with this inbox: EOF - $ret[0] .= "\n\t".prurl($env, $_) for @$urls; - } + for (@$urls) { + # relative or absolute URL?, prefix relative "foo.git" + # with appropriate number of "../" + my $u = m!\A(?:[a-z\+]+:)?//! ? $_ : $upfx.$_; + $u = ascii_html(prurl($ctx->{env}, $u)); + $ret[0] .= qq(\n\t<a\nhref="$u">$u</a>); } } - @ret; # may be empty + @ret; # may be empty, this sub is called as an arg for join() } sub code_footer ($) { diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm index 4f77ef25..ca2345f7 100644 --- a/lib/PublicInbox/Xapcmd.pm +++ b/lib/PublicInbox/Xapcmd.pm @@ -270,7 +270,6 @@ sub run { local %SIG = %SIG; setup_signals(); - $ibx->umask_prepare; $ibx->with_umask(\&_run, $ibx, $cb, $opt); } diff --git a/script/public-inbox-convert b/script/public-inbox-convert index b61c743f..e6ee6529 100755 --- a/script/public-inbox-convert +++ b/script/public-inbox-convert @@ -47,34 +47,21 @@ die $help if (scalar(@ARGV) || $new_dir eq '' || $old_dir eq ''); die "$new_dir exists\n" if -d $new_dir; die "$old_dir not a directory\n" unless -d $old_dir; -require Cwd; -Cwd->import('abs_path'); +require PublicInbox::Admin; require PublicInbox::Config; require PublicInbox::InboxWritable; -my $abs = abs_path($old_dir); -die "failed to resolve $old_dir: $!\n" if (!defined($abs)); - my $cfg = PublicInbox::Config->new; -my $old; -$cfg->each_inbox(sub { - $old = $_[0] if abs_path($_[0]->{inboxdir}) eq $old_dir; -}); -if ($old) { - $old = PublicInbox::InboxWritable->new($old); -} else { +my @old = PublicInbox::Admin::resolve_inboxes([$old_dir], undef, $cfg); +@old > 1 and die "BUG: resolved several inboxes from $old_dir:\n", + map { "\t$_->{inboxdir}\n" } @old; +my $old = PublicInbox::InboxWritable->new($old[0]); +if (delete $old->{-unconfigured}) { warn "W: $old_dir not configured in " . PublicInbox::Config::default_file() . "\n"; - $old = PublicInbox::InboxWritable->new({ - inboxdir => $old_dir, - name => 'ignored', - -primary_address => 'old@example.com', - address => [ 'old@example.com' ], - }); } die "Only conversion from v1 inboxes is supported\n" if $old->version >= 2; -require File::Spec; require PublicInbox::Admin; my $detected = PublicInbox::Admin::detect_indexlevel($old); $old->{indexlevel} //= $detected; @@ -88,12 +75,11 @@ if ($opt->{'index'}) { } local %ENV = (%$env, %ENV) if $env; my $new = { %$old }; -$new->{inboxdir} = File::Spec->canonpath($new_dir); +$new->{inboxdir} = $cfg->rel2abs_collapsed($new_dir); $new->{version} = 2; $new = PublicInbox::InboxWritable->new($new, { nproc => $opt->{jobs} }); $new->{-no_fsync} = 1 if !$opt->{fsync}; my $v2w; -$old->umask_prepare; sub link_or_copy ($$) { my ($src, $dst) = @_; diff --git a/script/public-inbox-edit b/script/public-inbox-edit index a70614fc..81f023bc 100755 --- a/script/public-inbox-edit +++ b/script/public-inbox-edit @@ -183,7 +183,8 @@ retry_edit: # rename/relink $edit_fn open my $new_fh, '<', $edit_fn or die "can't read edited file ($edit_fn): $!\n"; - my $new_raw = do { local $/; <$new_fh> }; + defined(my $new_raw = do { local $/; <$new_fh> }) or die + "read $edit_fn: $!\n"; if (!$opt->{raw}) { # get rid of the From we added diff --git a/script/public-inbox-extindex b/script/public-inbox-extindex index 17ad59fa..5f27988f 100644 --- a/script/public-inbox-extindex +++ b/script/public-inbox-extindex @@ -6,11 +6,12 @@ use strict; use v5.10.1; use Getopt::Long qw(:config gnu_getopt no_ignore_case auto_abbrev); my $help = <<EOF; # the following should fit w/o scrolling in 80x24 term: -usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR] +usage: public-inbox-extindex [options] [EXTINDEX_DIR] [INBOX_DIR...] Create and update external (detached) search indices --no-fsync speed up indexing, risk corruption on power outage + --watch run persistently and watch for inbox updates -L LEVEL `medium', or `full' (default: full) --all index all configured inboxes --jobs=NUM set or disable parallelization (NUM=0) @@ -22,26 +23,36 @@ usage: public-inbox-extindex [options] EXTINDEX_DIR [INBOX_DIR] BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-extindex(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { quiet => -1, compact => 0, fsync => 1, scan => 1 }; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i fsync|sync! indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s - gc + gc commit-interval=i watch scan! all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; - -# require lazily to speed up --help -my $eidx_dir = shift(@ARGV) // die "E: $help"; +require IO::Handle; +STDOUT->autoflush(1); +STDERR->autoflush(1); local $SIG{USR1} = 'IGNORE'; # to be overridden in eidx_sync +# require lazily to speed up --help require PublicInbox::Admin; my $cfg = PublicInbox::Config->new; +my $eidx_dir = shift(@ARGV); +unless (defined $eidx_dir) { + if ($opt->{all} && $cfg->ALL) { + $eidx_dir = $cfg->ALL->{topdir}; + } else { + die "E: $help"; + } +} my @ibxs; if ($opt->{gc}) { die "E: inbox paths must not be specified with --gc\n" if @ARGV; - die "E: --all not compatible --gc\n" if $opt->{all}; + die "E: --all not compatible with --gc\n" if $opt->{all}; + die "E: --watch is not compatible with --gc\n" if $opt->{watch}; } else { @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); } @@ -56,6 +67,15 @@ if ($opt->{gc}) { $eidx->attach_config($cfg); $eidx->eidx_gc($opt); } else { - $eidx->attach_inbox($_) for @ibxs; - $eidx->eidx_sync($opt); + if ($opt->{all}) { + $eidx->attach_config($cfg); + } else { + $eidx->attach_inbox($_) for @ibxs; + } + if ($opt->{watch}) { + $cfg = undef; # save memory only after SIGHUP + $eidx->eidx_watch($opt); + } else { + $eidx->eidx_sync($opt); + } } diff --git a/script/public-inbox-index b/script/public-inbox-index index 8a61817c..0fdfddc0 100755 --- a/script/public-inbox-index +++ b/script/public-inbox-index @@ -17,7 +17,7 @@ options: --no-fsync speed up indexing, risk corruption on power outage -L LEVEL `basic', `medium', or `full' (default: full) - -E EIDX update EIDX (e.g. `all') + -E EXTINDEX update extindex (default: `all') --all index all configured inboxes --compact | -c run public-inbox-compact(1) after indexing --sequential-shard index Xapian shards sequentially for slow storage @@ -32,19 +32,26 @@ options: BYTES may use `k', `m', and `g' suffixes (e.g. `10m' for 10 megabytes) See public-inbox-index(1) man page for full documentation. EOF -my $opt = { quiet => -1, compact => 0, max_size => undef, fsync => 1 }; +my $opt = { + quiet => -1, compact => 0, max_size => undef, fsync => 1, + 'update-extindex' => [], # ":s@" optional arg sets '' if no arg given +}; GetOptions($opt, qw(verbose|v+ reindex rethread compact|c+ jobs|j=i prune fsync|sync! xapian_only|xapian-only indexlevel|index-level|L=s max_size|max-size=s batch_size|batch-size=s sequential_shard|seq-shard|sequential-shard - skip-docdata all help|h)) + no-update-extindex update-extindex|E=s@ + fast-noop|F skip-docdata all help|h)) or die $help; if ($opt->{help}) { print $help; exit 0 }; die "--jobs must be >= 0\n" if defined $opt->{jobs} && $opt->{jobs} < 0; if ($opt->{xapian_only} && !$opt->{reindex}) { die "--xapian-only requires --reindex\n"; } +if ($opt->{reindex} && delete($opt->{'fast-noop'})) { + warn "--fast-noop ignored with --reindex\n"; +} # require lazily to speed up --help require PublicInbox::Admin; @@ -56,7 +63,34 @@ my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt, $cfg); PublicInbox::Admin::require_or_die('-index'); unless (@ibxs) { print STDERR $help; exit 1 } +my (@eidx, %eidx_seen); +my $update_extindex = $opt->{'update-extindex'}; +if (!scalar(@$update_extindex) && (my $ALL = $cfg->ALL)) { + # extindex and normal inboxes may have different owners + push(@$update_extindex, 'all') if -w $ALL->{topdir}; +} +@$update_extindex = () if $opt->{'no-update-extindex'}; +if (scalar @$update_extindex) { + PublicInbox::Admin::require_or_die('-search'); + require PublicInbox::ExtSearchIdx; +} +for my $ei_name (@$update_extindex) { + my $es = $cfg->lookup_ei($ei_name); + my $topdir; + if (!$es && -d $ei_name) { # allow dirname or config section name + $topdir = $ei_name; + } elsif ($es) { + $topdir = $es->{topdir}; + } else { + die "extindex `$ei_name' not configured or found\n"; + } + my $o = { %$opt }; + delete $o->{indexlevel} if ($o->{indexlevel}//'') eq 'basic'; + $eidx_seen{$topdir} //= + push(@eidx, PublicInbox::ExtSearchIdx->new($topdir, $o)); +} my $mods = {}; +my @eidx_unconfigured; foreach my $ibx (@ibxs) { # detect_indexlevel may also set $ibx->{-skip_docdata} my $detected = PublicInbox::Admin::detect_indexlevel($ibx); @@ -64,7 +98,14 @@ foreach my $ibx (@ibxs) { $ibx->{indexlevel} //= $opt->{indexlevel} // ($opt->{xapian_only} ? 'full' : $detected); PublicInbox::Admin::scan_ibx_modules($mods, $ibx); + if (@eidx && $ibx->{-unconfigured}) { + push @eidx_unconfigured, " $ibx->{inboxdir}\n"; + } } +warn <<EOF if @eidx_unconfigured; +The following inboxes are unconfigured and will not be updated in +@$update_extindex:\n@eidx_unconfigured +EOF # "Search::Xapian" includes SWIG "Xapian", too: $opt->{compact} = 0 if !$mods->{'Search::Xapian'}; @@ -90,10 +131,21 @@ publicInbox.$ibx->{name}.indexSequentialShard not boolean EOL $ibx_opt = { %$opt, sequential_shard => $v }; } - PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); + my $nidx = PublicInbox::Admin::index_inbox($ibx, undef, $ibx_opt); last if $ibx_opt->{quit}; if (my $copt = $opt->{compact_opt}) { local $copt->{jobs} = 0 if $ibx_opt->{sequential_shard}; PublicInbox::Xapcmd::run($ibx, 'compact', $copt); } + last if $ibx_opt->{quit}; + next if $ibx->{-unconfigured} || !$nidx; + for my $eidx (@eidx) { + $eidx->attach_inbox($ibx); + } +} +my $pr = $opt->{-progress}; +for my $eidx (@eidx) { + $pr->("indexing $eidx->{topdir} ...\n") if $pr; + $eidx->eidx_sync($opt); + last if $opt->{quit}; } diff --git a/script/public-inbox-init b/script/public-inbox-init index c775eb31..7ac77830 100755 --- a/script/public-inbox-init +++ b/script/public-inbox-init @@ -100,11 +100,7 @@ if (-e $pi_config) { defined $perm or die "(f)stat failed on $pi_config: $!\n"; chmod($perm & 07777, $fh) or die "(f)chmod failed on future $pi_config: $!\n"; - my $old; - { - local $/; - $old = <$oh>; - } + defined(my $old = do { local $/; <$oh> }) or die "read $pi_config: $!\n"; print $fh $old or die "failed to write: $!\n"; close $oh or die "failed to close $pi_config: $!\n"; @@ -138,10 +134,9 @@ close($fh) or die "failed to close $pi_config_tmp: $!\n"; my $pfx = "publicinbox.$name"; my @x = (qw/git config/, "--file=$pi_config_tmp"); -require File::Spec; -$inboxdir = File::Spec->canonpath($inboxdir); +$inboxdir = PublicInbox::Config::rel2abs_collapsed($inboxdir); +die "`\\n' not allowed in `$inboxdir'\n" if index($inboxdir, "\n") >= 0; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; if (-f "$inboxdir/inbox.lock") { if (!defined $version) { $version = 2; @@ -186,11 +181,6 @@ if ($skip_docdata) { $ibx->{-skip_docdata} = $skip_docdata; } $ibx->init_inbox(0, $skip_epoch, $skip_artnum); -require Cwd; -my $tmp = Cwd::abs_path($inboxdir); -defined($tmp) or die "failed to resolve $inboxdir: $!\n"; -$inboxdir = $tmp; -die "`\\n' not allowed in `$inboxdir'\n" if $inboxdir =~ /\n/s; # needed for git prior to v2.1.0 umask(0077) if defined $perm; diff --git a/script/public-inbox-learn b/script/public-inbox-learn index 9352c8ff..1731a4ba 100755 --- a/script/public-inbox-learn +++ b/script/public-inbox-learn @@ -39,8 +39,7 @@ my $spamc = PublicInbox::Spamcheck::Spamc->new; my $pi_cfg = PublicInbox::Config->new; my $err; my $mime = PublicInbox::Eml->new(do{ - local $/; - my $data = <STDIN>; + defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; if ($train ne 'rm') { diff --git a/script/public-inbox-purge b/script/public-inbox-purge index 7bca11ea..52f1f18a 100755 --- a/script/public-inbox-purge +++ b/script/public-inbox-purge @@ -32,7 +32,7 @@ if ($opt->{help}) { print $help; exit 0 }; my @ibxs = PublicInbox::Admin::resolve_inboxes(\@ARGV, $opt); PublicInbox::AdminEdit::check_editable(\@ibxs); -my $data = do { local $/; <STDIN> }; +defined(my $data = do { local $/; <STDIN> }) or die "read STDIN: $!\n"; $data =~ s/\A[\r\n]*From [^\r\n]*\r?\n//s; my $n_purged = 0; @@ -12,10 +12,7 @@ my $v2_dir = "$tmpdir/v2"; my ($res, $err, $v); PublicInbox::Import::init_bare($git_dir); -*resolve_inboxdir = do { - no warnings 'once'; - *PublicInbox::Admin::resolve_inboxdir; -}; +*resolve_inboxdir = \&PublicInbox::Admin::resolve_inboxdir; # v1 is(resolve_inboxdir($git_dir), $git_dir, 'top-level GIT_DIR resolved'); @@ -72,16 +69,23 @@ SKIP: { ok(-e "$v2_dir/inbox.lock", 'exists'); is(resolve_inboxdir($v2_dir), $v2_dir, 'resolve_inboxdir works on v2_dir'); - ok(chdir($v2_dir), 'chdir v2_dir OK'); + chdir($v2_dir) or BAIL_OUT "chdir v2_dir: $!"; is(resolve_inboxdir(), $v2_dir, 'resolve_inboxdir works inside v2_dir'); $res = resolve_inboxdir(undef, \$v); is($v, 2, 'version 2 detected'); is($res, $v2_dir, 'detects directory along with version'); # TODO: should work from inside Xapian dirs, and git dirs, here... + PublicInbox::Import::init_bare("$v2_dir/git/0.git"); + my $objdir = "$v2_dir/git/0.git/objects"; + is($v2_dir, resolve_inboxdir($objdir, \$v), 'at $objdir'); + is($v, 2, 'version 2 detected at $objdir'); + chdir($objdir) or BAIL_OUT "chdir objdir: $!"; + is(resolve_inboxdir(undef, \$v), $v2_dir, 'inside $objdir'); + is($v, 2, 'version 2 detected inside $objdir'); } -chdir '/'; +chdir '/' or BAIL_OUT "chdir: $!"; my @pairs = ( '1g' => 1024 ** 3, @@ -234,12 +234,13 @@ EOF } SKIP: { + # XXX wildcard match requires git 2.26+ require_git('1.8.5', 2) or skip 'git 1.8.5+ required for --url-match', 2; my $f = "$tmpdir/urlmatch"; open my $fh, '>', $f or BAIL_OUT $!; print $fh <<EOF or BAIL_OUT $!; -[imap "imap://*.example.com"] +[imap "imap://mail.example.com"] pollInterval = 9 EOF close $fh or BAIL_OUT; diff --git a/t/ds-poll.t b/t/ds-poll.t index 3771059b..0ee57b69 100644 --- a/t/ds-poll.t +++ b/t/ds-poll.t @@ -16,35 +16,35 @@ pipe($r, $w) or die; pipe($x, $y) or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($r), EPOLLIN), 0, 'add EPOLLIN'); my $events = []; -my $n = $p->epoll_wait(9, 0, $events); +$p->epoll_wait(9, 0, $events); is_deeply($events, [], 'no events set'); -is($n, 0, 'nothing ready, yet'); is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($w), EPOLLOUT|EPOLLONESHOT), 0, 'add EPOLLOUT|EPOLLONESHOT'); -$n = $p->epoll_wait(9, -1, $events); -is($n, 1, 'got POLLOUT event'); -is($events->[0]->[0], fileno($w), '$w ready'); +$p->epoll_wait(9, -1, $events); +is(scalar(@$events), 1, 'got POLLOUT event'); +is($events->[0], fileno($w), '$w ready'); -$n = $p->epoll_wait(9, 0, $events); -is($n, 0, 'nothing ready after oneshot'); +$p->epoll_wait(9, 0, $events); +is(scalar(@$events), 0, 'nothing ready after oneshot'); is_deeply($events, [], 'no events set after oneshot'); syswrite($w, '1') == 1 or die; for my $t (0..1) { - $n = $p->epoll_wait(9, $t, $events); - is($events->[0]->[0], fileno($r), "level-trigger POLLIN ready #$t"); - is($n, 1, "only event ready #$t"); + $p->epoll_wait(9, $t, $events); + is($events->[0], fileno($r), "level-trigger POLLIN ready #$t"); + is(scalar(@$events), 1, "only event ready #$t"); } syswrite($y, '1') == 1 or die; is($p->epoll_ctl(EPOLL_CTL_ADD, fileno($x), EPOLLIN|EPOLLONESHOT), 0, 'EPOLLIN|EPOLLONESHOT add'); -is($p->epoll_wait(9, -1, $events), 2, 'epoll_wait has 2 ready'); -my @fds = sort(map { $_->[0] } @$events); +$p->epoll_wait(9, -1, $events); +is(scalar @$events, 2, 'epoll_wait has 2 ready'); +my @fds = sort @$events; my @exp = sort((fileno($r), fileno($x))); is_deeply(\@fds, \@exp, 'got both ready FDs'); is($p->epoll_ctl(EPOLL_CTL_DEL, fileno($r), 0), 0, 'EPOLL_CTL_DEL OK'); -$n = $p->epoll_wait(9, 0, $events); -is($n, 0, 'nothing ready after EPOLL_CTL_DEL'); +$p->epoll_wait(9, 0, $events); +is(scalar @$events, 0, 'nothing ready after EPOLL_CTL_DEL'); done_testing; @@ -12,11 +12,11 @@ is(epoll_ctl($epfd, EPOLL_CTL_ADD, fileno($w), EPOLLOUT), 0, 'epoll_ctl socket EPOLLOUT'); my @events; -is(epoll_wait($epfd, 100, 10000, \@events), 1, 'epoll_wait returns'); +epoll_wait($epfd, 100, 10000, \@events); is(scalar(@events), 1, 'got one event'); -is($events[0]->[0], fileno($w), 'got expected FD'); -is($events[0]->[1], EPOLLOUT, 'got expected event'); +is($events[0], fileno($w), 'got expected FD'); close $w; -is(epoll_wait($epfd, 100, 0, \@events), 0, 'epoll_wait timeout'); +epoll_wait($epfd, 100, 0, \@events); +is(@events, 0, 'epoll_wait timeout'); done_testing; @@ -76,12 +76,17 @@ if (1) { is(length($$x), $size, 'read correct number of bytes'); my $ref = $gcf->qx(qw(cat-file blob), $buf); + is($?, 0, 'no error on scalar success'); my @ref = $gcf->qx(qw(cat-file blob), $buf); + is($?, 0, 'no error on wantarray success'); my $nl = scalar @ref; ok($nl > 1, "qx returned array length of $nl"); + is(join('', @ref), $ref, 'qx array and scalar context both work'); $gcf->qx(qw(repack -adq)); ok($gcf->packed_bytes > 0, 'packed size is positive'); + $gcf->qx(qw(rev-parse --verify bogus)); + isnt($?, 0, '$? set on failure'.$?); } SKIP: { @@ -296,27 +296,11 @@ $pi_cfg->each_inbox(sub { # ensure IDLE persists across HUP, w/o extra watches or FDs $td->kill('HUP') or BAIL_OUT "failed to kill -imapd: $!"; - SKIP: { - skip 'no inotify fdinfo (or support)', 2 if !@ino_info; - my (@tmp, %prev); - local $/ = "\n"; - my $end = time + 5; - until (time > $end) { - select undef, undef, undef, 0.01; - open my $fh, '<', $ino_fdinfo or - BAIL_OUT "$ino_fdinfo: $!"; - %prev = map { $_ => 1 } @ino_info; - @tmp = grep(/^inotify wd:/, <$fh>); - if (scalar(@tmp) == scalar(@ino_info)) { - delete @prev{@tmp}; - last if scalar(keys(%prev)) == @ino_info; - } - } - is(scalar @tmp, scalar @ino_info, - 'old inotify watches replaced'); - is(scalar keys %prev, scalar @ino_info, - 'no previous watches overlap'); - }; + for my $n (1..2) { # kick the event loop so we know HUP is done + my $m = $imap_client->new(%mic_opt); + ok($m->login && $m->IsAuthenticated && $m->logout, + "connection $n works after HUP"); + } open($fh, '<', 't/data/0001.patch') or BAIL_OUT("open: $!"); run_script(['-mda', '--no-precheck'], $env, { 0 => $fh }) or @@ -332,13 +332,13 @@ $ibx->with_umask(sub { like($smsg->{to}, qr/\blist\@example\.com\b/, 'to appears'); my $doc = $m->get_document; my $col = PublicInbox::Search::BYTES(); - my $bytes = PublicInbox::SearchIdx::get_val($doc, $col); + my $bytes = PublicInbox::Search::int_val($doc, $col); like($bytes, qr/\A[0-9]+\z/, '$bytes stored as digit'); ok($bytes > 0, '$bytes is > 0'); is($bytes, $smsg->{bytes}, 'bytes Xapian value matches Over'); $col = PublicInbox::Search::UID(); - my $uid = PublicInbox::SearchIdx::get_val($doc, $col); + my $uid = PublicInbox::Search::int_val($doc, $col); is($uid, $smsg->{num}, 'UID column matches {num}'); is($uid, $m->get_docid, 'UID column matches docid'); } diff --git a/xt/create-many-inboxes.t b/xt/create-many-inboxes.t new file mode 100644 index 00000000..c92643b2 --- /dev/null +++ b/xt/create-many-inboxes.t @@ -0,0 +1,99 @@ +#!perl -w +# Copyright (C) 2020 all contributors <meta@public-inbox.org> +# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt> +use strict; +use Test::More; +use PublicInbox::TestCommon; +use PublicInbox::Eml; +use File::Path qw(mkpath); +use IO::Handle (); # autoflush +use POSIX qw(_exit); +use Cwd qw(getcwd abs_path); +use File::Spec; +my $many_root = $ENV{TEST_MANY_ROOT} or + plan skip_all => 'TEST_MANY_ROOT not defined'; +my $cwd = getcwd(); +mkpath($many_root); +-d $many_root or BAIL_OUT "$many_root: $!"; +$many_root = abs_path($many_root); +$many_root =~ m!\A\Q$cwd\E/! and BAIL_OUT "$many_root must not be in $cwd"; +require_git 2.6; +require_mods(qw(DBD::SQLite Search::Xapian)); +use_ok 'PublicInbox::V2Writable'; +my $nr_inbox = $ENV{NR_INBOX} // 10; +my $nproc = $ENV{NPROC} || PublicInbox::V2Writable::detect_nproc() || 2; +my $indexlevel = $ENV{TEST_INDEXLEVEL} // 'basic'; +diag "NR_INBOX=$nr_inbox NPROC=$nproc TEST_INDEXLEVEL=$indexlevel"; +diag "TEST_MANY_ROOT=$many_root"; +my $level_cfg = $indexlevel eq 'full' ? '' : "\tindexlevel = $indexlevel\n"; +my $pfx = "$many_root/$nr_inbox-$indexlevel"; +mkpath($pfx); +open my $cfg_fh, '>>', "$pfx/config" or BAIL_OUT $!; +$cfg_fh->autoflush(1); +my $v2_init_add = sub { + my ($i) = @_; + my $ibx = PublicInbox::Inbox->new({ + inboxdir => "$pfx/test-$i", + name => "test-$i", + newsgroup => "inbox.comp.test.foo.test-$i", + address => [ "test-$i\@example.com" ], + url => [ "//example.com/test-$i" ], + version => 2, + }); + $ibx->{indexlevel} = $indexlevel if $level_cfg ne ''; + my $entry = <<EOF; +[publicinbox "$ibx->{name}"] + address = $ibx->{-primary_address} + url = $ibx->{url}->[0] + newsgroup = $ibx->{newsgroup} + inboxdir = $ibx->{inboxdir} +EOF + $entry .= $level_cfg; + print $cfg_fh $entry or die $!; + my $v2w = PublicInbox::V2Writable->new($ibx, { nproc => 0 }); + $v2w->init_inbox(0); + $v2w->add(PublicInbox::Eml->new(<<EOM)); +Date: Sat, 02 Oct 2010 00:00:00 +0000 +From: Lorelei <l\@example.com> +To: test-$i\@example.com +Message-ID: <20101002-000000-$i\@example.com> +Subject: hello world $i + +hi +EOM + $v2w->done; +}; + +my @children; +for my $i (1..$nproc) { + my ($r, $w); + pipe($r, $w) or BAIL_OUT $!; + my $pid = fork; + if ($pid == 0) { + close $w; + while (my $i = <$r>) { + chomp $i; + $v2_init_add->($i); + } + _exit(0); + } + defined $pid or BAIL_OUT "fork: $!"; + close $r or BAIL_OUT $!; + push @children, [ $w, $pid ]; + $w->autoflush(1); +} + +for my $i (0..$nr_inbox) { + print { $children[$i % @children]->[0] } "$i\n" or BAIL_OUT $!; +} + +for my $c (@children) { + close $c->[0] or BAIL_OUT "close $!"; +} +my $i = 0; +for my $c (@children) { + my $pid = waitpid($c->[1], 0); + is($?, 0, ++$i.' exited ok'); +} +ok(close($cfg_fh), 'config written'); +done_testing; |