From 3e9888ed30b7fe092b03789d19a8020d4bc0fb39 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Dec 2020 07:51:20 +0000 Subject: use rel2abs_collapsed when loading Inbox objects We need to canonicalize paths for inboxes which do not have a newsgroup defined, otherwise ->eidx_key matches can fail in unexpected ways. --- lib/PublicInbox/ExtSearchIdx.pm | 5 ----- 1 file changed, 5 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index b82d0546..c4b429df 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -72,11 +72,6 @@ sub attach_inbox { warn "W: skipping $key (no UIDVALIDITY)\n"; return; } - my $ibxdir = File::Spec->canonpath($ibx->{inboxdir}); - if ($ibxdir ne $ibx->{inboxdir}) { - warn "W: `$ibx->{inboxdir}' canonicalized to `$ibxdir'\n"; - $ibx->{inboxdir} = $ibxdir; - } $self->{ibx_map}->{$key} //= do { push @{$self->{ibx_list}}, $ibx; $ibx; -- cgit v1.2.3-24-ge0c7 From c13272432ad28adb506faf6fb9121569cf5ec710 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 21 Dec 2020 07:51:22 +0000 Subject: extsearch*: drop unnecessary path canonicalization Unlike inboxdir, the canonical-ness of -extindex paths is not relevant at the moment, and may never be relevant at all. So don't mislead others into thinking these paths being canonicalized matters. 
--- lib/PublicInbox/ExtSearchIdx.pm | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index c4b429df..f04e0443 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -30,13 +30,11 @@ use PublicInbox::V2Writable; use PublicInbox::InboxWritable; use PublicInbox::ContentHash qw(content_hash); use PublicInbox::Eml; -use File::Spec; use PublicInbox::DS qw(now); use DBI qw(:sql_types); # SQL_BLOB sub new { my (undef, $dir, $opt) = @_; - $dir = File::Spec->canonpath($dir); my $l = $opt->{indexlevel} // 'full'; $l !~ $PublicInbox::SearchIdx::INDEXLEVELS and die "invalid indexlevel=$l\n"; -- cgit v1.2.3-24-ge0c7 From 4a2e89007cb7b62151cb1869e49b27ebacfc27eb Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Dec 2020 08:38:48 +0000 Subject: miscsearch: index UIDVALIDITY, use as startup cache This brings -nntpd startup time down from ~35s to ~5s with 50K inboxes. Further improvements ought to be possible with deeper changes to MiscIdx, since -mda having to load every inbox seems unreasonable; but this general change is fairly unintrusive. 
--- lib/PublicInbox/ExtSearchIdx.pm | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index f04e0443..9d64ff5a 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -61,16 +61,20 @@ sub new { sub attach_inbox { my ($self, $ibx) = @_; - my $key = $ibx->eidx_key; - if (!$ibx->over || !$ibx->mm) { - warn "W: skipping $key (unindexed)\n"; - return; - } - if (!defined($ibx->uidvalidity)) { - warn "W: skipping $key (no UIDVALIDITY)\n"; - return; + my $ekey = $ibx->eidx_key; + my $misc = $self->{misc}; + if ($misc && $misc->inbox_data($ibx)) { # all good if already indexed + } else { + if (!$ibx->over || !$ibx->mm) { + warn "W: skipping $ekey (unindexed)\n"; + return; + } + if (!defined($ibx->uidvalidity)) { + warn "W: skipping $ekey (no UIDVALIDITY)\n"; + return; + } } - $self->{ibx_map}->{$key} //= do { + $self->{ibx_map}->{$ekey} //= do { push @{$self->{ibx_list}}, $ibx; $ibx; } -- cgit v1.2.3-24-ge0c7 From 6f9b927bf1fc5e84b92532477b275a45cd30cb01 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Wed, 23 Dec 2020 08:38:49 +0000 Subject: extsearchidx: close SQLite handles after attaching This is needed to prevent us from running out of FDs when indexing many inboxes. Perhaps checking these on attach_inbox is unnecessary and may be removed entirely down the line. 
--- lib/PublicInbox/ExtSearchIdx.pm | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 9d64ff5a..fb627089 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -65,11 +65,14 @@ sub attach_inbox { my $misc = $self->{misc}; if ($misc && $misc->inbox_data($ibx)) { # all good if already indexed } else { - if (!$ibx->over || !$ibx->mm) { + my @sqlite = ($ibx->over, $ibx->mm); + my $uidvalidity = $ibx->uidvalidity; + $ibx->{mm} = $ibx->{over} = undef; + if (scalar(@sqlite) != 2) { warn "W: skipping $ekey (unindexed)\n"; return; } - if (!defined($ibx->uidvalidity)) { + if (!defined($uidvalidity)) { warn "W: skipping $ekey (no UIDVALIDITY)\n"; return; } -- cgit v1.2.3-24-ge0c7 From b3cf37096874c6c80ef554e5153e5d995c72ab95 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Thu, 24 Dec 2020 10:09:18 +0000 Subject: inboxwritable: delay umask_prepare calls This simplifies all ->with_umask callers and opens the door for further optimizations to delay/elide process spawning. 
--- lib/PublicInbox/ExtSearchIdx.pm | 2 -- 1 file changed, 2 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index fb627089..c43a6c5e 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -934,7 +934,6 @@ sub idx_init { # similar to V2Writable PublicInbox::V2Writable::write_alternates($info_dir, $mode, $o); } $self->parallel_init($self->{indexlevel}); - $self->umask_prepare; $self->with_umask(\&_idx_init, $self, $opt); $self->{oidx}->begin_lazy; $self->{oidx}->eidx_prep; @@ -943,7 +942,6 @@ sub idx_init { # similar to V2Writable no warnings 'once'; *done = \&PublicInbox::V2Writable::done; -*umask_prepare = \&PublicInbox::InboxWritable::umask_prepare; *with_umask = \&PublicInbox::InboxWritable::with_umask; *parallel_init = \&PublicInbox::V2Writable::parallel_init; *nproc_shards = \&PublicInbox::V2Writable::nproc_shards; -- cgit v1.2.3-24-ge0c7 From 672d146577305baa7f508bd2e33212bba6fdb800 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:10 +0000 Subject: extsearchidx: delay SQLite availability checks This will make attach_inbox faster for no-op calls. It also helps us avoid races in case msgmap or over.sqlite3 gets unlinked while -extindex is running. 
--- lib/PublicInbox/ExtSearchIdx.pm | 57 ++++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 29 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index c43a6c5e..386e1cee 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -61,23 +61,7 @@ sub new { sub attach_inbox { my ($self, $ibx) = @_; - my $ekey = $ibx->eidx_key; - my $misc = $self->{misc}; - if ($misc && $misc->inbox_data($ibx)) { # all good if already indexed - } else { - my @sqlite = ($ibx->over, $ibx->mm); - my $uidvalidity = $ibx->uidvalidity; - $ibx->{mm} = $ibx->{over} = undef; - if (scalar(@sqlite) != 2) { - warn "W: skipping $ekey (unindexed)\n"; - return; - } - if (!defined($uidvalidity)) { - warn "W: skipping $ekey (no UIDVALIDITY)\n"; - return; - } - } - $self->{ibx_map}->{$ekey} //= do { + $self->{ibx_map}->{$ibx->eidx_key} //= do { push @{$self->{ibx_list}}, $ibx; $ibx; } @@ -281,29 +265,36 @@ sub last_commits { $heads; } +sub _ibx_index_reject ($) { + my ($ibx) = @_; + $ibx->mm // return 'unindexed, no msgmap.sqlite3'; + $ibx->uidvalidity // return 'no UIDVALIDITY'; + $ibx->over // return 'unindexed, no over.sqlite3'; + undef; +} + sub _sync_inbox ($$$) { my ($self, $sync, $ibx) = @_; + my $ekey = $ibx->eidx_key; + if (defined(my $err = _ibx_index_reject($ibx))) { + return "W: skipping $ekey ($err)"; + } $sync->{ibx} = $ibx; $sync->{nr} = \(my $nr = 0); my $v = $ibx->version; - my $ekey = $ibx->eidx_key; if ($v == 2) { $sync->{epoch_max} = $ibx->max_git_epoch // return; sync_prepare($self, $sync); # or return # TODO: once MiscIdx is stable } elsif ($v == 1) { my $uv = $ibx->uidvalidity; my $lc = $self->{oidx}->eidx_meta("lc-v1:$ekey//$uv"); - my $head = $ibx->mm->last_commit; - unless (defined $head) { - warn "E: $ibx->{inboxdir} is not indexed\n"; - return; - } + my $head = $ibx->mm->last_commit // + return "E: $ibx->{inboxdir} is not indexed"; my 
$stk = prepare_stack($sync, $lc ? "$lc..$head" : $head); my $unit = { stack => $stk, git => $ibx->git }; push @{$sync->{todo}}, $unit; } else { - warn "E: $ekey unsupported inbox version (v$v)\n"; - return; + return "E: $ekey unsupported inbox version (v$v)"; } for my $unit (@{delete($sync->{todo}) // []}) { last if $sync->{quit}; @@ -311,6 +302,7 @@ sub _sync_inbox ($$$) { } $self->{midx}->index_ibx($ibx) unless $sync->{quit}; $ibx->git->cleanup; # done with this inbox, now + undef; } sub gc_unref_doc ($$$$) { @@ -787,9 +779,14 @@ DELETE FROM xref3 WHERE ibx_id = ? AND xnum = ? AND oidbin = ? sub _reindex_inbox ($$$) { my ($self, $sync, $ibx) = @_; - local $self->{current_info} = $ibx->eidx_key; - _reindex_check_unseen($self, $sync, $ibx); - _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit}; + my $ekey = $ibx->eidx_key; + local $self->{current_info} = $ekey; + if (defined(my $err = _ibx_index_reject($ibx))) { + warn "W: cannot reindex $ekey ($err)\n"; + } else { + _reindex_check_unseen($self, $sync, $ibx); + _reindex_check_stale($self, $sync, $ibx) unless $sync->{quit}; + } delete @$ibx{qw(over mm search git)}; # won't need these for a bit } @@ -847,7 +844,9 @@ sub eidx_sync { # main entry point # don't use $_ here, it'll get clobbered by reindex_checkpoint for my $ibx (@{$self->{ibx_list}}) { last if $sync->{quit}; - _sync_inbox($self, $sync, $ibx); + my $err = _sync_inbox($self, $sync, $ibx); + delete @$ibx{qw(mm over)}; + warn $err, "\n" if defined($err); } $self->{oidx}->rethread_done($opt) unless $sync->{quit}; eidxq_process($self, $sync) unless $sync->{quit}; -- cgit v1.2.3-24-ge0c7 From 14e606423429d6121c295c2bc0599fe1bf66b07c Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:11 +0000 Subject: extsearchidx: close DB handles after use if FD constrained Most distros ship with low RLIMIT_NOFILE limits and surprises may lurk for admins who configure many inboxes. 
Keep FD usage under control to avoid EMFILE errors at inopportune times during reindex. From what I can tell, this is the only place where extindex can have unpredictable FD growth when there are thousands of inboxes, and it's in an extremely rare code path. --- lib/PublicInbox/ExtSearchIdx.pm | 37 ++++++++++++++++++++++++++++++++++--- 1 file changed, 34 insertions(+), 3 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 386e1cee..3f197973 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -393,6 +393,32 @@ sub _ibx_for ($$$) { $self->{ibx_list}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped" } +sub _fd_constrained ($) { + my ($self) = @_; + $self->{-fd_constrained} //= do { + my $soft; + if (eval { require BSD::Resource; 1 }) { + my $NOFILE = BSD::Resource::RLIMIT_NOFILE(); + ($soft, undef) = BSD::Resource::getrlimit($NOFILE); + } else { + chomp($soft = `sh -c 'ulimit -n'`); + } + if (defined($soft)) { + my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate + my $ret = $want > $soft; + if ($ret) { + warn <{sync}; @@ -429,11 +455,16 @@ sub _reindex_finalize ($$$) { my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty"; $x->{num} = delete($x->{xnum}) // die '{xnum} unset'; $ibx = _ibx_for($self, $sync, $x); - my $e = $ibx->over->get_art($x->{num}); - $e->{blob} eq $x->{blob} or die <<EOF; + if (my $over = $ibx->over) { + my $e = $over->get_art($x->{num}); + $e->{blob} eq $x->{blob} or die <<EOF; $x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num}); EOF - push @todo, $ibx, $e; + push @todo, $ibx, $e; + $over->dbh_close if _fd_constrained($self); + } else { + die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n"; + } } undef $by_chash; while (my ($ibx, $e) = splice(@todo, 0, 2)) { -- cgit v1.2.3-24-ge0c7 From 66518051763825d491d0c1df6837d4266edc180a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Fri, 25 Dec 2020 10:21:13 +0000 Subject: index: fix --no-fsync flag 
propagation to extindex Negation in flag names is confusing, but trying to deviate from the DB_NO_SYNC name used by Xapian is also confusing. --- lib/PublicInbox/ExtSearchIdx.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 3f197973..e7fdae48 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -54,7 +54,7 @@ sub new { }, __PACKAGE__; $self->{shards} = $self->count_shards || nproc_shards($opt->{creat}); my $oidx = PublicInbox::OverIdx->new("$self->{xpfx}/over.sqlite3"); - $oidx->{-no_fsync} = 1 if $opt->{-no_fsync}; + $self->{-no_fsync} = $oidx->{-no_fsync} = 1 if !$opt->{fsync}; $self->{oidx} = $oidx; $self } -- cgit v1.2.3-24-ge0c7 From 0b018bebe7d8ee807ab07b570cf33669da4875b0 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 01:44:36 +0000 Subject: default to CORE::warn in $SIG{__WARN__} handlers As with CORE::die and $SIG{__DIE__}, it turns out CORE::warn is safe to use inside $SIG{__WARN__} handlers without triggering infinite recursion. So fall back to reusing CORE::warn instead of creating a new sub. 
--- lib/PublicInbox/ExtSearchIdx.pm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index e7fdae48..64ebf6db 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -841,7 +841,7 @@ sub eidx_reindex { sub eidx_sync { # main entry point my ($self, $opt) = @_; - my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ }; + my $warn_cb = $SIG{__WARN__} || \&CORE::warn; local $self->{current_info} = ''; local $SIG{__WARN__} = sub { $warn_cb->($self->{current_info}, ': ', @_); -- cgit v1.2.3-24-ge0c7 From 1d96509a3f59c38394d2f3ac4323dc54c74dc202 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 01:44:37 +0000 Subject: extindex: --watch for inotify-based updates This reuses existing InboxIdle infrastructure to update external indices based on per-inbox updates. This is an alternative to auto-updating external indices via the -index command and also works with existing uses of -mda and public-inbox-watch. Using inotify (or EVFILT_VNODE) allows watching thousands of inboxes without having to scan every single one at every invocation. This is especially beneficial in cases where an external index is not writable to the users writing to per-inbox indices. 
--- lib/PublicInbox/ExtSearchIdx.pm | 126 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 118 insertions(+), 8 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 64ebf6db..53ff2ca1 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -630,7 +630,7 @@ sub eidxq_process ($$) { # for reindexing my $dbh = $self->{oidx}->dbh; my $tot = $dbh->selectrow_array('SELECT COUNT(*) FROM eidxq') or return; ${$sync->{nr}} = 0; - $sync->{-regen_fmt} = "%u/$tot\n"; + local $sync->{-regen_fmt} = "%u/$tot\n"; my $pr = $sync->{-opt}->{-progress}; if ($pr) { my $min = $dbh->selectrow_array('SELECT MIN(docid) FROM eidxq'); @@ -709,7 +709,8 @@ sub _reindex_check_unseen ($$$) { my $msgs; my $pr = $sync->{-opt}->{-progress}; my $ekey = $ibx->eidx_key; - $sync->{-regen_fmt} = "$ekey checking unseen %u/".$ibx->over->max."\n"; + local $sync->{-regen_fmt} = + "$ekey checking unseen %u/".$ibx->over->max."\n"; ${$sync->{nr}} = 0; while (scalar(@{$msgs = $ibx->over->query_xover($beg, $end)})) { @@ -752,7 +753,7 @@ sub _reindex_check_stale ($$$) { my $pr = $sync->{-opt}->{-progress}; my $fetching; my $ekey = $ibx->eidx_key; - $sync->{-regen_fmt} = + local $sync->{-regen_fmt} = "$ekey check stale/missing %u/".$ibx->over->max."\n"; ${$sync->{nr}} = 0; do { @@ -838,6 +839,13 @@ sub eidx_reindex { eidxq_process($self, $sync) unless $sync->{quit}; } +sub sync_inbox { + my ($self, $sync, $ibx) = @_; + my $err = _sync_inbox($self, $sync, $ibx); + delete @$ibx{qw(mm over)}; + warn $err, "\n" if defined($err); +} + sub eidx_sync { # main entry point my ($self, $opt) = @_; @@ -868,22 +876,21 @@ sub eidx_sync { # main entry point $ibx->{-ibx_id} //= $self->{oidx}->ibx_id($ibx->eidx_key); } if (delete($opt->{reindex})) { - $sync->{checkpoint_unlocks} = 1; + local $sync->{checkpoint_unlocks} = 1; eidx_reindex($self, $sync); } # don't use $_ here, it'll get clobbered by 
reindex_checkpoint for my $ibx (@{$self->{ibx_list}}) { last if $sync->{quit}; - my $err = _sync_inbox($self, $sync, $ibx); - delete @$ibx{qw(mm over)}; - warn $err, "\n" if defined($err); + sync_inbox($self, $sync, $ibx); } $self->{oidx}->rethread_done($opt) unless $sync->{quit}; eidxq_process($self, $sync) unless $sync->{quit}; eidxq_release($self); - PublicInbox::V2Writable::done($self); + done($self); + $sync; # for eidx_watch } sub update_last_commit { # overrides V2Writable @@ -970,6 +977,109 @@ sub idx_init { # similar to V2Writable $self->{midx}->begin_txn; } +sub _watch_commit { # PublicInbox::DS::add_timer callback + my ($self) = @_; + delete $self->{-commit_timer}; + eidxq_process($self, $self->{-watch_sync}); + eidxq_release($self); + delete local $self->{-watch_sync}->{-regen_fmt}; + reindex_checkpoint($self, $self->{-watch_sync}); + + # call event_step => done unless commit_timer is armed + PublicInbox::DS::requeue($self); +} + +sub on_inbox_unlock { # called by PublicInbox::InboxIdle + my ($self, $ibx) = @_; + my $opt = $self->{-watch_sync}->{-opt}; + my $pr = $opt->{-progress}; + my $ekey = $ibx->eidx_key; + local $0 = "sync $ekey"; + $pr->("indexing $ekey\n") if $pr; + $self->idx_init($opt); + sync_inbox($self, $self->{-watch_sync}, $ibx); + $self->{-commit_timer} //= PublicInbox::DS::add_timer( + $opt->{'commit-interval'} // 10, + \&_watch_commit, $self); +} + +sub eidx_reload { # -extindex --watch SIGHUP handler + my ($self, $idler) = @_; + if ($self->{cfg}) { + my $pr = $self->{-watch_sync}->{-opt}->{-progress}; + $pr->('reloading ...') if $pr; + @{$self->{ibx_list}} = (); + %{$self->{ibx_map}} = (); + delete $self->{-watch_sync}->{id2pos}; + my $cfg = PublicInbox::Config->new; + attach_config($self, $cfg); + $idler->refresh($cfg); + $pr->(" done\n") if $pr; + } else { + warn "reload not supported without --all\n"; + } +} + +sub eidx_resync_start ($) { # -extindex --watch SIGUSR1 handler + my ($self) = @_; + $self->{-resync_queue} //= [ 
@{$self->{ibx_list}} ]; + PublicInbox::DS::requeue($self); # trigger our ->event_step +} + +sub event_step { # PublicInbox::DS::requeue callback + my ($self) = @_; + if (my $resync_queue = $self->{-resync_queue}) { + if (my $ibx = shift(@$resync_queue)) { + on_inbox_unlock($self, $ibx); + PublicInbox::DS::requeue($self); + } else { + delete $self->{-resync_queue}; + _watch_commit($self); + } + } else { + done($self) unless $self->{-commit_timer}; + } +} + +sub eidx_watch { # public-inbox-extindex --watch main loop + my ($self, $opt) = @_; + require PublicInbox::InboxIdle; + require PublicInbox::DS; + require PublicInbox::Syscall; + require PublicInbox::Sigfd; + my $idler = PublicInbox::InboxIdle->new($self->{cfg}); + if (!$self->{cfg}) { + $idler->watch_inbox($_) for @{$self->{ibx_list}}; + } + $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}}; + my $sync = eidx_sync($self, $opt); # initial sync + return if $sync->{quit}; + my $oldset = PublicInbox::Sigfd::block_signals(); + local $self->{current_info} = ''; + my $cb = $SIG{__WARN__} || \&CORE::warn; + local $SIG{__WARN__} = sub { $cb->($self->{current_info}, ': ', @_) }; + my $sig = { + HUP => sub { eidx_reload($self, $idler) }, + USR1 => sub { eidx_resync_start($self) }, + TSTP => sub { kill('STOP', $$) }, + }; + my $quit = PublicInbox::SearchIdx::quit_cb($sync); + $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit; + my $sigfd = PublicInbox::Sigfd->new($sig, + $PublicInbox::Syscall::SFD_NONBLOCK); + local %SIG = (%SIG, %$sig) if !$sigfd; + local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock + if (!$sigfd) { + # wake up every second to accept signals if we don't + # have signalfd or IO::KQueue: + PublicInbox::Sigfd::sig_setmask($oldset); + PublicInbox::DS->SetLoopTimeout(1000); + } + PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} }); + PublicInbox::DS->EventLoop; # calls InboxIdle->event_step + done($self); +} + no warnings 'once'; *done = \&PublicInbox::V2Writable::done; 
*with_umask = \&PublicInbox::InboxWritable::with_umask; -- cgit v1.2.3-24-ge0c7 From ae8df17135014a64a9f1def786f66c9c16b05fcf Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 10:16:21 +0000 Subject: extindex: various --watch signal handling fixes We need to clobber the SIGUSR1 resync queue on SIGHUP to invalidate old inbox objects. Furthermore, the lengthy initial scan needs to ignore signals intended for the event loop to avoid unexpected behavior. Finally, add some progress output on the terminal to inform users of progress. --- lib/PublicInbox/ExtSearchIdx.pm | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 53ff2ca1..778154a5 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -1008,6 +1008,7 @@ sub eidx_reload { # -extindex --watch SIGHUP handler if ($self->{cfg}) { my $pr = $self->{-watch_sync}->{-opt}->{-progress}; $pr->('reloading ...') if $pr; + delete $self->{-resync_queue}; @{$self->{ibx_list}} = (); %{$self->{ibx_map}} = (); delete $self->{-watch_sync}->{id2pos}; @@ -1043,6 +1044,10 @@ sub event_step { # PublicInbox::DS::requeue callback sub eidx_watch { # public-inbox-extindex --watch main loop my ($self, $opt) = @_; + local %SIG = %SIG; + for my $sig (qw(HUP USR1 TSTP QUIT INT TERM)) { + $SIG{$sig} = sub { warn "SIG$sig ignored while scanning\n" }; + } require PublicInbox::InboxIdle; require PublicInbox::DS; require PublicInbox::Syscall; @@ -1052,6 +1057,8 @@ sub eidx_watch { # public-inbox-extindex --watch main loop $idler->watch_inbox($_) for @{$self->{ibx_list}}; } $_->subscribe_unlock(__PACKAGE__, $self) for @{$self->{ibx_list}}; + my $pr = $opt->{-progress}; + $pr->("performing initial scan ...\n") if $pr; my $sync = eidx_sync($self, $opt); # initial sync return if $sync->{quit}; my $oldset = PublicInbox::Sigfd::block_signals(); @@ -1067,7 
+1074,7 @@ sub eidx_watch { # public-inbox-extindex --watch main loop $sig->{QUIT} = $sig->{INT} = $sig->{TERM} = $quit; my $sigfd = PublicInbox::Sigfd->new($sig, $PublicInbox::Syscall::SFD_NONBLOCK); - local %SIG = (%SIG, %$sig) if !$sigfd; + %SIG = (%SIG, %$sig) if !$sigfd; local $self->{-watch_sync} = $sync; # for ->on_inbox_unlock if (!$sigfd) { # wake up every second to accept signals if we don't @@ -1076,6 +1083,7 @@ sub eidx_watch { # public-inbox-extindex --watch main loop PublicInbox::DS->SetLoopTimeout(1000); } PublicInbox::DS->SetPostLoopCallback(sub { !$sync->{quit} }); + $pr->("initial scan complete, entering event loop\n") if $pr; PublicInbox::DS->EventLoop; # calls InboxIdle->event_step done($self); } -- cgit v1.2.3-24-ge0c7 From 02aad3e340d1711359c4def6e91482140a989ce1 Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Sat, 26 Dec 2020 10:16:23 +0000 Subject: extindex: add undocumented --no-scan switch This makes diagnosing --watch problems easier when there's 50K inboxes by avoiding the lengthy scan (which is the reason --watch exists in the first place). --- lib/PublicInbox/ExtSearchIdx.pm | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'lib/PublicInbox/ExtSearchIdx.pm') diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm index 778154a5..07e64698 100644 --- a/lib/PublicInbox/ExtSearchIdx.pm +++ b/lib/PublicInbox/ExtSearchIdx.pm @@ -881,9 +881,11 @@ sub eidx_sync { # main entry point } # don't use $_ here, it'll get clobbered by reindex_checkpoint - for my $ibx (@{$self->{ibx_list}}) { - last if $sync->{quit}; - sync_inbox($self, $sync, $ibx); + if ($opt->{scan} // 1) { + for my $ibx (@{$self->{ibx_list}}) { + last if $sync->{quit}; + sync_inbox($self, $sync, $ibx); + } } $self->{oidx}->rethread_done($opt) unless $sync->{quit}; eidxq_process($self, $sync) unless $sync->{quit}; -- cgit v1.2.3-24-ge0c7