about summary refs log tree commit homepage
path: root/lib
diff options
context:
space:
mode:
author Eric Wong <e@80x24.org> 2020-12-25 10:21:11 +0000
committer Eric Wong <e@80x24.org> 2020-12-26 06:22:56 +0000
commit 14e606423429d6121c295c2bc0599fe1bf66b07c (patch)
tree 5c33b2899a4f65f4170d6f8585ac6ee828e101a0 /lib
parent 672d146577305baa7f508bd2e33212bba6fdb800 (diff)
download public-inbox-14e606423429d6121c295c2bc0599fe1bf66b07c.tar.gz
Most distros ship with low RLIMIT_NOFILE limits, and surprises
may lurk for admins who configure many inboxes.  Keep FD usage
under control to avoid EMFILE errors at inopportune times during
reindex.

From what I can tell, this is the only place where extindex can
have unpredictable FD growth when there are thousands of inboxes,
and it's in an extremely rare code path.
Diffstat (limited to 'lib')
-rw-r--r-- lib/PublicInbox/ExtSearchIdx.pm 37
1 files changed, 34 insertions, 3 deletions
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 386e1cee..3f197973 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -393,6 +393,32 @@ sub _ibx_for ($$$) {
         $self->{ibx_list}->[$pos] // die "BUG: ibx for $smsg->{blob} not mapped"
 }
 
+# Returns true if the soft RLIMIT_NOFILE looks too small to keep one DB
+# handle open per inbox in {ibx_list}, so callers should close handles
+# early.  The answer is computed once and cached in {-fd_constrained}.
+sub _fd_constrained ($) {
+        my ($self) = @_;
+        $self->{-fd_constrained} //= do {
+                my $soft;
+                if (eval { require BSD::Resource; 1 }) {
+                        my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+                        ($soft, undef) = BSD::Resource::getrlimit($NOFILE);
+                } else {
+                        # qx// yields undef if sh cannot be run; don't chomp that
+                        $soft = `sh -c 'ulimit -n'`;
+                        chomp($soft) if defined($soft);
+                }
+                if (defined($soft) && $soft =~ /\A(?:unlimited\z|-[0-9])/) {
+                        # sh prints "unlimited" when there is no limit; a
+                        # negative value is presumably RLIM_INFINITY from
+                        # BSD::Resource (TODO confirm).  Either way, comparing
+                        # it numerically would wrongly report a shortage.
+                        0; # defined-but-false so the //= cache still sticks
+                } elsif (defined($soft) && $soft =~ /\A[0-9]/) {
+                        my $want = scalar(@{$self->{ibx_list}}) + 64; # estimate
+                        my $ret = $want > $soft;
+                        if ($ret) {
+                                warn <<EOF;
+RLIMIT_NOFILE=$soft insufficient (want: $want), will close DB handles early
+EOF
+                        }
+                        $ret;
+                } else {
+                        # undef, empty or unparseable: assume the worst
+                        warn "Unable to determine RLIMIT_NOFILE: $@\n";
+                        1;
+                }
+        };
+}
+
 sub _reindex_finalize ($$$) {
         my ($req, $smsg, $eml) = @_;
         my $sync = $req->{sync};
@@ -429,11 +455,16 @@ sub _reindex_finalize ($$$) {
                 my $x = pop(@$ary) // die "BUG: #$docid {by_chash} empty";
                 $x->{num} = delete($x->{xnum}) // die '{xnum} unset';
                 $ibx = _ibx_for($self, $sync, $x);
-                my $e = $ibx->over->get_art($x->{num});
-                $e->{blob} eq $x->{blob} or die <<EOF;
+                if (my $over = $ibx->over) {
+                        my $e = $over->get_art($x->{num});
+                        $e->{blob} eq $x->{blob} or die <<EOF;
 $x->{blob} != $e->{blob} (${\$ibx->eidx_key}:$e->{num});
 EOF
-                push @todo, $ibx, $e;
+                        push @todo, $ibx, $e;
+                        $over->dbh_close if _fd_constrained($self);
+                } else {
+                        die "$ibx->{inboxdir}: over.sqlite3 unusable: $!\n";
+                }
         }
         undef $by_chash;
         while (my ($ibx, $e) = splice(@todo, 0, 2)) {