user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 6/9] xap_helper: expire DB handles when FD table is near full
  2024-05-19 21:55  7% [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
@ 2024-05-19 21:55  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
  To: meta

For long-lived daemons across config reloads, we shouldn't keep
Xapian DBs open forever under FD pressure.  So estimate the
number of FDs we need per-shard and start clearing some out
if we have too many open.

While we're at it, hoist out our ulimit_n helper and share it
across extindex and the Perl XapHelper implementation.
---
 lib/PublicInbox/ExtSearchIdx.pm |  8 +----
 lib/PublicInbox/Search.pm       | 16 +++++++++
 lib/PublicInbox/XapHelper.pm    | 18 +++++++---
 lib/PublicInbox/XapHelperCxx.pm |  1 +
 lib/PublicInbox/xap_helper.h    | 58 +++++++++++++++++++++++++--------
 t/xap_helper.t                  | 23 +++++++++++++
 6 files changed, 100 insertions(+), 24 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index 883dbea3..934197c0 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -543,13 +543,7 @@ sub _ibx_for ($$$) {
 sub _fd_constrained ($) {
 	my ($self) = @_;
 	$self->{-fd_constrained} //= do {
-		my $soft;
-		if (eval { require BSD::Resource; 1 }) {
-			my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
-			($soft, undef) = BSD::Resource::getrlimit($NOFILE);
-		} else {
-			chomp($soft = `sh -c 'ulimit -n'`);
-		}
+		my $soft = PublicInbox::Search::ulimit_n;
 		if (defined($soft)) {
 			# $want is an estimate
 			my $want = scalar(@{$self->{ibx_active}}) + 64;
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index e5c5d6ab..25ef49c5 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -54,6 +54,9 @@ use constant {
 	#
 	#      v1.6.0 adds BYTES, UID and THREADID values
 	SCHEMA_VERSION => 15,
+
+	# we may have up to 8 FDs per shard (depends on Xapian *shrug*)
+	SHARD_COST => 8,
 };
 
 use PublicInbox::Smsg;
@@ -729,4 +732,17 @@ sub get_doc ($$) {
 	}
 }
 
+# not sure where best to put this...
+sub ulimit_n () {
+	my $n;
+	if (eval { require BSD::Resource; 1 }) {
+		my $NOFILE = BSD::Resource::RLIMIT_NOFILE();
+		($n, undef) = BSD::Resource::getrlimit($NOFILE);
+	} else {
+		require PublicInbox::Spawn;
+		$n = PublicInbox::Spawn::run_qx([qw(/bin/sh -c), 'ulimit -n']);
+	}
+	$n;
+}
+
 1;
diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index f1311bd4..db9e99ae 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -18,7 +18,7 @@ use POSIX qw(:signal_h);
 use Fcntl qw(LOCK_UN LOCK_EX);
 use Carp qw(croak);
 my $X = \%PublicInbox::Search::X;
-our (%SRCH, %WORKERS, $nworker, $workerset, $in);
+our (%SRCH, %WORKERS, $nworker, $workerset, $in, $SHARD_NFD, $MY_FD_MAX);
 our $stderr = \*STDERR;
 
 sub cmd_test_inspect {
@@ -193,8 +193,14 @@ sub dispatch {
 	my $key = "-d\0".join("\0-d\0", @$dirs);
 	$key .= "\0".join("\0", map { ('-Q', $_) } @{$req->{Q}}) if $req->{Q};
 	my $new;
-	$req->{srch} = $SRCH{$key} //= do {
+	$req->{srch} = $SRCH{$key} // do {
 		$new = { qp_flags => $PublicInbox::Search::QP_FLAGS };
+		my $nfd = scalar(@$dirs) * PublicInbox::Search::SHARD_COST;
+		$SHARD_NFD += $nfd;
+		if ($SHARD_NFD > $MY_FD_MAX) {
+			$SHARD_NFD = $nfd;
+			%SRCH = ();
+		}
 		my $first = shift @$dirs;
 		my $slow_phrase = -f "$first/iamchert";
 		$new->{xdb} = $X->{Database}->new($first);
@@ -207,7 +213,7 @@ sub dispatch {
 		bless $new, $req->{c} ? 'PublicInbox::CodeSearch' :
 					'PublicInbox::Search';
 		$new->{qp} = $new->qparse_new;
-		$new;
+		$SRCH{$key} = $new;
 	};
 	$req->{srch}->{xdb}->reopen unless $new;
 	$req->{Q} && !$req->{srch}->{qp_extra_done} and
@@ -305,7 +311,7 @@ sub start (@) {
 	my $c = getsockopt(local $in = \*STDIN, SOL_SOCKET, SO_TYPE);
 	unpack('i', $c) == SOCK_SEQPACKET or die 'stdin is not SOCK_SEQPACKET';
 
-	local (%SRCH, %WORKERS);
+	local (%SRCH, %WORKERS, $SHARD_NFD, $MY_FD_MAX);
 	PublicInbox::Search::load_xapian();
 	$GLP->getoptionsfromarray(\@argv, my $opt = { j => 1 }, 'j=i') or
 		die 'bad args';
@@ -314,6 +320,10 @@ sub start (@) {
 	for (@PublicInbox::DS::UNBLOCKABLE, POSIX::SIGUSR1) {
 		$workerset->delset($_) or die "delset($_): $!";
 	}
+	$MY_FD_MAX = PublicInbox::Search::ulimit_n //
+		die "E: unable to get RLIMIT_NOFILE: $!";
+	warn "W: RLIMIT_NOFILE=$MY_FD_MAX too low\n" if $MY_FD_MAX < 72;
+	$MY_FD_MAX -= 64;
 
 	local $nworker = $opt->{j};
 	return recv_loop() if $nworker == 0;
diff --git a/lib/PublicInbox/XapHelperCxx.pm b/lib/PublicInbox/XapHelperCxx.pm
index 74852ad1..922bd583 100644
--- a/lib/PublicInbox/XapHelperCxx.pm
+++ b/lib/PublicInbox/XapHelperCxx.pm
@@ -34,6 +34,7 @@ my $ldflags = '-Wl,-O1';
 $ldflags .= ' -Wl,--compress-debug-sections=zlib' if $^O ne 'openbsd';
 my $xflags = ($ENV{CXXFLAGS} // '-Wall -ggdb3 -pipe') . ' ' .
 	' -DTHREADID=' . PublicInbox::Search::THREADID .
+	' -DSHARD_COST=' . PublicInbox::Search::SHARD_COST .
 	' -DXH_SPEC="'.join('',
 		map { s/=.*/:/; $_ } @PublicInbox::Search::XH_SPEC) . '" ' .
 	($ENV{LDFLAGS} // $ldflags);
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index 44e0d63e..c71ac06d 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -140,6 +140,7 @@ static int srch_eq(const struct srch *a, const struct srch *b)
 KHASHL_CSET_INIT(KH_LOCAL, srch_set, srch_set, struct srch *,
 		srch_hash, srch_eq)
 static srch_set *srch_cache;
+static long my_fd_max, shard_nfd;
 // sock_fd is modified in signal handler, yes, it's SOCK_SEQPACKET
 static volatile int sock_fd = STDIN_FILENO;
 static sigset_t fullset, workerset;
@@ -552,6 +553,34 @@ static bool is_chert(const char *dir)
 	return false;
 }
 
+static void srch_free(struct srch *srch)
+{
+	delete srch->qp;
+	delete srch->db;
+	free(srch);
+}
+
+static void srch_cache_renew(struct srch *keep)
+{
+	khint_t k;
+
+	// can't delete while iterating, so just free each + clear
+	for (k = kh_begin(srch_cache); k != kh_end(srch_cache); k++) {
+		if (!kh_exist(srch_cache, k)) continue;
+		struct srch *cur = kh_key(srch_cache, k);
+
+		if (cur != keep)
+			srch_free(cur);
+	}
+	srch_set_cs_clear(srch_cache);
+	if (keep) {
+		int absent;
+		k = srch_set_put(srch_cache, keep, &absent);
+		assert(absent);
+		assert(k < kh_end(srch_cache));
+	}
+}
+
 static bool srch_init(struct req *req)
 {
 	int i;
@@ -563,6 +592,13 @@ static bool srch_init(struct req *req)
 			Xapian::QueryParser::FLAG_WILDCARD;
 	if (is_chert(req->dirv[0]))
 		srch->qp_flags &= ~FLAG_PHRASE;
+	long nfd = req->dirc * SHARD_COST;
+
+	shard_nfd += nfd;
+	if (shard_nfd > my_fd_max) {
+		srch_cache_renew(srch);
+		shard_nfd = nfd;
+	}
 	try {
 		srch->db = new Xapian::Database(req->dirv[0]);
 	} catch (...) {
@@ -629,13 +665,6 @@ static void srch_init_extra(struct req *req)
 	req->srch->qp_extra_done = true;
 }
 
-static void srch_free(struct srch *srch)
-{
-	delete srch->qp;
-	delete srch->db;
-	free(srch);
-}
-
 static void dispatch(struct req *req)
 {
 	int c;
@@ -900,12 +929,7 @@ static void cleanup_all(void)
 	cleanup_pids();
 	if (!srch_cache)
 		return;
-
-	khint_t k;
-	for (k = kh_begin(srch_cache); k != kh_end(srch_cache); k++) {
-		if (kh_exist(srch_cache, k))
-			srch_free(kh_key(srch_cache, k));
-	}
+	srch_cache_renew(NULL);
 	srch_set_destroy(srch_cache);
 	srch_cache = NULL;
 }
@@ -1041,12 +1065,20 @@ int main(int argc, char *argv[])
 	socklen_t slen = (socklen_t)sizeof(c);
 	stdout_path = getenv("STDOUT_PATH");
 	stderr_path = getenv("STDERR_PATH");
+	struct rlimit rl;
 
 	if (getsockopt(sock_fd, SOL_SOCKET, SO_TYPE, &c, &slen))
 		err(EXIT_FAILURE, "getsockopt");
 	if (c != SOCK_SEQPACKET)
 		errx(EXIT_FAILURE, "stdin is not SOCK_SEQPACKET");
 
+	if (getrlimit(RLIMIT_NOFILE, &rl))
+		err(EXIT_FAILURE, "getrlimit");
+	my_fd_max = rl.rlim_cur;
+	if (my_fd_max < 72)
+		warnx("W: RLIMIT_NOFILE=%ld too low\n", my_fd_max);
+	my_fd_max -= 64;
+
 	mail_nrp_init();
 	code_nrp_init();
 	srch_cache = srch_set_init();
diff --git a/t/xap_helper.t b/t/xap_helper.t
index 78be8539..b0fa75a2 100644
--- a/t/xap_helper.t
+++ b/t/xap_helper.t
@@ -284,4 +284,27 @@ for my $n (@NO_CXX) {
 	}
 }
 
+SKIP: {
+	my $nr = $ENV{TEST_XH_FDMAX} or
+		skip 'TEST_XH_FDMAX unset', 1;
+	my @xhc = map {
+		local $ENV{PI_NO_CXX} = $_;
+		PublicInbox::XapClient::start_helper('-j0');
+	} @NO_CXX;
+	my $n = 1;
+	my $exp;
+	for (0..(PublicInbox::Search::ulimit_n() * $nr)) {
+		for my $xhc (@xhc) {
+			my $r = $xhc->mkreq([], qw(mset -Q), "tst$n=XTST$n",
+					@ibx_shard_args, qw(rt:0..));
+			chomp(my @res = readline($r));
+			$exp //= $res[0];
+			$exp eq $res[0] or
+				is $exp, $res[0], "mset mismatch on n=$n";
+			++$n;
+		}
+	}
+	ok $exp, "got expected entries ($n)";
+}
+
 done_testing;

^ permalink raw reply related	[relevance 5%]

* [PATCH 0/9] fixes noticed while working on indexheader
@ 2024-05-19 21:55  7% Eric Wong
  2024-05-19 21:55  5% ` [PATCH 6/9] xap_helper: expire DB handles when FD table is near full Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
  To: meta

Still trying to figure out how to go about indexheader+altid
across -extindex, multiple inboxes, and lei.  But a bunch of
improvements were found to existing behavior and we shouldn't
have to worry about running out of FDs in xap_helper, anymore.

Introducing khashl.h seems like a big code import atm; but
hsearch(3) really sucks and std::map is too much alien-looking
C++ (and chained hash tables have poor locality).  The FUSE shim
will certainly be C (and not C++) and perhaps URCU (for
rculfhash) is too rare a dependency to count on, so having
khashl available would help there.

Eric Wong (9):
  config: dedupe ibx->{newsgroup}
  xap_helper: key search instances by -Q params, too
  xap_helper.h: use khashl.h instead of hsearch(3)
  xap_helper.h: use xcalloc to simplify error checking
  xap_helper.h: memoize Xapian handles with khashl
  xap_helper: expire DB handles when FD table is near full
  xap_helper: drop DB handles on EMFILE/ENFILE/etc...
  lei_saved_search: drop ->altid_map method
  www_text: fix /$INBOX/_/text/help/raw endpoint

 MANIFEST                          |   1 +
 lib/PublicInbox/Config.pm         |   4 +-
 lib/PublicInbox/ExtSearchIdx.pm   |   8 +-
 lib/PublicInbox/LeiSavedSearch.pm |   2 -
 lib/PublicInbox/Search.pm         |  16 +
 lib/PublicInbox/WwwText.pm        |   2 +-
 lib/PublicInbox/XapHelper.pm      |  48 ++-
 lib/PublicInbox/XapHelperCxx.pm   |   1 +
 lib/PublicInbox/khashl.h          | 502 ++++++++++++++++++++++++++++++
 lib/PublicInbox/xap_helper.h      | 273 +++++++++-------
 lib/PublicInbox/xh_cidx.h         |  79 +++--
 t/psgi_text.t                     |  21 +-
 t/xap_helper.t                    |  23 ++
 13 files changed, 818 insertions(+), 162 deletions(-)
 create mode 100644 lib/PublicInbox/khashl.h


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2024-05-19 21:55  7% [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
2024-05-19 21:55  5% ` [PATCH 6/9] xap_helper: expire DB handles when FD table is near full Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).