user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 7/9] xap_helper: drop DB handles on EMFILE/ENFILE/etc...
  2024-05-19 21:55  7% [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
@ 2024-05-19 21:55  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
  To: meta

This allows the process to recover if we get the SHARD_COST
calculation wrong (e.g. if Xapian uses more FDs than expected in
new versions).  We'll no longer attempt to recover from ENOMEM
and similar errors during Xapian DB initialization and instead
just tear down the process (as we do in other places).
---
 lib/PublicInbox/XapHelper.pm | 27 +++++++++----
 lib/PublicInbox/xap_helper.h | 76 +++++++++++++++++-------------------
 2 files changed, 56 insertions(+), 47 deletions(-)

diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index db9e99ae..ba41b5d2 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -202,14 +202,27 @@ sub dispatch {
 			%SRCH = ();
 		}
 		my $first = shift @$dirs;
-		my $slow_phrase = -f "$first/iamchert";
-		$new->{xdb} = $X->{Database}->new($first);
-		for (@$dirs) {
-			$slow_phrase ||= -f "$_/iamchert";
-			$new->{xdb}->add_database($X->{Database}->new($_));
+		for my $retried (0, 1) {
+			my $slow_phrase = -f "$first/iamchert";
+			eval {
+				$new->{xdb} = $X->{Database}->new($first);
+				for (@$dirs) {
+					$slow_phrase ||= -f "$_/iamchert";
+					$new->{xdb}->add_database(
+							$X->{Database}->new($_))
+				}
+			};
+			last unless $@;
+			if ($retried) {
+				die "E: $@\n";
+			} else { # may be EMFILE/ENFILE/ENOMEM....
+				warn "W: $@, retrying...\n";
+				%SRCH = ();
+				$SHARD_NFD = $nfd;
+			}
+			$slow_phrase or $new->{qp_flags}
+				|= PublicInbox::Search::FLAG_PHRASE();
 		}
-		$slow_phrase or
-			$new->{qp_flags} |= PublicInbox::Search::FLAG_PHRASE();
 		bless $new, $req->{c} ? 'PublicInbox::CodeSearch' :
 					'PublicInbox::Search';
 		$new->{qp} = $new->qparse_new;
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index c71ac06d..831afdc6 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -581,17 +581,14 @@ static void srch_cache_renew(struct srch *keep)
 	}
 }
 
-static bool srch_init(struct req *req)
+static void srch_init(struct req *req)
 {
 	int i;
 	struct srch *srch = req->srch;
 	const unsigned FLAG_PHRASE = Xapian::QueryParser::FLAG_PHRASE;
-	srch->qp_flags = FLAG_PHRASE |
-			Xapian::QueryParser::FLAG_BOOLEAN |
+	srch->qp_flags = Xapian::QueryParser::FLAG_BOOLEAN |
 			Xapian::QueryParser::FLAG_LOVEHATE |
 			Xapian::QueryParser::FLAG_WILDCARD;
-	if (is_chert(req->dirv[0]))
-		srch->qp_flags &= ~FLAG_PHRASE;
 	long nfd = req->dirc * SHARD_COST;
 
 	shard_nfd += nfd;
@@ -599,37 +596,42 @@ static bool srch_init(struct req *req)
 		srch_cache_renew(srch);
 		shard_nfd = nfd;
 	}
-	try {
-		srch->db = new Xapian::Database(req->dirv[0]);
-	} catch (...) {
-		warn("E: Xapian::Database(%s)", req->dirv[0]);
-		return false;
-	}
-	try {
-		for (i = 1; i < req->dirc; i++) {
-			const char *dir = req->dirv[i];
-			if (srch->qp_flags & FLAG_PHRASE && is_chert(dir))
+	for (int retried = 0; retried < 2; retried++) {
+		srch->qp_flags |= FLAG_PHRASE;
+		i = 0;
+		try {
+			srch->db = new Xapian::Database(req->dirv[i]);
+			if (is_chert(req->dirv[0]))
 				srch->qp_flags &= ~FLAG_PHRASE;
-			srch->db->add_database(Xapian::Database(dir));
+			for (i = 1; i < req->dirc; i++) {
+				const char *dir = req->dirv[i];
+				if (srch->qp_flags & FLAG_PHRASE &&
+						is_chert(dir))
+					srch->qp_flags &= ~FLAG_PHRASE;
+				srch->db->add_database(Xapian::Database(dir));
+			}
+			break;
+		} catch (const Xapian::Error & e) {
+			warnx("E: Xapian::Error: %s (%s)",
+				e.get_description().c_str(), req->dirv[i]);
+		} catch (...) { // does this happen?
+			warn("E: add_database(%s)", req->dirv[i]);
+		}
+		if (retried) {
+			errx(EXIT_FAILURE, "E: can't open %s", req->dirv[i]);
+		} else {
+			warnx("retrying...");
+			if (srch->db)
+				delete srch->db;
+			srch->db = NULL;
+			srch_cache_renew(srch);
 		}
-	} catch (...) {
-		warn("E: add_database(%s)", req->dirv[i]);
-		return false;
-	}
-	try {
-		srch->qp = new Xapian::QueryParser;
-	} catch (...) {
-		perror("E: Xapian::QueryParser");
-		return false;
 	}
+	// these will raise and die on ENOMEM or other errors
+	srch->qp = new Xapian::QueryParser;
 	srch->qp->set_default_op(Xapian::Query::OP_AND);
 	srch->qp->set_database(*srch->db);
-	try {
-		srch->qp->set_stemmer(Xapian::Stem("english"));
-	} catch (...) {
-		perror("E: Xapian::Stem");
-		return false;
-	}
+	srch->qp->set_stemmer(Xapian::Stem("english"));
 	srch->qp->set_stemming_strategy(Xapian::QueryParser::STEM_SOME);
 	srch->qp->SET_MAX_EXPANSION(100);
 
@@ -637,7 +639,6 @@ static bool srch_init(struct req *req)
 		qp_init_code_search(srch->qp); // CodeSearch.pm
 	else
 		qp_init_mail_search(srch->qp); // Search.pm
-	return true;
 }
 
 // setup query parser for altid and arbitrary headers
@@ -761,15 +762,12 @@ static void dispatch(struct req *req)
 	khint_t ki = srch_set_put(srch_cache, kbuf.srch, &absent);
 	assert(ki < kh_end(srch_cache));
 	req->srch = kh_key(srch_cache, ki);
-	if (!absent) { // reuse existing
+	if (absent) {
+		srch_init(req);
+	} else {
 		assert(req->srch != kbuf.srch);
 		srch_free(kbuf.srch);
 		req->srch->db->reopen();
-	} else if (!srch_init(req)) {
-		int gone = srch_set_del(srch_cache, ki);
-		assert(gone);
-		srch_free(kbuf.srch);
-		goto cmd_err; // srch_init already warned
 	}
 	if (req->qpfxc && !req->srch->qp_extra_done)
 		srch_init_extra(req);
@@ -786,8 +784,6 @@ static void dispatch(struct req *req)
 	}
 	if (req->timeout_sec)
 		alarm(0);
-cmd_err:
-	return; // just be silent on errors, for now
 }
 
 static void cleanup_pids(void)

^ permalink raw reply related	[relevance 5%]

* [PATCH 0/9] fixes noticed while working on indexheader
@ 2024-05-19 21:55  7% Eric Wong
  2024-05-19 21:55  5% ` [PATCH 7/9] xap_helper: drop DB handles on EMFILE/ENFILE/etc Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
  To: meta

Still trying to figure out how to go about indexheader+altid
across -extindex, multiple inboxes, and lei.  But a bunch of
improvements to existing behavior were found, and we should no
longer have to worry about running out of FDs in xap_helper.

Introducing khashl.h seems like a big code import at the moment,
but hsearch(3) really sucks and std::map is too alien-looking
as C++ (and chained hash tables have poor locality).  The FUSE
shim will certainly be C (and not C++), and URCU (for rculfhash)
is perhaps too rare a dependency to count on, so having khashl
available would help there.

Eric Wong (9):
  config: dedupe ibx->{newsgroup}
  xap_helper: key search instances by -Q params, too
  xap_helper.h: use khashl.h instead of hsearch(3)
  xap_helper.h: use xcalloc to simplify error checking
  xap_helper.h: memoize Xapian handles with khashl
  xap_helper: expire DB handles when FD table is near full
  xap_helper: drop DB handles on EMFILE/ENFILE/etc...
  lei_saved_search: drop ->altid_map method
  www_text: fix /$INBOX/_/text/help/raw endpoint

 MANIFEST                          |   1 +
 lib/PublicInbox/Config.pm         |   4 +-
 lib/PublicInbox/ExtSearchIdx.pm   |   8 +-
 lib/PublicInbox/LeiSavedSearch.pm |   2 -
 lib/PublicInbox/Search.pm         |  16 +
 lib/PublicInbox/WwwText.pm        |   2 +-
 lib/PublicInbox/XapHelper.pm      |  48 ++-
 lib/PublicInbox/XapHelperCxx.pm   |   1 +
 lib/PublicInbox/khashl.h          | 502 ++++++++++++++++++++++++++++++
 lib/PublicInbox/xap_helper.h      | 273 +++++++++-------
 lib/PublicInbox/xh_cidx.h         |  79 +++--
 t/psgi_text.t                     |  21 +-
 t/xap_helper.t                    |  23 ++
 13 files changed, 818 insertions(+), 162 deletions(-)
 create mode 100644 lib/PublicInbox/khashl.h


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2024-05-19 21:55  7% [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
2024-05-19 21:55  5% ` [PATCH 7/9] xap_helper: drop DB handles on EMFILE/ENFILE/etc Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).