user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/9] fixes noticed while working on indexheader
@ 2024-05-19 21:55  7% Eric Wong
  2024-05-19 21:55  5% ` [PATCH 2/9] xap_helper: key search instances by -Q params, too Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
  To: meta

Still trying to figure out how to go about indexheader+altid
across -extindex, multiple inboxes, and lei.  But a bunch of
improvements to existing behavior were found along the way, and
we shouldn't have to worry about running out of FDs in
xap_helper anymore.

Introducing khashl.h seems like a big code import at the moment,
but hsearch(3) really sucks and std::map is too much
alien-looking C++ (and chained hash tables have poor locality).
The FUSE shim will certainly be C (and not C++), and perhaps URCU
(for rculfhash) is too rare a dependency to count on, so having
khashl available would help there.

Eric Wong (9):
  config: dedupe ibx->{newsgroup}
  xap_helper: key search instances by -Q params, too
  xap_helper.h: use khashl.h instead of hsearch(3)
  xap_helper.h: use xcalloc to simplify error checking
  xap_helper.h: memoize Xapian handles with khashl
  xap_helper: expire DB handles when FD table is near full
  xap_helper: drop DB handles on EMFILE/ENFILE/etc...
  lei_saved_search: drop ->altid_map method
  www_text: fix /$INBOX/_/text/help/raw endpoint

 MANIFEST                          |   1 +
 lib/PublicInbox/Config.pm         |   4 +-
 lib/PublicInbox/ExtSearchIdx.pm   |   8 +-
 lib/PublicInbox/LeiSavedSearch.pm |   2 -
 lib/PublicInbox/Search.pm         |  16 +
 lib/PublicInbox/WwwText.pm        |   2 +-
 lib/PublicInbox/XapHelper.pm      |  48 ++-
 lib/PublicInbox/XapHelperCxx.pm   |   1 +
 lib/PublicInbox/khashl.h          | 502 ++++++++++++++++++++++++++++++
 lib/PublicInbox/xap_helper.h      | 273 +++++++++-------
 lib/PublicInbox/xh_cidx.h         |  79 +++--
 t/psgi_text.t                     |  21 +-
 t/xap_helper.t                    |  23 ++
 13 files changed, 818 insertions(+), 162 deletions(-)
 create mode 100644 lib/PublicInbox/khashl.h


^ permalink raw reply	[relevance 7%]

* [PATCH 2/9] xap_helper: key search instances by -Q params, too
  2024-05-19 21:55  7% [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
@ 2024-05-19 21:55  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2024-05-19 21:55 UTC (permalink / raw)
  To: meta

In addition to the shards which comprise the xap_helper search
instance, we also account for changes in altid and indexheader
in case the xap_helper lifetime exceeds that of the given
PublicInbox::Config.

xap_helper will be agnostic to Config lifetime since it's
possible to run -netd and -httpd instances with multiple Config
files, but a single xap_helper instance (with workers) should be
able to service all of them.
---
 lib/PublicInbox/XapHelper.pm |  3 ++-
 lib/PublicInbox/xap_helper.h | 45 +++++++++++++++++++-----------------
 2 files changed, 26 insertions(+), 22 deletions(-)

diff --git a/lib/PublicInbox/XapHelper.pm b/lib/PublicInbox/XapHelper.pm
index c9957f64..f1311bd4 100644
--- a/lib/PublicInbox/XapHelper.pm
+++ b/lib/PublicInbox/XapHelper.pm
@@ -190,7 +190,8 @@ sub dispatch {
 	$GLP->getoptionsfromarray(\@argv, $req, @PublicInbox::Search::XH_SPEC)
 		or return;
 	my $dirs = delete $req->{d} or die 'no -d args';
-	my $key = join("\0", @$dirs);
+	my $key = "-d\0".join("\0-d\0", @$dirs);
+	$key .= "\0".join("\0", map { ('-Q', $_) } @{$req->{Q}}) if $req->{Q};
 	my $new;
 	$req->{srch} = $SRCH{$key} //= do {
 		$new = { qp_flags => $PublicInbox::Search::QP_FLAGS };
diff --git a/lib/PublicInbox/xap_helper.h b/lib/PublicInbox/xap_helper.h
index a30a8768..8bfd7ab6 100644
--- a/lib/PublicInbox/xap_helper.h
+++ b/lib/PublicInbox/xap_helper.h
@@ -112,12 +112,12 @@ enum exc_iter {
 };
 
 struct srch {
-	int paths_len; // int for comparisons
+	int ckey_len; // int for comparisons
 	unsigned qp_flags;
 	bool qp_extra_done;
 	Xapian::Database *db;
 	Xapian::QueryParser *qp;
-	char paths[]; // $shard_path0\0$shard_path1\0...
+	char ckey[]; // "-d\0$shard_path\0" and "-Q\0$user_prefix\0" records in argv order
 };
 
 #define MY_ARG_MAX 256
@@ -128,6 +128,7 @@ struct req { // argv and pfxv point into global rbuf
 	char *argv[MY_ARG_MAX];
 	char *pfxv[MY_ARG_MAX]; // -A <prefix>
 	char *qpfxv[MY_ARG_MAX]; // -Q <user_prefix>[:=]<INTERNAL_PREFIX>
+	char *dirv[MY_ARG_MAX]; // -d /path/to/XDB(shard)
 	size_t *lenv; // -A <prefix>LENGTH
 	struct srch *srch;
 	char *Pgit_dir;
@@ -139,9 +140,7 @@ struct req { // argv and pfxv point into global rbuf
 	unsigned long timeout_sec;
 	size_t nr_out;
 	long sort_col; // value column, negative means BoolWeight
-	int argc;
-	int pfxc;
-	int qpfxc;
+	int argc, pfxc, qpfxc, dirc;
 	FILE *fp[2]; // [0] response pipe or sock, [1] status/errors (optional)
 	bool has_input; // fp[0] is bidirectional
 	bool collapse_threads;
@@ -516,9 +515,9 @@ static int srch_cmp(const void *pa, const void *pb) // for tfind|tsearch
 {
 	const struct srch *a = (const struct srch *)pa;
 	const struct srch *b = (const struct srch *)pb;
-	int diff = a->paths_len - b->paths_len;
+	int diff = a->ckey_len - b->ckey_len;
 
-	return diff ? diff : memcmp(a->paths, b->paths, (size_t)a->paths_len);
+	return diff ? diff : memcmp(a->ckey, b->ckey, (size_t)a->ckey_len);
 }
 
 static bool is_chert(const char *dir)
@@ -536,31 +535,30 @@ static bool is_chert(const char *dir)
 
 static bool srch_init(struct req *req)
 {
-	char *dirv[MY_ARG_MAX];
 	int i;
 	struct srch *srch = req->srch;
-	int dirc = (int)SPLIT2ARGV(dirv, srch->paths, (size_t)srch->paths_len);
 	const unsigned FLAG_PHRASE = Xapian::QueryParser::FLAG_PHRASE;
 	srch->qp_flags = FLAG_PHRASE |
 			Xapian::QueryParser::FLAG_BOOLEAN |
 			Xapian::QueryParser::FLAG_LOVEHATE |
 			Xapian::QueryParser::FLAG_WILDCARD;
-	if (is_chert(dirv[0]))
+	if (is_chert(req->dirv[0]))
 		srch->qp_flags &= ~FLAG_PHRASE;
 	try {
-		srch->db = new Xapian::Database(dirv[0]);
+		srch->db = new Xapian::Database(req->dirv[0]);
 	} catch (...) {
-		warn("E: Xapian::Database(%s)", dirv[0]);
+		warn("E: Xapian::Database(%s)", req->dirv[0]);
 		return false;
 	}
 	try {
-		for (i = 1; i < dirc; i++) {
-			if (srch->qp_flags & FLAG_PHRASE && is_chert(dirv[i]))
+		for (i = 1; i < req->dirc; i++) {
+			const char *dir = req->dirv[i];
+			if (srch->qp_flags & FLAG_PHRASE && is_chert(dir))
 				srch->qp_flags &= ~FLAG_PHRASE;
-			srch->db->add_database(Xapian::Database(dirv[i]));
+			srch->db->add_database(Xapian::Database(dir));
 		}
 	} catch (...) {
-		warn("E: add_database(%s)", dirv[i]);
+		warn("E: add_database(%s)", req->dirv[i]);
 		return false;
 	}
 	try {
@@ -644,7 +642,7 @@ static void dispatch(struct req *req)
 	kfp = open_memstream(&kbuf.ptr, &size);
 	if (!kfp) err(EXIT_FAILURE, "open_memstream(kbuf)");
 	// write padding, first (contents don't matter)
-	fwrite(&req->argv[0], offsetof(struct srch, paths), 1, kfp);
+	fwrite(&req->argv[0], offsetof(struct srch, ckey), 1, kfp);
 
 	// global getopt variables:
 	optopt = 0;
@@ -656,7 +654,11 @@ static void dispatch(struct req *req)
 		switch (c) {
 		case 'a': req->asc = true; break;
 		case 'c': req->code_search = true; break;
-		case 'd': fwrite(optarg, strlen(optarg) + 1, 1, kfp); break;
+		case 'd':
+			req->dirv[req->dirc++] = optarg;
+			if (MY_ARG_MAX == req->dirc) ABORT("too many -d");
+			fprintf(kfp, "-d%c%s%c", 0, optarg, 0);
+			break;
 		case 'g': req->Pgit_dir = optarg - 1; break; // pad "P" prefix
 		case 'k':
 			req->sort_col = strtol(optarg, &end, 10);
@@ -696,6 +698,7 @@ static void dispatch(struct req *req)
 		case 'Q':
 			req->qpfxv[req->qpfxc++] = optarg;
 			if (MY_ARG_MAX == req->qpfxc) ABORT("too many -Q");
+			fprintf(kfp, "-Q%c%s%c", 0, optarg, 0);
 			break;
 		default: ABORT("bad switch `-%c'", c);
 		}
@@ -704,9 +707,9 @@ static void dispatch(struct req *req)
 	kbuf.srch->db = NULL;
 	kbuf.srch->qp = NULL;
 	kbuf.srch->qp_extra_done = false;
-	kbuf.srch->paths_len = size - offsetof(struct srch, paths);
-	if (kbuf.srch->paths_len <= 0)
-		ABORT("no -d args");
+	kbuf.srch->ckey_len = size - offsetof(struct srch, ckey);
+	if (kbuf.srch->ckey_len <= 0 || !req->dirc)
+		ABORT("no -d args (or too many)");
 	s = (struct srch **)tsearch(kbuf.srch, &srch_tree, srch_cmp);
 	if (!s) err(EXIT_FAILURE, "tsearch"); // likely ENOMEM
 	req->srch = *s;

^ permalink raw reply related	[relevance 5%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2024-05-19 21:55  7% [PATCH 0/9] fixes noticed while working on indexheader Eric Wong
2024-05-19 21:55  5% ` [PATCH 2/9] xap_helper: key search instances by -Q params, too Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).