user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 16/16] lei q: support --jobs [SEARCHERS],[WRITERS]
  2021-02-02 11:46  7% [PATCH 00/16] lei: -I/--include and more Eric Wong
@ 2021-02-02 11:47  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-02-02 11:47 UTC (permalink / raw)
  To: meta

This comma-delimited parameter allows controlling the number of
lei_xsearch and lei2mail worker processes.  With the change
to make IPC wq_* work use the event loop, it's now safe to
run fewer worker processes for searching with no risk of
deadlocks.

MAX_PER_HOST isn't configurable yet for remote hosts,
and maybe it shouldn't be due to potential for abuse.
---
 lib/PublicInbox/IPC.pm        | 19 +++++++++++++++++++
 lib/PublicInbox/LEI.pm        |  5 ++++-
 lib/PublicInbox/LeiQuery.pm   | 14 ++++++++++++--
 lib/PublicInbox/LeiXSearch.pm |  1 -
 lib/PublicInbox/V2Writable.pm | 22 ++--------------------
 5 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/lib/PublicInbox/IPC.pm b/lib/PublicInbox/IPC.pm
index 50de1bed..3873649b 100644
--- a/lib/PublicInbox/IPC.pm
+++ b/lib/PublicInbox/IPC.pm
@@ -466,4 +466,23 @@ sub DESTROY {
 # Sereal doesn't have dclone
 sub deep_clone { ipc_thaw(ipc_freeze($_[-1])) }
 
+sub detect_nproc () {
+	# _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl
+	return POSIX::sysconf(84) if $^O eq 'linux';
+	return POSIX::sysconf(58) if $^O eq 'freebsd';
+	# TODO: more OSes
+
+	# getconf(1) is POSIX, but *NPROCESSORS* vars are not
+	for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
+		`getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
+	}
+	for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc
+		`$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1;
+	}
+
+	# should we bother with `sysctl hw.ncpu`?  Those only give
+	# us total processor count, not online processor count.
+	undef
+}
+
 1;
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 2c512c5e..9afc90cf 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -104,7 +104,7 @@ our %CMD = ( # sorted in order of importance/use:
 'q' => [ 'SEARCH_TERMS...', 'search for messages matching terms', qw(
 	save-as=s output|mfolder|o=s format|f=s dedupe|d=s thread|t augment|a
 	sort|s=s reverse|r offset=i remote! local! external! pretty
-	include|I=s@ exclude=s@ only=s@
+	include|I=s@ exclude=s@ only=s@ jobs|j=s
 	mua-cmd|mua=s no-torsocks torsocks=s verbose|v quiet|q
 	received-after=s received-before=s sent-after=s sent-since=s),
 	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
@@ -236,6 +236,9 @@ my %OPTDESC = (
 'q	only=s@' => [ 'URL_OR_PATHNAME',
 		'only use specified external(s) for search' ],
 
+'q	jobs=s'	=> [ '[SEARCH_JOBS][,WRITER_JOBS]',
+		'control number of search and writer jobs' ],
+
 'ls-query	format|f=s' => $ls_format,
 'ls-external	format|f=s' => $ls_format,
 
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index ca214ca1..72a67c24 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -17,6 +17,7 @@ sub lei_q {
 	my ($self, @argv) = @_;
 	require PublicInbox::LeiXSearch;
 	require PublicInbox::LeiOverview;
+	require PublicInbox::V2Writable;
 	PublicInbox::Config->json; # preload before forking
 	my $opt = $self->{opt};
 	# prepare any number of LeiXSearch || LeiSearch || Inbox || URL
@@ -53,13 +54,22 @@ sub lei_q {
 	unless ($lxs->locals || $lxs->remotes) {
 		return $self->fail('no local or remote inboxes to search');
 	}
-	my $xj = $lxs->concurrency($opt);
+	my ($xj, $mj) = split(/,/, $opt->{jobs} // '');
+	if (defined($xj) && $xj ne '' && $xj !~ /\A[1-9][0-9]*\z/) {
+		return $self->fail("`$xj' search jobs must be >= 1");
+	}
+	$xj ||= $lxs->concurrency($opt); # allow: "--jobs ,$WRITER_ONLY"
+	my $nproc = $lxs->detect_nproc; # don't memoize, schedtool(1) exists
+	$xj = $nproc if $xj > $nproc;
 	PublicInbox::LeiOverview->new($self) or return;
 	$self->atfork_prepare_wq($lxs);
 	$lxs->wq_workers_start('lei_xsearch', $xj, $self->oldset);
 	delete $lxs->{-ipc_atfork_child_close};
 	if (my $l2m = $self->{l2m}) {
-		my $mj = 4; # TODO: configurable
+		if (defined($mj) && $mj !~ /\A[1-9][0-9]*\z/) {
+			return $self->fail("`$mj' writer jobs must be >= 1");
+		}
+		$mj //= $nproc;
 		$self->atfork_prepare_wq($l2m);
 		$l2m->wq_workers_start('lei2mail', $mj, $self->oldset);
 		delete $l2m->{-ipc_atfork_child_close};
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 57a18075..37bd233e 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -350,7 +350,6 @@ sub do_post_augment {
 }
 
 my $MAX_PER_HOST = 4;
-sub MAX_PER_HOST { $MAX_PER_HOST }
 
 sub concurrency {
 	my ($self, $opt) = @_;
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index 35b7fe30..cbd4f003 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -8,6 +8,7 @@ use strict;
 use v5.10.1;
 use parent qw(PublicInbox::Lock);
 use PublicInbox::SearchIdxShard;
+use PublicInbox::IPC;
 use PublicInbox::Eml;
 use PublicInbox::Git;
 use PublicInbox::Import;
@@ -35,32 +36,13 @@ our $PACKING_FACTOR = 0.4;
 # to increase Xapian shards
 our $NPROC_MAX_DEFAULT = 4;
 
-sub detect_nproc () {
-	# _SC_NPROCESSORS_ONLN = 84 on both Linux glibc and musl
-	return POSIX::sysconf(84) if $^O eq 'linux';
-	return POSIX::sysconf(58) if $^O eq 'freebsd';
-	# TODO: more OSes
-
-	# getconf(1) is POSIX, but *NPROCESSORS* vars are not
-	for (qw(_NPROCESSORS_ONLN NPROCESSORS_ONLN)) {
-		`getconf $_ 2>/dev/null` =~ /^(\d+)$/ and return $1;
-	}
-	for my $nproc (qw(nproc gnproc)) { # GNU coreutils nproc
-		`$nproc 2>/dev/null` =~ /^(\d+)$/ and return $1;
-	}
-
-	# should we bother with `sysctl hw.ncpu`?  Those only give
-	# us total processor count, not online processor count.
-	undef
-}
-
 sub nproc_shards ($) {
 	my ($creat_opt) = @_;
 	my $n = $creat_opt->{nproc} if ref($creat_opt) eq 'HASH';
 	$n //= $ENV{NPROC};
 	if (!$n) {
 		# assume 2 cores if not detectable or zero
-		state $NPROC_DETECTED = detect_nproc() || 2;
+		state $NPROC_DETECTED = PublicInbox::IPC::detect_nproc() || 2;
 		$n = $NPROC_DETECTED;
 		$n = $NPROC_MAX_DEFAULT if $n > $NPROC_MAX_DEFAULT;
 	}

^ permalink raw reply related	[relevance 5%]

* [PATCH 00/16] lei: -I/--include and more
@ 2021-02-02 11:46  7% Eric Wong
  2021-02-02 11:47  5% ` [PATCH 16/16] lei q: support --jobs [SEARCHERS],[WRITERS] Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-02-02 11:46 UTC (permalink / raw)
  To: meta

We're further embracing SOCK_SEQPACKET for progress reporting.
There are numerous cleanups for the oneshot case, but that's still
using worker processes.  Worker-less oneshot seems pretty broken
atm, but 16/16 will let us work on it more easily.

Eric Wong (16):
  lei: switch to use SEQPACKET socketpair instead of pipe
  lei_query: default to 10000 messages as documented
  lei q: emit progress and counting via PktOp
  lei q: support --only, --include and --exclude
  lei: complete: do not complete non-arg options w/ help text
  lei: q: shell completion for --(include|exclude|only)
  lei_xsearch: truncate curl stderr after reading it
  lib: explicitly distinguish oneshot use
  lei q: do not leave temporary files after oneshot exit
  cmd_ipc4: fix comments and formatting
  pktop: fix potential undefined var
  lei_xsearch: ensure curl.err and tail(1) cleanup happens
  doc: lei-q: note "-a" and link to Xapian QueryParser
  lei_overview: avoid unnecessary {l2m} delete
  lei q: tidy up progress reporting
  lei q: support --jobs [SEARCHERS],[WRITERS]

 Documentation/lei-q.pod        |  5 +-
 MANIFEST                       |  2 +-
 lib/PublicInbox/CmdIPC4.pm     |  7 ++-
 lib/PublicInbox/IPC.pm         | 42 +++++++++++++----
 lib/PublicInbox/LEI.pm         | 60 +++++++++++++++---------
 lib/PublicInbox/LeiExternal.pm | 12 ++---
 lib/PublicInbox/LeiOverview.pm | 15 +++---
 lib/PublicInbox/LeiQuery.pm    | 77 ++++++++++++++++++++++++-------
 lib/PublicInbox/LeiXSearch.pm  | 83 ++++++++++++++++++++++++----------
 lib/PublicInbox/OpPipe.pm      | 41 -----------------
 lib/PublicInbox/PktOp.pm       | 69 ++++++++++++++++++++++++++++
 lib/PublicInbox/V2Writable.pm  | 22 +--------
 t/lei.t                        | 14 ++++--
 t/lei_external.t               |  2 +-
 xt/lei-sigpipe.t               | 29 ++++++++++--
 15 files changed, 318 insertions(+), 162 deletions(-)
 delete mode 100644 lib/PublicInbox/OpPipe.pm
 create mode 100644 lib/PublicInbox/PktOp.pm


^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-02-02 11:46  7% [PATCH 00/16] lei: -I/--include and more Eric Wong
2021-02-02 11:47  5% ` [PATCH 16/16] lei q: support --jobs [SEARCHERS],[WRITERS] Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).