user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 12/12] lei q: improve --limit behavior and progress
  2021-09-21  7:41  7% [PATCH 00/12] lei: fix various annoyances Eric Wong
@ 2021-09-21  7:41  4% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-09-21  7:41 UTC (permalink / raw)
  To: meta

Avoid slurping gigantic (e.g. 100000) result sets into a single
response if a giant limit is specified, and instead use 10000
as a window for the mset with a given offset.  We'll also warn
and hint about the --limit= switch when the estimated
result set is larger than the default limit.
---
 Documentation/lei-q.pod           |  6 ++++--
 lib/PublicInbox/LeiLcat.pm        |  3 +--
 lib/PublicInbox/LeiQuery.pm       |  9 +++++++--
 lib/PublicInbox/LeiSavedSearch.pm | 18 +++++++++++-------
 lib/PublicInbox/LeiUp.pm          | 25 +++++++++----------------
 lib/PublicInbox/LeiXSearch.pm     | 18 +++++++++++++++---
 6 files changed, 47 insertions(+), 32 deletions(-)

diff --git a/Documentation/lei-q.pod b/Documentation/lei-q.pod
index 2823ced8..e1e3666d 100644
--- a/Documentation/lei-q.pod
+++ b/Documentation/lei-q.pod
@@ -10,7 +10,7 @@ lei q [OPTIONS] (--stdin|-)
 
 =head1 DESCRIPTION
 
-Search for messages across the lei store and externals.
+Search for messages across the lei/store and externals.
 
 =for comment
 TODO: Give common prefixes, or at least a description/reference.
@@ -192,7 +192,9 @@ Default: fcntl,dotlock
 
 =item -n NUMBER
 
-Limit the number of matches.
+Fuzzy limit the number of matches per-local external and lei/store.
+Messages added by the L<--threads> switch do not count towards this
+limit, and there is no limit on remote externals.
 
 Default: 10000
 
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
index c13e2153..d553b187 100644
--- a/lib/PublicInbox/LeiLcat.pm
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -144,9 +144,8 @@ sub lei_lcat {
 	$lei->ale->refresh_externals($lxs, $lei);
 	$lei->_lei_store(1);
 	my $opt = $lei->{opt};
-	my %mset_opt = map { $_ => $opt->{$_} } qw(threads limit offset);
+	my %mset_opt;
 	$mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
-	$mset_opt{limit} //= 10000;
 	$opt->{sort} //= 'relevance';
 	$mset_opt{relevance} = 1;
 	$lei->{mset_opt} = \%mset_opt;
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index d5f132f1..cb5ac8fb 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -41,6 +41,12 @@ sub _start_query { # used by "lei q" and "lei up"
 
 	# descending docid order is cheapest, MUA controls sorting order
 	$self->{mset_opt}->{relevance} //= -2 if $l2m || $opt->{threads};
+
+	my $tot = $self->{mset_opt}->{total} //= $self->{opt}->{limit} // 10000;
+	$self->{mset_opt}->{limit} = $tot > 10000 ? 10000 : $tot;
+	$self->{mset_opt}->{offset} //= 0;
+	$self->{mset_opt}->{threads} //= $opt->{threads};
+
 	if ($self->{net}) {
 		require PublicInbox::LeiAuth;
 		$self->{auth} = PublicInbox::LeiAuth->new
@@ -118,9 +124,8 @@ sub lei_q {
 	my $lxs = lxs_prepare($self) or return;
 	$self->ale->refresh_externals($lxs, $self);
 	my $opt = $self->{opt};
-	my %mset_opt = map { $_ => $opt->{$_} } qw(threads limit offset);
+	my %mset_opt;
 	$mset_opt{asc} = $opt->{'reverse'} ? 1 : 0;
-	$mset_opt{limit} //= 10000;
 	if (defined(my $sort = $opt->{'sort'})) {
 		if ($sort eq 'relevance') {
 			$mset_opt{relevance} = 1;
diff --git a/lib/PublicInbox/LeiSavedSearch.pm b/lib/PublicInbox/LeiSavedSearch.pm
index 637456e4..3e10f780 100644
--- a/lib/PublicInbox/LeiSavedSearch.pm
+++ b/lib/PublicInbox/LeiSavedSearch.pm
@@ -29,6 +29,8 @@ sub BOOL_FIELDS () {
 	qw(external local remote import-remote import-before threads)
 }
 
+sub SINGLE_FIELDS () { qw(limit dedupe output) }
+
 sub lss_dir_for ($$;$) {
 	my ($lei, $dstref, $on_fs) = @_;
 	my $pfx;
@@ -89,9 +91,9 @@ sub list {
 	} @$out
 }
 
-sub translate_dedupe ($$$) {
-	my ($self, $lei, $dd) = @_;
-	$dd //= 'content';
+sub translate_dedupe ($$) {
+	my ($self, $lei) = @_;
+	my $dd = $lei->{opt}->{dedupe} // 'content';
 	return 1 if $dd eq 'content'; # the default
 	return $self->{"-dedupe_$dd"} = 1 if ($dd eq 'oid' || $dd eq 'mid');
 	$lei->fail("--dedupe=$dd requires --no-save");
@@ -128,8 +130,7 @@ sub new { # new saved search "lei q --save"
 	File::Path::make_path($dir); # raises on error
 	$self->{-cfg} = {};
 	my $f = $self->{'-f'} = "$dir/lei.saved-search";
-	my $dd = $lei->{opt}->{dedupe};
-	translate_dedupe($self, $lei, $dd) or return;
+	translate_dedupe($self, $lei) or return;
 	open my $fh, '>', $f or return $lei->fail("open $f: $!");
 	my $sq_dst = PublicInbox::Config::squote_maybe($dst);
 	my $q = $lei->{mset_opt}->{q_raw} // die 'BUG: {q_raw} missing';
@@ -139,15 +140,14 @@ sub new { # new saved search "lei q --save"
 		$q = "\tq = ".cquote_val($q);
 	}
 	$dst = "$lei->{ovv}->{fmt}:$dst" if $dst !~ m!\Aimaps?://!i;
+	$lei->{opt}->{output} = $dst;
 	print $fh <<EOM;
 ; to refresh with new results, run: lei up $sq_dst
 ; `maxuid' and `lastresult' lines are maintained by "lei up" for optimization
 [lei]
 $q
 [lei "q"]
-	output = $dst
 EOM
-	print $fh "\tdedupe = $dd\n" if $dd;
 	for my $k (ARRAY_FIELDS) {
 		my $ary = $lei->{opt}->{$k} // next;
 		for my $x (@$ary) {
@@ -158,6 +158,10 @@ EOM
 		my $val = $lei->{opt}->{$k} // next;
 		print $fh "\t$k = ".($val ? 1 : 0)."\n";
 	}
+	for my $k (SINGLE_FIELDS) {
+		my $val = $lei->{opt}->{$k} // next;
+		print $fh "\t$k = $val\n";
+	}
 	close($fh) or return $lei->fail("close $f: $!");
 	$self->{lock_path} = "$self->{-f}.flock";
 	$self->{-ovf} = "$dir/over.sqlite3";
diff --git a/lib/PublicInbox/LeiUp.pm b/lib/PublicInbox/LeiUp.pm
index abb05d46..89cf0112 100644
--- a/lib/PublicInbox/LeiUp.pm
+++ b/lib/PublicInbox/LeiUp.pm
@@ -18,7 +18,6 @@ sub up1 ($$) {
 	my $lss = PublicInbox::LeiSavedSearch->up($lei, $out) or return;
 	my $f = $lss->{'-f'};
 	my $mset_opt = $lei->{mset_opt} = { relevance => -2 };
-	$mset_opt->{limit} = $lei->{opt}->{limit} // 10000;
 	my $q = $mset_opt->{q_raw} = $lss->{-cfg}->{'lei.q'} //
 				return $lei->fail("lei.q unset in $f");
 	my $lse = $lei->{lse} // die 'BUG: {lse} missing';
@@ -27,24 +26,18 @@ sub up1 ($$) {
 	} else {
 		$lse->query_approxidate($lse->git, $mset_opt->{qstr} = $q);
 	}
-	my $o = $lei->{opt}->{output} = $lss->{-cfg}->{'lei.q.output'} //
-		return $lei->fail("lei.q.output unset in $f");
-	ref($o) and return $lei->fail("multiple values of lei.q.output in $f");
-	if (defined(my $dd = $lss->{-cfg}->{'lei.q.dedupe'})) {
-		$lss->translate_dedupe($lei, $dd) or return;
-		$lei->{opt}->{dedupe} = $dd;
-	}
-	for my $k (qw(only include exclude)) {
+	# n.b. only a few CLI args are accepted for "up", so //= usually sets
+	for my $k ($lss->ARRAY_FIELDS) {
 		my $v = $lss->{-cfg}->get_all("lei.q.$k") // next;
-		$lei->{opt}->{$k} = $v;
+		$lei->{opt}->{$k} //= $v;
 	}
-	for my $k (qw(external local remote
-			import-remote import-before threads)) {
-		my $c = "lei.q.$k";
-		my $v = $lss->{-cfg}->{$c} // next;
-		ref($v) and return $lei->fail("multiple values of $c in $f");
-		$lei->{opt}->{$k} = $v;
+	for my $k ($lss->BOOL_FIELDS, $lss->SINGLE_FIELDS) {
+		my $v = $lss->{-cfg}->get_1('lei.q', $k) // next;
+		$lei->{opt}->{$k} //= $v;
 	}
+	my $o = $lei->{opt}->{output} // '';
+	return $lei->fail("lei.q.output unset in $f") if $o eq '';
+	$lss->translate_dedupe($lei) or return;
 	$lei->{lss} = $lss; # for LeiOverview->new and query_remote_mboxrd
 	my $lxs = $lei->lxs_prepare or return;
 	$lei->ale->refresh_externals($lxs, $lei);
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 2227c2ac..584ffde9 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -110,10 +110,20 @@ sub recent {
 
 sub over {}
 
+sub _check_mset_limit ($$$) {
+	my ($lei, $desc, $mset) = @_;
+	return if defined($lei->{opt}->{limit}); # user requested limit
+	my $est = $mset->get_matches_estimated;
+	my $tot = $lei->{mset_opt}->{total};
+	$est > $tot and $lei->qerr(<<"");
+# $desc estimated matches ($est) exceeds default --limit=$tot
+
+}
+
 sub _mset_more ($$) {
 	my ($mset, $mo) = @_;
 	my $size = $mset->size;
-	$size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{limit});
+	$size >= $mo->{limit} && (($mo->{offset} += $size) < $mo->{total});
 }
 
 # $startq will EOF when do_augment is done augmenting and allow
@@ -182,7 +192,7 @@ sub query_one_mset { # for --threads and l2m w/o sort
 	my $first_ids;
 	do {
 		$mset = $srch->mset($mo->{qstr}, $mo);
-		mset_progress($lei, $dir, $mset->size,
+		mset_progress($lei, $dir, $mo->{offset} + $mset->size,
 				$mset->get_matches_estimated);
 		wait_startq($lei); # wait for keyword updates
 		my $ids = $srch->mset_to_artnums($mset, $mo);
@@ -222,6 +232,7 @@ sub query_one_mset { # for --threads and l2m w/o sort
 			}
 		}
 	} while (_mset_more($mset, $mo));
+	_check_mset_limit($lei, $dir, $mset);
 	if ($lss && scalar(@$first_ids)) {
 		undef $stop_at;
 		my $max = $first_ids->[0];
@@ -244,7 +255,7 @@ sub query_combined_mset { # non-parallel for non-"--threads" users
 	my $each_smsg = $lei->{ovv}->ovv_each_smsg_cb($lei);
 	do {
 		$mset = $self->mset($mo->{qstr}, $mo);
-		mset_progress($lei, 'xsearch', $mset->size,
+		mset_progress($lei, 'xsearch', $mo->{offset} + $mset->size,
 				$mset->get_matches_estimated);
 		wait_startq($lei); # wait for keyword updates
 		for my $mitem ($mset->items) {
@@ -252,6 +263,7 @@ sub query_combined_mset { # non-parallel for non-"--threads" users
 			$each_smsg->($smsg, $mitem);
 		}
 	} while (_mset_more($mset, $mo));
+	_check_mset_limit($lei, 'xsearch', $mset);
 	undef $each_smsg; # may commit
 	$lei->{ovv}->ovv_atexit_child($lei);
 }

^ permalink raw reply related	[relevance 4%]

* [PATCH 00/12] lei: fix various annoyances
@ 2021-09-21  7:41  7% Eric Wong
  2021-09-21  7:41  4% ` [PATCH 12/12] lei q: improve --limit behavior and progress Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-09-21  7:41 UTC (permalink / raw)
  To: meta

Eric Wong (12):
  lei inspect: convert to WQ worker
  lei inspect: support NNTP URLs
  lei_mail_sync: account for non-unique cases
  lei: simplify internal arg2folder usage
  lei lcat: use single queue for ordering
  doc: lei-security: section for WIP auth methods
  lei lcat: support NNTP URLs
  lei: various completion improvements
  lei q: show progress on >1s preparation phase
  search: drop reopen retry message
  lei q: update messages to reflect --save default
  lei q: improve --limit behavior and progress

 Documentation/lei-q.pod               |   6 +-
 Documentation/lei-security.pod        |   8 ++
 lib/PublicInbox/LEI.pm                |   2 +-
 lib/PublicInbox/LeiExportKw.pm        |  12 +--
 lib/PublicInbox/LeiExternal.pm        |   5 ++
 lib/PublicInbox/LeiForgetMailSync.pm  |   9 +--
 lib/PublicInbox/LeiImport.pm          |  20 +++--
 lib/PublicInbox/LeiImportKw.pm        |  12 ++-
 lib/PublicInbox/LeiInspect.pm         | 103 +++++++++++++++++---------
 lib/PublicInbox/LeiLcat.pm            |  91 +++++++++++++----------
 lib/PublicInbox/LeiLsMailSource.pm    |  11 +--
 lib/PublicInbox/LeiMailSync.pm        | 102 +++++++++++++++++--------
 lib/PublicInbox/LeiNoteEvent.pm       |   5 +-
 lib/PublicInbox/LeiQuery.pm           |   9 ++-
 lib/PublicInbox/LeiRefreshMailSync.pm |  18 +++--
 lib/PublicInbox/LeiSavedSearch.pm     |  22 +++---
 lib/PublicInbox/LeiTag.pm             |  14 ++--
 lib/PublicInbox/LeiToMail.pm          |  23 +++++-
 lib/PublicInbox/LeiUp.pm              |  25 +++----
 lib/PublicInbox/LeiXSearch.pm         |  36 ++++++---
 lib/PublicInbox/Search.pm             |   1 -
 lib/PublicInbox/SharedKV.pm           |  19 +++--
 lib/PublicInbox/TestCommon.pm         |  11 ++-
 t/lei-import-nntp.t                   |  44 +++++++++++
 t/lei-q-save.t                        |   6 +-
 t/lei-watch.t                         |   2 +-
 26 files changed, 414 insertions(+), 202 deletions(-)

^ permalink raw reply	[relevance 7%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-09-21  7:41  7% [PATCH 00/12] lei: fix various annoyances Eric Wong
2021-09-21  7:41  4% ` [PATCH 12/12] lei q: improve --limit behavior and progress Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).