user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 2/1] daemons: revamp periodic cleanup task
  @ 2021-09-23  0:46  4%         ` Eric Wong
  0 siblings, 0 replies; 4+ results
From: Eric Wong @ 2021-09-23  0:46 UTC (permalink / raw)
  To: Konstantin Ryabitsev; +Cc: meta

Neither Inboxes nor ExtSearch objects were retrying correctly
when there are live git processes, but the inboxes were getting
rescanned for search or other reasons.  Ensure the scan retries
eventually if there's live processes.

We also need to update the cleanup task to detect Xapian shard
count changes, since Xapian ->reopen is enough to detect any
other Xapian changes.  Otherwise, we just issue an inexpensive
->reopen call and let Xapian check whether there's anything
worth reopening.

This also lets us eliminate the Devel::Peek dependency.
---
 ci/deps.perl                 |  5 ---
 lib/PublicInbox/Admin.pm     |  3 +-
 lib/PublicInbox/ExtSearch.pm |  7 +++-
 lib/PublicInbox/Git.pm       | 11 ++++--
 lib/PublicInbox/Inbox.pm     | 67 ++++++++++++------------------------
 lib/PublicInbox/Search.pm    | 16 ++++++++-
 t/extsearch.t                | 15 ++++++++
 7 files changed, 68 insertions(+), 56 deletions(-)

diff --git a/ci/deps.perl b/ci/deps.perl
index a797911a..ae85986d 100755
--- a/ci/deps.perl
+++ b/ci/deps.perl
@@ -17,7 +17,6 @@ my $profiles = {
 	essential => [ qw(
 		git
 		perl
-		Devel::Peek
 		Digest::SHA
 		Encode
 		ExtUtils::MakeMaker
@@ -79,10 +78,6 @@ my $non_auto = {
 		pkg => 'p5-TimeDate',
 		rpm => 'perl-TimeDate',
 	},
-	'Devel::Peek' => {
-		deb => 'perl', # libperl5.XX, but the XX varies
-		pkg => 'perl5',
-	},
 	'Digest::SHA' => {
 		deb => 'perl', # libperl5.XX, but the XX varies
 		pkg => 'perl5',
diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index 20964f9c..dcf17cf5 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -198,8 +198,7 @@ sub resolve_inboxes ($;$$) {
 	$opt->{-eidx_ok} ? (\@ibxs, \@eidx) : @ibxs;
 }
 
-# TODO: make Devel::Peek optional, only used for daemon
-my @base_mod = qw(Devel::Peek);
+my @base_mod = ();
 my @over_mod = qw(DBD::SQLite DBI);
 my %mod_groups = (
 	-index => [ @base_mod, @over_mod ],
diff --git a/lib/PublicInbox/ExtSearch.pm b/lib/PublicInbox/ExtSearch.pm
index cd7cba2e..7520e71c 100644
--- a/lib/PublicInbox/ExtSearch.pm
+++ b/lib/PublicInbox/ExtSearch.pm
@@ -112,6 +112,11 @@ sub description {
 		'$EXTINDEX_DIR/description missing';
 }
 
+sub search {
+	PublicInbox::Inbox::_cleanup_later($_[0]);
+	$_[0];
+}
+
 no warnings 'once';
 *base_url = \&PublicInbox::Inbox::base_url;
 *smsg_eml = \&PublicInbox::Inbox::smsg_eml;
@@ -121,6 +126,6 @@ no warnings 'once';
 *recent = \&PublicInbox::Inbox::recent;
 
 *max_git_epoch = *nntp_usable = *msg_by_path = \&mm; # undef
-*isrch = *search = \&PublicInbox::Search::reopen;
+*isrch = \&search;
 
 1;
diff --git a/lib/PublicInbox/Git.pm b/lib/PublicInbox/Git.pm
index cf51239f..3c577ab3 100644
--- a/lib/PublicInbox/Git.pm
+++ b/lib/PublicInbox/Git.pm
@@ -400,7 +400,7 @@ sub cleanup {
 	delete $self->{inflight_c};
 	_destroy($self, qw(cat_rbuf in out pid));
 	_destroy($self, qw(chk_rbuf in_c out_c pid_c err_c));
-	!!($self->{pid} || $self->{pid_c});
+	defined($self->{pid}) || defined($self->{pid_c});
 }
 
 
@@ -523,18 +523,25 @@ sub manifest_entry {
 	$ent;
 }
 
+# returns true if there are pending cat-file processes
 sub cleanup_if_unlinked {
 	my ($self) = @_;
 	return cleanup($self) if $^O ne 'linux';
 	# Linux-specific /proc/$PID/maps access
 	# TODO: support this inside git.git
+	my $ret = 0;
 	for my $fld (qw(pid pid_c)) {
 		my $pid = $self->{$fld} // next;
-		open my $fh, '<', "/proc/$pid/maps" or next;
+		open my $fh, '<', "/proc/$pid/maps" or return cleanup($self);
 		while (<$fh>) {
+			# n.b. we do not restart for unlinked multi-pack-index
+			# since it's not too huge, and the startup cost may
+			# be higher.
 			return cleanup($self) if /\.(?:idx|pack) \(deleted\)$/;
 		}
+		++$ret;
 	}
+	$ret;
 }
 
 1;
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index 02ffc7be..c0962af9 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -16,70 +16,47 @@ use Carp qw(croak);
 # admin will attempt to replace them atomically after compact/vacuum
 # and we need to be prepared for that.
 my $cleanup_timer;
-my $cleanup_avail = -1; # 0, or 1
-my $have_devel_peek;
 my $CLEANUP = {}; # string(inbox) -> inbox
 
 sub git_cleanup ($) {
 	my ($self) = @_;
-	if ($self->isa(__PACKAGE__)) {
-		# normal Inbox; low startup cost, and likely to have many
-		# many so this keeps process/pipe counts in check
-		$self->{git}->cleanup if $self->{git};
-	} else {
-		# ExtSearch, high startup cost if ->ALL, and probably
-		# only one per-daemon, so teardown only if required:
-		$self->git->cleanup_if_unlinked;
-	}
+	my $git = $self->{git} // return undef;
+	# normal inboxes have low startup cost and there may be many, so
+	# keep process+pipe counts in check.  ExtSearch may have high startup
+	# cost (e.g. ->ALL) and but likely one per-daemon, so cleanup only
+	# if there's unlinked files
+	my $live = $self->isa(__PACKAGE__) ? $git->cleanup
+					: $git->cleanup_if_unlinked;
+	delete($self->{git}) unless $live;
+	$live;
 }
 
+# returns true if further checking is required
+sub cleanup_shards { $_[0]->{search} ? $_[0]->{search}->cleanup_shards : undef }
+
 sub cleanup_task () {
 	$cleanup_timer = undef;
 	my $next = {};
 	for my $ibx (values %$CLEANUP) {
-		my $again;
-		if ($have_devel_peek) {
-			foreach my $f (qw(search)) {
-				# we bump refcnt by assigning tmp, here:
-				my $tmp = $ibx->{$f} or next;
-				next if Devel::Peek::SvREFCNT($tmp) > 2;
-				delete $ibx->{$f};
-				# refcnt is zero when tmp is out-of-scope
-			}
-		}
-		git_cleanup($ibx);
-		if (my $gits = $ibx->{-repo_objs}) {
-			foreach my $git (@$gits) {
-				$again = 1 if $git->cleanup;
-			}
+		my $again = git_cleanup($ibx);
+		$ibx->cleanup_shards and $again = 1;
+		for my $git (@{$ibx->{-repo_objs}}) {
+			$again = 1 if $git->cleanup;
 		}
 		check_inodes($ibx);
-		if ($have_devel_peek) {
-			$again ||= !!$ibx->{search};
-		}
 		$next->{"$ibx"} = $ibx if $again;
 	}
 	$CLEANUP = $next;
+	$cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
 }
 
-sub cleanup_possible () {
+sub _cleanup_later ($) {
 	# no need to require DS, here, if it were enabled another
 	# module would've require'd it, already
-	eval { PublicInbox::DS::in_loop() } or return 0;
-
-	eval {
-		require Devel::Peek; # needs separate package in Fedora
-		$have_devel_peek = 1;
-	};
-	1;
-}
-
-sub _cleanup_later ($) {
-	my ($self) = @_;
-	$cleanup_avail = cleanup_possible() if $cleanup_avail < 0;
-	return if $cleanup_avail != 1;
-	$cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
-	$CLEANUP->{"$self"} = $self;
+	if (eval { PublicInbox::DS::in_loop() }) {
+		$cleanup_timer //= PublicInbox::DS::later(\&cleanup_task);
+		$CLEANUP->{"$_[0]"} = $_[0]; # $self
+	}
 }
 
 sub _set_limiter ($$$) {
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index 1d1cd2f5..d285c11c 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -199,7 +199,7 @@ sub xdb_shards_flat ($) {
 	my (@xdb, $slow_phrase);
 	load_xapian();
 	$self->{qp_flags} //= $QP_FLAGS;
-	if ($xpfx =~ m/xapian${\SCHEMA_VERSION}\z/) {
+	if ($xpfx =~ m!/xapian[0-9]+\z!) {
 		@xdb = ($X{Database}->new($xpfx));
 		$self->{qp_flags} |= FLAG_PHRASE() if !-f "$xpfx/iamchert";
 	} else {
@@ -243,6 +243,20 @@ sub xdb ($) {
 	};
 }
 
+# returns true if a future rescan is desired
+sub cleanup_shards {
+	my ($self) = @_;
+	return unless exists($self->{xdb});
+	my $xpfx = $self->{xpfx};
+	return reopen($self) if $xpfx =~ m!/xapian[0-9]+\z!; # true
+	opendir(my $dh, $xpfx) or return warn("$xpfx gone: $!\n"); # true
+	my $nr = grep(/\A[0-9]+\z/, readdir($dh)) or
+		return warn("$xpfx has no shards\n"); # true
+	return reopen($self) if $nr == ($self->{nshard} // -1);
+	delete($self->{xdb});
+	undef;
+}
+
 sub new {
 	my ($class, $ibx) = @_;
 	ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
diff --git a/t/extsearch.t b/t/extsearch.t
index ad4f2c6d..6c074022 100644
--- a/t/extsearch.t
+++ b/t/extsearch.t
@@ -436,12 +436,27 @@ for my $j (1, 3, 6) {
 
 SKIP: {
 	my $d = "$home/extindex-j1";
+	my $es = PublicInbox::ExtSearch->new($d);
+	ok(my $nresult0 = $es->mset('z:0..')->size, 'got results');
+	ok(ref($es->{xdb}), '{xdb} created');
+	my $nshards1 = $es->{nshard};
+	is($nshards1, 1, 'correct shard count');
+	my $xdb_str = "$es->{xdb}";
+	ok($es->cleanup_shards, 'cleanup_shards noop');
+	is("$es->{xdb}", $xdb_str, '{xdb} unchanged');
+
 	my $o = { 2 => \(my $err = '') };
 	ok(run_script([qw(-xcpdb -R4), $d]), 'xcpdb R4');
 	my @dirs = glob("$d/ei*/?");
 	for my $i (0..3) {
 		is(grep(m!/ei[0-9]+/$i\z!, @dirs), 1, "shard [$i] created");
 	}
+	is($es->cleanup_shards, undef, 'cleanup_shards cleaned');
+	ok(!defined($es->{xdb}), 'old {xdb} gone');
+	is($es->cleanup_shards, undef, 'cleanup_shards clean idempotent');
+	is($es->mset('z:0..')->size, $nresult0, 'new shards, same results');
+	ok($es->cleanup_shards, 'cleanup_shards true after open');
+
 	for my $i (4..5) {
 		is(grep(m!/ei[0-9]+/$i\z!, @dirs), 0, "no shard [$i]");
 	}

^ permalink raw reply related	[relevance 4%]

* [PATCH 1/2] extmsg: search_partial: use ->isrch if available
  @ 2021-09-25 22:16  6% ` Eric Wong
  2021-09-25 22:16  5% ` [PATCH 2/2] search: avoid setting undef hashtable entries Eric Wong
  1 sibling, 0 replies; 4+ results
From: Eric Wong @ 2021-09-25 22:16 UTC (permalink / raw)
  To: meta

This allows us to avoid creating ibx->{search}->{xdb} at this
spot by using an `undef' value.  This is a step towards
eliminating the innocuous "/path/to/inboxdir/xap15 has no shards"
messages introduced in commit 63d7b8ce (daemons: revamp
periodic cleanup task, 2021-09-23)
---
 lib/PublicInbox/ExtMsg.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/ExtMsg.pm b/lib/PublicInbox/ExtMsg.pm
index b7427b1b..c134de55 100644
--- a/lib/PublicInbox/ExtMsg.pm
+++ b/lib/PublicInbox/ExtMsg.pm
@@ -32,7 +32,7 @@ sub PARTIAL_MAX () { 100 }
 sub search_partial ($$) {
 	my ($ibx, $mid) = @_;
 	return if length($mid) < $MIN_PARTIAL_LEN;
-	my $srch = $ibx->search or return; # NOT ->isrch, we already try ->ALL
+	my $srch = $ibx->isrch or return;
 	my $opt = { limit => PARTIAL_MAX, relevance => -1 };
 	my @try = ("m:$mid*");
 	my $chop = $mid;

^ permalink raw reply related	[relevance 6%]

* [PATCH 2/2] search: avoid setting undef hashtable entries
    2021-09-25 22:16  6% ` [PATCH 1/2] extmsg: search_partial: use ->isrch if available Eric Wong
@ 2021-09-25 22:16  5% ` Eric Wong
  1 sibling, 0 replies; 4+ results
From: Eric Wong @ 2021-09-25 22:16 UTC (permalink / raw)
  To: meta

`undef' entries still take up a slot in the hash table, and
cause the `exists' check to false-positive in ->cleanup_shards.
This should fully fix the (innocuous) messages introduced in
commit 63d7b8ce (daemons: revamp periodic cleanup task, 2021-09-23)
---
 lib/PublicInbox/AdminEdit.pm |  5 +++--
 lib/PublicInbox/Inbox.pm     | 15 +++++++--------
 lib/PublicInbox/Search.pm    | 12 ++++++------
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/lib/PublicInbox/AdminEdit.pm b/lib/PublicInbox/AdminEdit.pm
index 2f6707d8..c8c3d3e8 100644
--- a/lib/PublicInbox/AdminEdit.pm
+++ b/lib/PublicInbox/AdminEdit.pm
@@ -27,8 +27,9 @@ sub check_editable ($) {
 		# Make sure it's purged in that case:
 		$ibx->over or die "no over.sqlite3 in $ibx->{inboxdir}\n";
 
-		# $ibx->{search} is populated by $ibx->over call
-		my $xdir_ro = $ibx->{search}->xdir(1);
+		require PublicInbox::Search;
+		my $xdir_ro = PublicInbox::Search->new($ibx)->xdir(1);
+
 		my $nshard = 0;
 		foreach my $shard (<$xdir_ro/*>) {
 			if (-d $shard && $shard =~ m!/[0-9]+\z!) {
diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index c0962af9..3ba92c99 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -167,12 +167,12 @@ sub mm {
 
 sub search {
 	my ($self) = @_;
-	my $srch = $self->{search} //= eval {
+	$self->{search} // eval {
 		_cleanup_later($self);
 		require PublicInbox::Search;
-		PublicInbox::Search->new($self);
+		my $srch = PublicInbox::Search->new($self);
+		(eval { $srch->xdb }) ? ($self->{search} = $srch) : undef;
 	};
-	(eval { $srch->xdb }) ? $srch : undef;
 }
 
 # isrch is preferred for read-only interfaces if available since it
@@ -181,15 +181,14 @@ sub isrch { $_[0]->{isrch} // search($_[0]) }
 
 sub over {
 	my ($self, $req) = @_;
-	$self->{over} //= eval {
-		my $srch = $self->{search} //= do {
-			_cleanup_later($self);
+	$self->{over} // eval {
+		my $srch = $self->{search} // do {
 			require PublicInbox::Search;
 			PublicInbox::Search->new($self);
 		};
 		my $over = PublicInbox::Over->new("$srch->{xpfx}/over.sqlite3");
 		$over->dbh; # may fail
-		$over;
+		$self->{over} = $over;
 	} // ($req ? croak("E: $@") : undef);
 }
 
@@ -293,7 +292,7 @@ sub imap_url { $_[0]->{-imap_url} //= _x_url($_[0], 'imap', $_[1]) }
 sub nntp_usable {
 	my ($self) = @_;
 	my $ret = mm($self) && over($self);
-	$self->{mm} = $self->{over} = $self->{search} = undef;
+	delete @$self{qw(mm over search)};
 	$ret;
 }
 
diff --git a/lib/PublicInbox/Search.pm b/lib/PublicInbox/Search.pm
index d285c11c..17e202e1 100644
--- a/lib/PublicInbox/Search.pm
+++ b/lib/PublicInbox/Search.pm
@@ -234,12 +234,12 @@ sub mset_to_artnums {
 
 sub xdb ($) {
 	my ($self) = @_;
-	$self->{xdb} //= do {
+	$self->{xdb} // do {
 		my @xdb = $self->xdb_shards_flat or return;
 		$self->{nshard} = scalar(@xdb);
 		my $xdb = shift @xdb;
 		$xdb->add_database($_) for @xdb;
-		$xdb;
+		$self->{xdb} = $xdb;
 	};
 }
 
@@ -261,10 +261,10 @@ sub new {
 	my ($class, $ibx) = @_;
 	ref $ibx or die "BUG: expected PublicInbox::Inbox object: $ibx";
 	my $xap = $ibx->version > 1 ? 'xap' : 'public-inbox/xapian';
-	bless {
-		xpfx => "$ibx->{inboxdir}/$xap" . SCHEMA_VERSION,
-		altid => $ibx->{altid},
-	}, $class;
+	my $xpfx = "$ibx->{inboxdir}/$xap".SCHEMA_VERSION;
+	my $self = bless { xpfx => $xpfx }, $class;
+	$self->{altid} = $ibx->{altid} if defined($ibx->{altid});
+	$self;
 }
 
 sub reopen {

^ permalink raw reply related	[relevance 5%]

* [PATCH] inbox: do not vivify {-repo_objs} during cleanup
  @ 2021-09-29  0:14  7% ` Eric Wong
  0 siblings, 0 replies; 4+ results
From: Eric Wong @ 2021-09-29  0:14 UTC (permalink / raw)
  To: Kyle Meyer; +Cc: meta

Kyle Meyer <kyle@kyleam.com> wrote:
> I've been noticing a good number of these error messages:
>
>   E: BUG {try_gits} empty at /usr/local/share/perl/5.28.1/PublicInbox/SolverGit.pm line 80.

Thanks, the following should fix it (though I suppose @INC paths
should be cleaned in WWW):
------8<-----
Subject: [PATCH] inbox: do not vivify {-repo_objs} during cleanup

This caused config->repo_objs to not fill in {-repo_objs}
properly before starting solver.

Reported-by: Kyle Meyer <kyle@kyleam.com>
Link: https://public-inbox.org/meta/87o88cqobd.fsf@kyleam.com/
Fixes: 63d7b8ceee55a34 ("daemons: revamp periodic cleanup task")
---
 lib/PublicInbox/Inbox.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/Inbox.pm b/lib/PublicInbox/Inbox.pm
index c525f4d1..95467d5a 100644
--- a/lib/PublicInbox/Inbox.pm
+++ b/lib/PublicInbox/Inbox.pm
@@ -30,7 +30,7 @@ sub do_cleanup {
 	my ($ibx) = @_;
 	my $live = git_cleanup($ibx);
 	$ibx->cleanup_shards and $live = 1;
-	for my $git (@{$ibx->{-repo_objs}}) {
+	for my $git (@{$ibx->{-repo_objs} // []}) {
 		$live = 1 if $git->cleanup(1);
 	}
 	delete @$ibx{qw(over mm description cloneurl

^ permalink raw reply related	[relevance 7%]

Results 1-4 of 4 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-09-21 14:47     Holding on to deleted packfiles Konstantin Ryabitsev
2021-09-21 19:06     ` Eric Wong
2021-09-21 20:57       ` Konstantin Ryabitsev
2021-09-21 21:37         ` Eric Wong
2021-09-22  9:45           ` [PATCH] gcf2 + extsearch: check for unlinked files on Linux Eric Wong
2021-09-23  0:46  4%         ` [PATCH 2/1] daemons: revamp periodic cleanup task Eric Wong
2021-09-25 22:16     [PATCH 0/2] www: shrink per-inbox hashtable slots Eric Wong
2021-09-25 22:16  6% ` [PATCH 1/2] extmsg: search_partial: use ->isrch if available Eric Wong
2021-09-25 22:16  5% ` [PATCH 2/2] search: avoid setting undef hashtable entries Eric Wong
2021-09-28 23:21     solver: 'BUG {try_gits} empty' error Kyle Meyer
2021-09-29  0:14  7% ` [PATCH] inbox: do not vivify {-repo_objs} during cleanup Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).