user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH] inboxwritable: drop {-importer} cyclic reference
Date: Thu, 14 Nov 2019 01:12:11 +0000	[thread overview]
Message-ID: <20191114011211.24855-1-e@80x24.org> (raw)

InboxWritable caching the result of ->importer leads to a
circular reference, since the returned (V2Writable|Import) object
holds onto the calling InboxWritable object.

With public-inbox-watch, this leads to a memory leak if a user
is reloading via SIGHUP after a message is imported (it would
only become noticeable with SIGHUPs after every message imported).

I would not expect anybody to notice this in real-world
usage.  I only noticed this since I was making -xcpdb suitable
for long-lived process use (e.g. "mod_perl style") and a flock
remained unreleased on v1 inboxes after resharding.

WatchMaildir (used by -watch) already handles caching of the
importer object itself, and all of our other real-world uses of
->importer are short-lived or designed for batch scripts, so
there's no need to cache the importer result internally.
---
 lib/PublicInbox/Admin.pm         |  4 ++--
 lib/PublicInbox/InboxWritable.pm | 30 ++++++++++++++----------------
 lib/PublicInbox/Xapcmd.pm        | 25 ++++++++++---------------
 script/public-inbox-index        |  2 +-
 4 files changed, 27 insertions(+), 34 deletions(-)

diff --git a/lib/PublicInbox/Admin.pm b/lib/PublicInbox/Admin.pm
index e9fb5d6f..d2a0d06b 100644
--- a/lib/PublicInbox/Admin.pm
+++ b/lib/PublicInbox/Admin.pm
@@ -199,12 +199,12 @@ invalid indexlevel=$indexlevel (must be `basic', `medium', or `full')
 }
 
 sub index_inbox {
-	my ($ibx, $opt) = @_;
+	my ($ibx, $im, $opt) = @_;
 	my $jobs = delete $opt->{jobs} if $opt;
 	if (ref($ibx) && ($ibx->{version} || 1) == 2) {
 		eval { require PublicInbox::V2Writable };
 		die "v2 requirements not met: $@\n" if $@;
-		my $v2w = eval { $ibx->importer(0) } || eval {
+		my $v2w = $im // eval { $ibx->importer(0) } || eval {
 			PublicInbox::V2Writable->new($ibx, {nproc=>$jobs});
 		};
 		if (defined $jobs) {
diff --git a/lib/PublicInbox/InboxWritable.pm b/lib/PublicInbox/InboxWritable.pm
index 9eab394d..c73910ac 100644
--- a/lib/PublicInbox/InboxWritable.pm
+++ b/lib/PublicInbox/InboxWritable.pm
@@ -52,22 +52,20 @@ sub init_inbox {
 
 sub importer {
 	my ($self, $parallel) = @_;
-	$self->{-importer} ||= do {
-		my $v = $self->{version} || 1;
-		if ($v == 2) {
-			eval { require PublicInbox::V2Writable };
-			die "v2 not supported: $@\n" if $@;
-			my $opt = $self->{-creat_opt};
-			my $v2w = PublicInbox::V2Writable->new($self, $opt);
-			$v2w->{parallel} = $parallel;
-			$v2w;
-		} elsif ($v == 1) {
-			my @arg = (undef, undef, undef, $self);
-			PublicInbox::Import->new(@arg);
-		} else {
-			$! = 78; # EX_CONFIG 5.3.5 local configuration error
-			die "unsupported inbox version: $v\n";
-		}
+	my $v = $self->{version} || 1;
+	if ($v == 2) {
+		eval { require PublicInbox::V2Writable };
+		die "v2 not supported: $@\n" if $@;
+		my $opt = $self->{-creat_opt};
+		my $v2w = PublicInbox::V2Writable->new($self, $opt);
+		$v2w->{parallel} = $parallel;
+		$v2w;
+	} elsif ($v == 1) {
+		my @arg = (undef, undef, undef, $self);
+		PublicInbox::Import->new(@arg);
+	} else {
+		$! = 78; # EX_CONFIG 5.3.5 local configuration error
+		die "unsupported inbox version: $v\n";
 	}
 }
 
diff --git a/lib/PublicInbox/Xapcmd.pm b/lib/PublicInbox/Xapcmd.pm
index 95d472b0..c807bf10 100644
--- a/lib/PublicInbox/Xapcmd.pm
+++ b/lib/PublicInbox/Xapcmd.pm
@@ -15,12 +15,10 @@ use File::Basename qw(dirname);
 our $XAPIAN_COMPACT = $ENV{XAPIAN_COMPACT} || 'xapian-compact';
 our @COMPACT_OPT = qw(jobs|j=i quiet|q blocksize|b=s no-full|n fuller|F);
 
-sub commit_changes ($$$) {
-	my ($ibx, $tmp, $opt) = @_;
+sub commit_changes ($$$$) {
+	my ($ibx, $im, $tmp, $opt) = @_;
 	my $reshard = $opt->{reshard};
 	my $reindex = $opt->{reindex};
-	my $im = $ibx->importer(0);
-	$im->lock_acquire if !$opt->{-coarse_lock};
 
 	$SIG{INT} or die 'BUG: $SIG{INT} not handled';
 	my @old_shard;
@@ -78,10 +76,7 @@ sub commit_changes ($$$) {
 			}
 		}
 
-		PublicInbox::Admin::index_inbox($ibx, $opt);
-		# implicit lock_release
-	} else {
-		$im->lock_release;
+		PublicInbox::Admin::index_inbox($ibx, $im, $opt);
 	}
 }
 
@@ -98,8 +93,8 @@ sub runnable_or_die ($) {
 	which($exe) or die "$exe not found in PATH\n";
 }
 
-sub prepare_reindex ($$) {
-	my ($ibx, $reindex) = @_;
+sub prepare_reindex ($$$) {
+	my ($ibx, $im, $reindex) = @_;
 	if ($ibx->{version} == 1) {
 		my $dir = $ibx->search->xdir(1);
 		my $xdb = Search::Xapian::Database->new($dir);
@@ -107,9 +102,8 @@ sub prepare_reindex ($$) {
 			$reindex->{from} = $lc;
 		}
 	} else { # v2
-		my $v2w = $ibx->importer(0);
 		my $max;
-		$v2w->git_dir_latest(\$max) or return;
+		$im->git_dir_latest(\$max) or return;
 		my $from = $reindex->{from};
 		my $mm = $ibx->mm;
 		my $v = PublicInbox::Search::SCHEMA_VERSION();
@@ -229,20 +223,21 @@ sub run {
 			$tmp->{$_} ||= undef for @$src;
 		}
 	}
-	my $im = $ibx->importer(0);
 	my $max = $opt->{jobs} || scalar(@q);
 	$ibx->with_umask(sub {
+		my $im = $ibx->importer(0);
 		$im->lock_acquire;
 
 		# fine-grained locking if we prepare for reindex
 		if (!$opt->{-coarse_lock}) {
-			prepare_reindex($ibx, $reindex);
+			prepare_reindex($ibx, $im, $reindex);
 			$im->lock_release;
 		}
 
 		delete($ibx->{$_}) for (qw(mm over search)); # cleanup
 		process_queue(\@q, $cb, $max, $opt);
-		commit_changes($ibx, $tmp, $opt);
+		$im->lock_acquire if !$opt->{-coarse_lock};
+		commit_changes($ibx, $im, $tmp, $opt);
 	});
 }
 
diff --git a/script/public-inbox-index b/script/public-inbox-index
index 439da157..139b6e56 100755
--- a/script/public-inbox-index
+++ b/script/public-inbox-index
@@ -33,4 +33,4 @@ foreach my $ibx (@ibxs) {
 
 PublicInbox::Admin::require_or_die(keys %$mods);
 PublicInbox::Admin::progress_prepare($opt);
-PublicInbox::Admin::index_inbox($_, $opt) for @ibxs;
+PublicInbox::Admin::index_inbox($_, undef, $opt) for @ibxs;

                 reply	other threads:[~2019-11-14  1:12 UTC|newest]

Thread overview: [no followups] expand[flat|nested]  mbox.gz  Atom feed

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20191114011211.24855-1-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).