user/dev discussion of public-inbox itself
 help / color / Atom feed
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 2/1] improve error handling on import fork failures
Date: Fri, 31 Jul 2020 08:56:20 +0000
Message-ID: <20200731085619.GA10081@dcvr> (raw)
In-Reply-To: <20200730080533.GA8841@dcvr>

v?fork failures seems to be the cause of locks not getting
released in -watch.  Ensure lock release doesn't get skipped
in ->done for both v1 and v2 inboxes.  We also need to do
everything we can to ensure DB handles, pipes and processes
get released even in the face of failure.

While we're at it, make failures around `git update-server-info'
non-fatal, since smart HTTP seems more popular anyways.
---
 lib/PublicInbox/Import.pm       | 37 +++++++++++++++++++------------
 lib/PublicInbox/V2Writable.pm   | 28 +++++++++++++++++------
 lib/PublicInbox/WatchMaildir.pm | 39 +++++++++++++++++++++++----------
 t/psgi_search.t                 |  1 +
 4 files changed, 72 insertions(+), 33 deletions(-)

diff --git a/lib/PublicInbox/Import.pm b/lib/PublicInbox/Import.pm
index b50c662c..f752826e 100644
--- a/lib/PublicInbox/Import.pm
+++ b/lib/PublicInbox/Import.pm
@@ -175,13 +175,16 @@ sub _update_git_info ($$) {
 		my $env = { GIT_INDEX_FILE => $index };
 		run_die([@cmd, qw(read-tree -m -v -i), $self->{ref}], $env);
 	}
-	run_die([@cmd, 'update-server-info']);
+	eval { run_die([@cmd, 'update-server-info']) };
 	my $ibx = $self->{ibx};
-	($ibx && $self->{path_type} eq '2/38') and eval {
-		require PublicInbox::SearchIdx;
-		my $s = PublicInbox::SearchIdx->new($ibx);
-		$s->index_sync({ ref => $self->{ref} });
-	};
+	if ($ibx && $ibx->version == 1 && -d "$ibx->{inboxdir}/public-inbox" &&
+				eval { require PublicInbox::SearchIdx }) {
+		eval {
+			my $s = PublicInbox::SearchIdx->new($ibx);
+			$s->index_sync({ ref => $self->{ref} });
+		};
+		warn "$ibx->{inboxdir} index failed: $@\n" if $@;
+	}
 	eval { run_die([@cmd, qw(gc --auto)]) } if $do_gc;
 }
 
@@ -460,17 +463,23 @@ sub init_bare {
 sub done {
 	my ($self) = @_;
 	my $w = delete $self->{out} or return;
-	my $r = delete $self->{in} or die 'BUG: missing {in} when done';
-	print $w "done\n" or wfail;
-	my $pid = delete $self->{pid} or die 'BUG: missing {pid} when done';
-	waitpid($pid, 0) == $pid or die 'fast-import did not finish';
-	$? == 0 or die "fast-import failed: $?";
-
+	eval {
+		my $r = delete $self->{in} or die 'BUG: missing {in} when done';
+		print $w "done\n" or wfail;
+		my $pid = delete $self->{pid} or
+				die 'BUG: missing {pid} when done';
+		waitpid($pid, 0) == $pid or die 'fast-import did not finish';
+		$? == 0 or die "fast-import failed: $?";
+	};
+	my $wait_err = $@;
 	my $nchg = delete $self->{nchg};
-	_update_git_info($self, 1) if $nchg;
+	if ($nchg && !$wait_err) {
+		eval { _update_git_info($self, 1) };
+		warn "E: $self->{git}->{git_dir} update info: $@\n" if $@;
+	}
 	$self->lock_release(!!$nchg);
-
 	$self->{git}->cleanup;
+	die $wait_err if $wait_err;
 }
 
 sub atfork_child {
diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm
index e071bc1e..e1c9a393 100644
--- a/lib/PublicInbox/V2Writable.pm
+++ b/lib/PublicInbox/V2Writable.pm
@@ -660,21 +660,35 @@ sub barrier { checkpoint($_[0], 1) };
 # public
 sub done {
 	my ($self) = @_;
-	my $im = delete $self->{im};
-	$im->done if $im; # PublicInbox::Import::done
-	checkpoint($self);
-	my $mm = delete $self->{mm};
-	$mm->{dbh}->commit if $mm;
+	my $err = '';
+	if (my $im = delete $self->{im}) {
+		eval { $im->done }; # PublicInbox::Import::done
+		$err .= "import done: $@\n" if $@;
+	}
+	if (!$err) {
+		eval { checkpoint($self) };
+		$err .= "checkpoint: $@\n" if $@;
+	}
+	if (my $mm = delete $self->{mm}) {
+		my $m = $err ? 'rollback' : 'commit';
+		eval { $mm->{dbh}->$m };
+		$err .= "msgmap $m: $@\n" if $@;
+	}
 	my $shards = delete $self->{idx_shards};
 	if ($shards) {
-		$_->remote_close for @$shards;
+		for (@$shards) {
+			eval { $_->remote_close };
+			$err .= "shard close: $@\n" if $@;
+		}
 	}
-	$self->{over}->disconnect;
+	eval { $self->{over}->disconnect };
+	$err .= "over disconnect: $@\n" if $@;
 	delete $self->{bnote};
 	my $nbytes = $self->{total_bytes};
 	$self->{total_bytes} = 0;
 	$self->lock_release(!!$nbytes) if $shards;
 	$self->{ibx}->git->cleanup;
+	die $err if $err;
 }
 
 sub fill_alternates ($$) {
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 7547f6e4..3cfb254d 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -132,17 +132,25 @@ sub _done_for_now {
 sub remove_eml_i { # each_inbox callback
 	my ($ibx, $arg) = @_;
 	my ($self, $eml, $loc) = @$arg;
+	my $im;
 	eval {
-		my $im = _importer_for($self, $ibx);
+		$im = _importer_for($self, $ibx);
 		$im->remove($eml, 'spam');
 		if (my $scrub = $ibx->filter($im)) {
 			my $scrubbed = $scrub->scrub($eml, 1);
-			$scrubbed or return;
-			$scrubbed == REJECT() and return;
-			$im->remove($scrubbed, 'spam');
+			if ($scrubbed && $scrubbed != REJECT) {
+				$im->remove($scrubbed, 'spam');
+			}
 		}
 	};
-	warn "error removing spam at: $loc from $ibx->{name}: $@\n" if $@;
+	if ($@) {
+		warn "error removing spam at: $loc from $ibx->{name}: $@\n";
+		local $PublicInbox::DS::in_loop = 0; # waitpid() synchronously
+		if ($im) {
+			eval { $im->done };
+			warn "$ibx->{name} ->done failed: $@\n" if $@;
+		}
+	}
 }
 
 sub _remove_spam {
@@ -155,7 +163,6 @@ sub _remove_spam {
 
 sub import_eml ($$$) {
 	my ($self, $ibx, $eml) = @_;
-	my $im = _importer_for($self, $ibx);
 
 	# any header match means it's eligible for the inbox:
 	if (my $watch_hdrs = $ibx->{-watchheaders}) {
@@ -167,13 +174,21 @@ sub import_eml ($$$) {
 		}
 		return unless $ok;
 	}
-
-	if (my $scrub = $ibx->filter($im)) {
-		my $ret = $scrub->scrub($eml) or return;
-		$ret == REJECT() and return;
-		$eml = $ret;
+	my $im;
+	eval {
+		$im = _importer_for($self, $ibx);
+		if (my $scrub = $ibx->filter($im)) {
+			my $scrubbed = $scrub->scrub($eml) or return;
+			$scrubbed == REJECT and return;
+			$eml = $scrubbed;
+		}
+		$im->add($eml, $self->{spamcheck});
+	};
+	if ($@) {
+		warn "$ibx->{name} add failed: $@\n";
+		eval { $im->done };
+		warn "$ibx->{name} ->done failed: $@\n" if $@;
 	}
-	$im->add($eml, $self->{spamcheck});
 }
 
 sub _try_path {
diff --git a/t/psgi_search.t b/t/psgi_search.t
index 64f8b1ac..2d12ba6a 100644
--- a/t/psgi_search.t
+++ b/t/psgi_search.t
@@ -14,6 +14,7 @@ my @mods = qw(DBD::SQLite Search::Xapian HTTP::Request::Common Plack::Test
 require_mods(@mods);
 use_ok($_) for (qw(HTTP::Request::Common Plack::Test));
 use_ok 'PublicInbox::WWW';
+use_ok 'PublicInbox::SearchIdx';
 my ($tmpdir, $for_destroy) = tmpdir();
 
 my $ibx = PublicInbox::Inbox->new({

  reply index

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-07-30  8:05 [PATCH] lock: show failure path Eric Wong
2020-07-31  8:56 ` Eric Wong [this message]
2020-07-31 21:36   ` [PATCH 2/1 v2] improve error handling on import fork / lock failures Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: http://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200731085619.GA10081@dcvr \
    --to=e@yhbt.net \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

Archives are clonable:
	git clone --mirror http://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

Example config snippet for mirrors

Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general

 note: .onion URLs require Tor: https://www.torproject.org/

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git