From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.0 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 3DB6A1FAE5 for ; Wed, 4 Apr 2018 21:25:01 +0000 (UTC) From: "Eric Wong (Contractor, The Linux Foundation)" To: meta@public-inbox.org Subject: [PATCH 4/4] v2writable: do not modify DBs while iterating for ->remove Date: Wed, 4 Apr 2018 21:25:00 +0000 Message-Id: <20180404212500.1859-5-e@80x24.org> In-Reply-To: <20180404212500.1859-1-e@80x24.org> References: <20180404212500.1859-1-e@80x24.org> List-Id: Xapian may become unhappy if a DB is modified during iteration: nntp://news.gmane.org/20180228004400.GU12724@survex.com --- lib/PublicInbox/V2Writable.pm | 46 +++++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/lib/PublicInbox/V2Writable.pm b/lib/PublicInbox/V2Writable.pm index 5b4d9c0..74953d3 100644 --- a/lib/PublicInbox/V2Writable.pm +++ b/lib/PublicInbox/V2Writable.pm @@ -256,6 +256,7 @@ sub remove_internal { my $mark; foreach my $mid (@$mids) { + my %gone; $srch->reopen->each_smsg_by_mid($mid, sub { my ($smsg) = @_; $smsg->load_expand; @@ -267,28 +268,35 @@ sub remove_internal { my $orig = $$msg; my $cur = PublicInbox::MIME->new($msg); if (content_id($cur) eq $cid) { - $mm->num_delete($smsg->num); - # $removed should only be set once assuming - # no bugs in our deduplication code: - $removed = $smsg; - $removed->{mime} = $cur; - my $oid = $smsg->{blob}; - if ($purge) { - $purge->{$oid} = 1; - } else { - ($mark, undef) = - $im->remove(\$orig, $cmt_msg); - } - $orig = undef; - $removed->num; # memoize this for callers - - foreach my $idx (@$parts) { - $idx->remote_remove($oid, $mid); - } - $self->{over}->remove_oid($oid, $mid); + $smsg->{mime} = $cur; + $gone{$smsg->num} = [ $smsg, \$orig ]; } 1; # continue }); + my $n = scalar keys %gone; + next unless $n; + if ($n > 1) { + warn "BUG: multiple articles linked to <$mid>\n", + join(',', sort keys %gone), "\n"; + } + foreach my $num (keys %gone) { + my ($smsg, $orig) = @{$gone{$num}}; + $mm->num_delete($num); + # $removed should only be set once assuming + # no bugs in our deduplication code: + $removed = $smsg; + my $oid = $smsg->{blob}; + if ($purge) { + $purge->{$oid} = 1; + } else { + ($mark, undef) = $im->remove($orig, $cmt_msg); + } + $orig = undef; + foreach my $idx (@$parts) { + $idx->remote_remove($oid, $mid); + } + $self->{over}->remove_oid($oid, $mid); + } $self->barrier; } -- EW