user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH] extsearchidx: extra assertions for deduplication flow
@ 2021-07-02 20:42 Eric Wong
  0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-07-02 20:42 UTC (permalink / raw)
  To: meta

I haven't found any bugs from this (still looking for missed
deduplication bugs), and it's a bit shorter and more likely to
catch future bugs.  Clean up an unnecessary ->{mids} array copy
while we're at it, too.
---
 lib/PublicInbox/ExtSearchIdx.pm | 19 ++++++++-----------
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index ee43e6f8..29414e4a 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -139,7 +139,7 @@ sub index_unseen ($) {
 
 sub do_finalize ($) {
 	my ($req) = @_;
-	if (my $indexed = $req->{indexed}) {
+	if (my $indexed = $req->{indexed}) { # duplicated messages
 		do_xpost($req, $_) for @$indexed;
 	} elsif (exists $req->{new_smsg}) { # totally unseen messsage
 		index_unseen($req);
@@ -164,11 +164,10 @@ sub do_step ($) { # main iterator for adding messages to the index
 							\&ck_existing, $req);
 				return; # ck_existing calls do_step
 			}
-			delete $req->{cur_smsg};
 			delete $req->{next_arg};
 		}
-		my $mid = shift(@{$req->{mids}});
-		last unless defined $mid;
+		die "BUG: {cur_smsg} still set" if $req->{cur_smsg};
+		my $mid = shift(@{$req->{mids}}) // last;
 		my ($id, $prev);
 		$req->{next_arg} = [ $mid, \$id, \$prev ];
 		# loop again
@@ -176,9 +175,8 @@ sub do_step ($) { # main iterator for adding messages to the index
 	do_finalize($req);
 }
 
-sub _blob_missing ($) { # called when req->{cur_smsg}->{blob} is bad
-	my ($req) = @_;
-	my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+sub _blob_missing ($$) { # called when $smsg->{blob} is bad
+	my ($req, $smsg) = @_;
 	my $self = $req->{self};
 	my $xref3 = $self->{oidx}->get_xref3($smsg->{num});
 	my @keep = grep(!/:$smsg->{blob}\z/, @$xref3);
@@ -196,9 +194,9 @@ sub _blob_missing ($) { # called when req->{cur_smsg}->{blob} is bad
 
 sub ck_existing { # git->cat_async callback
 	my ($bref, $oid, $type, $size, $req) = @_;
-	my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+	my $smsg = delete $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
 	if ($type eq 'missing') {
-		_blob_missing($req);
+		_blob_missing($req, $smsg);
 	} elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) {
 		my $self = $req->{self} // die 'BUG: {self} missing';
 		local $self->{current_info} = "$self->{current_info} $oid";
@@ -219,8 +217,7 @@ sub cur_ibx_xnum ($$) {
 	$req->{eml} = PublicInbox::Eml->new($bref);
 	$req->{chash} = content_hash($req->{eml});
 	$req->{mids} = mids($req->{eml});
-	my @q = @{$req->{mids}}; # copy
-	while (defined(my $mid = shift @q)) {
+	for my $mid (@{$req->{mids}}) {
 		my ($id, $prev);
 		while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
 			return $x->{num} if $x->{blob} eq $req->{oid};

^ permalink raw reply related	[flat|nested] only message in thread

only message in thread, other threads:[~2021-07-02 20:42 UTC | newest]

Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-02 20:42 [PATCH] extsearchidx: extra assertions for deduplication flow Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).