* [PATCH] extsearchidx: extra assertions for deduplication flow
@ 2021-07-02 20:42 Eric Wong
0 siblings, 0 replies; only message in thread
From: Eric Wong @ 2021-07-02 20:42 UTC (permalink / raw)
To: meta
I haven't found any bugs from this (still looking for missed
deduplication bugs), and it's a bit shorter and more likely to
catch future bugs. Clean up an unnecessary ->{mids} array copy
while we're at it, too.
---
lib/PublicInbox/ExtSearchIdx.pm | 19 ++++++++-----------
1 file changed, 8 insertions(+), 11 deletions(-)
diff --git a/lib/PublicInbox/ExtSearchIdx.pm b/lib/PublicInbox/ExtSearchIdx.pm
index ee43e6f8..29414e4a 100644
--- a/lib/PublicInbox/ExtSearchIdx.pm
+++ b/lib/PublicInbox/ExtSearchIdx.pm
@@ -139,7 +139,7 @@ sub index_unseen ($) {
sub do_finalize ($) {
my ($req) = @_;
- if (my $indexed = $req->{indexed}) {
+ if (my $indexed = $req->{indexed}) { # duplicated messages
do_xpost($req, $_) for @$indexed;
} elsif (exists $req->{new_smsg}) { # totally unseen messsage
index_unseen($req);
@@ -164,11 +164,10 @@ sub do_step ($) { # main iterator for adding messages to the index
\&ck_existing, $req);
return; # ck_existing calls do_step
}
- delete $req->{cur_smsg};
delete $req->{next_arg};
}
- my $mid = shift(@{$req->{mids}});
- last unless defined $mid;
+ die "BUG: {cur_smsg} still set" if $req->{cur_smsg};
+ my $mid = shift(@{$req->{mids}}) // last;
my ($id, $prev);
$req->{next_arg} = [ $mid, \$id, \$prev ];
# loop again
@@ -176,9 +175,8 @@ sub do_step ($) { # main iterator for adding messages to the index
do_finalize($req);
}
-sub _blob_missing ($) { # called when req->{cur_smsg}->{blob} is bad
- my ($req) = @_;
- my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+sub _blob_missing ($$) { # called when $smsg->{blob} is bad
+ my ($req, $smsg) = @_;
my $self = $req->{self};
my $xref3 = $self->{oidx}->get_xref3($smsg->{num});
my @keep = grep(!/:$smsg->{blob}\z/, @$xref3);
@@ -196,9 +194,9 @@ sub _blob_missing ($) { # called when req->{cur_smsg}->{blob} is bad
sub ck_existing { # git->cat_async callback
my ($bref, $oid, $type, $size, $req) = @_;
- my $smsg = $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
+ my $smsg = delete $req->{cur_smsg} or die 'BUG: {cur_smsg} missing';
if ($type eq 'missing') {
- _blob_missing($req);
+ _blob_missing($req, $smsg);
} elsif (!is_bad_blob($oid, $type, $size, $smsg->{blob})) {
my $self = $req->{self} // die 'BUG: {self} missing';
local $self->{current_info} = "$self->{current_info} $oid";
@@ -219,8 +217,7 @@ sub cur_ibx_xnum ($$) {
$req->{eml} = PublicInbox::Eml->new($bref);
$req->{chash} = content_hash($req->{eml});
$req->{mids} = mids($req->{eml});
- my @q = @{$req->{mids}}; # copy
- while (defined(my $mid = shift @q)) {
+ for my $mid (@{$req->{mids}}) {
my ($id, $prev);
while (my $x = $ibx->over->next_by_mid($mid, \$id, \$prev)) {
return $x->{num} if $x->{blob} eq $req->{oid};
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2021-07-02 20:42 UTC | newest]
Thread overview: (only message) (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-07-02 20:42 [PATCH] extsearchidx: extra assertions for deduplication flow Eric Wong
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).