From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-ASN: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 6982A1FB06 for ; Tue, 21 Sep 2021 07:41:59 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 03/12] lei_mail_sync: account for non-unique cases Date: Tue, 21 Sep 2021 07:41:50 +0000 Message-Id: <20210921074159.20052-4-e@80x24.org> In-Reply-To: <20210921074159.20052-1-e@80x24.org> References: <20210921074159.20052-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: NNTP servers, IMAP servers, and various MUAs may recycle "unique" identifiers due to software bugs or careless BOFHs. Warn about them, but always be prepared to account for them. --- lib/PublicInbox/LeiImport.pm | 8 ++++++-- lib/PublicInbox/LeiImportKw.pm | 12 ++++++++---- lib/PublicInbox/LeiInspect.pm | 14 +++++++------- lib/PublicInbox/LeiLcat.pm | 7 ++----- lib/PublicInbox/LeiMailSync.pm | 20 ++++++++++---------- lib/PublicInbox/LeiRefreshMailSync.pm | 2 +- 6 files changed, 34 insertions(+), 29 deletions(-) diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 40530914..3c30db8d 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -37,8 +37,12 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn substr($folder, 0, 0) = 'maildir:'; # add prefix my $lse = $self->{lse} //= $self->{lei}->{sto}->search; my $lms = $self->{-lms_ro} //= $self->{lei}->lms; # may be 0 or undef - my $oidbin = $lms ? $lms->name_oidbin($folder, $bn) : undef; - my @docids = defined($oidbin) ? $lse->over->oidbin_exists($oidbin) : (); + my @oidbin = $lms ? $lms->name_oidbin($folder, $bn) : (); + @oidbin > 1 and $self->{lei}->err("W: $folder/*/$$bn not unique:\n", + map { "\t".unpack('H*', $_)."\n" } @oidbin); + my %seen; + my @docids = sort { $a <=> $b } grep { !$seen{$_}++ } + map { $lse->over->oidbin_exists($_) } @oidbin; my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; if (scalar @docids) { $lse->kw_changed(undef, $kw, \@docids) or return; diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm index 379101c2..21c93515 100644 --- a/lib/PublicInbox/LeiImportKw.pm +++ b/lib/PublicInbox/LeiImportKw.pm @@ -34,11 +34,15 @@ sub ipc_atfork_child { sub ck_update_kw { # via wq_io_do my ($self, $url, $uid, $kw) = @_; - my $oidbin = $self->{-lms_ro}->imap_oidbin($url, $uid) // return; - my @docids = $self->{over}->oidbin_exists($oidbin) or return; + my @oidbin = $self->{-lms_ro}->num_oidbin($url, $uid); + my $uid_url = "$url/;UID=$uid"; + @oidbin > 1 and $self->{lei}->err("W: $uid_url not unique:\n", + map { "\t".unpack('H*', $_)."\n" } @oidbin); + my %seen; + my @docids = sort { $a <=> $b } grep { !$seen{$_}++ } + map { $self->{over}->oidbin_exists($_) } @oidbin; $self->{lse}->kw_changed(undef, $kw, \@docids) or return; - $self->{verbose} and - $self->{lei}->qerr('# '.unpack('H*', $oidbin)." => @$kw\n"); + $self->{verbose} and $self->{lei}->qerr("# $uid_url => @$kw\n"); $self->{sto}->wq_do('set_eml_vmd', undef, { kw => $kw }, \@docids); } diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm index ab2c98d9..722ba5b2 100644 --- a/lib/PublicInbox/LeiInspect.pm +++ b/lib/PublicInbox/LeiInspect.pm @@ -32,11 +32,9 @@ sub inspect_imap_uid ($$) { my ($lei, $uid_uri) = @_; my $ent = {}; my $lms = $lei->lms or return $ent; - my $oidhex = $lms->imap_oid($lei, $uid_uri); - if (ref(my $err = $oidhex)) { # arg2folder error - $lei->qerr(@{$err->{qerr}}) if $err->{qerr}; - } - $ent->{$$uid_uri} = $oidhex; + my @oidhex = $lms->imap_oidhex($lei, $uid_uri); + $ent->{$$uid_uri} = @oidhex == 1 ? $oidhex[0] : + ((@oidhex == 0) ? undef : \@oidhex); $ent; } @@ -54,8 +52,10 @@ sub inspect_nntp_range { } $end //= $beg; for my $art ($beg..$end) { - my $oidbin = $lms->imap_oidbin($folders->[0], $art); - $ent->{$art} = $oidbin ? unpack('H*', $oidbin) : undef; + my @oidhex = map { unpack('H*', $_) } + $lms->num_oidbin($folders->[0], $art); + $ent->{$art} = @oidhex == 1 ? $oidhex[0] : + ((@oidhex == 0) ? undef : \@oidhex); } $ret; } diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm index 1e54c3bf..8f8e83bc 100644 --- a/lib/PublicInbox/LeiLcat.pm +++ b/lib/PublicInbox/LeiLcat.pm @@ -32,11 +32,8 @@ sub lcat_imap_uri ($$) { my $lms = $lei->lms or return; # cf. LeiXsearch->lcat_dump if (defined $uri->uid) { - my $oidhex = $lms->imap_oid($lei, $uri); - if (ref(my $err = $oidhex)) { # art2folder error - $lei->qerr(@{$err->{qerr}}) if $err->{qerr}; - } - push @{$lei->{lcat_blob}}, $oidhex; + my @oidhex = $lms->imap_oidhex($lei, $uri); + push @{$lei->{lcat_blob}}, @oidhex; } elsif (defined(my $fid = $lms->fid_for($$uri))) { push @{$lei->{lcat_fid}}, $fid; } else { diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index d9b9e117..3e725d30 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -66,6 +66,7 @@ CREATE TABLE IF NOT EXISTS blob2num ( oidbin VARBINARY NOT NULL, fid INTEGER NOT NULL, /* folder ID */ uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */ + /* not UNIQUE(fid, uid), since we may have broken servers */ UNIQUE (oidbin, fid, uid) ) @@ -78,6 +79,7 @@ CREATE TABLE IF NOT EXISTS blob2name ( oidbin VARBINARY NOT NULL, fid INTEGER NOT NULL, /* folder ID */ name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */ + /* not UNIQUE(fid, name), since we may have broken software */ UNIQUE (oidbin, fid, name) ) @@ -522,14 +524,14 @@ EOM } } -sub imap_oidbin ($$$) { - my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY - my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return; +sub num_oidbin ($$$) { + my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY if IMAP + my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return (); my $sth = $self->{dbh}->prepare_cached(<execute($fid, $uid); - $sth->fetchrow_array; + map { $_->[0] } @{$sth->fetchall_arrayref}; } sub name_oidbin ($$$) { @@ -539,10 +541,10 @@ sub name_oidbin ($$$) { SELECT oidbin FROM blob2name WHERE fid = ? AND name = ? EOM $sth->execute($fid, $nm); - $sth->fetchrow_array; + map { $_->[0] } @{$sth->fetchall_arrayref}; } -sub imap_oid { +sub imap_oidhex { my ($self, $lei, $uid_uri) = @_; my $mailbox_uri = $uid_uri->clone; $mailbox_uri->uid(undef); @@ -550,12 +552,10 @@ sub imap_oid { if (my $err = $self->arg2folder($lei, $folders)) { if ($err->{fail}) { $lei->qerr("# no sync information for $mailbox_uri"); - return; } $lei->qerr(@{$err->{qerr}}) if $err->{qerr}; } - my $oidbin = imap_oidbin($self, $folders->[0], $uid_uri->uid); - $oidbin ? unpack('H*', $oidbin) : undef; + map { unpack('H*',$_) } num_oidbin($self, $folders->[0], $uid_uri->uid) } 1; diff --git a/lib/PublicInbox/LeiRefreshMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm index 2f105005..92673492 100644 --- a/lib/PublicInbox/LeiRefreshMailSync.pm +++ b/lib/PublicInbox/LeiRefreshMailSync.pm @@ -36,7 +36,7 @@ sub pmdir_cb { # called via LeiPmdir->each_mdir_fn my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or die "BUG: $f was not from a Maildir?"; substr($folder, 0, 0) = 'maildir:'; # add prefix - return if defined($self->{lms}->name_oidbin($folder, $bn)); + return if scalar($self->{lms}->name_oidbin($folder, $bn)); my $eml = eml_from_path($f) // return; my $oidbin = $self->{lei}->git_oid($eml)->digest; $self->{lms}->set_src($oidbin, $folder, \$bn);