about summary refs log tree commit homepage
path: root/lib/PublicInbox
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-09-21 07:41:50 +0000
committerEric Wong <e@80x24.org>2021-09-21 19:18:35 +0000
commit8eaa877179910dce156179e9025d1e0df34089d8 (patch)
tree989189ccfd54d820144ce9593a9694fa18522d67 /lib/PublicInbox
parent0c385e6500f26babc47a0768b730ea38e290a5f5 (diff)
downloadpublic-inbox-8eaa877179910dce156179e9025d1e0df34089d8.tar.gz
NNTP servers, IMAP servers, and various MUAs may recycle
"unique" identifiers due to software bugs or careless BOFHs.
Warn about them, but always be prepared to account for them.
Diffstat (limited to 'lib/PublicInbox')
-rw-r--r--lib/PublicInbox/LeiImport.pm8
-rw-r--r--lib/PublicInbox/LeiImportKw.pm12
-rw-r--r--lib/PublicInbox/LeiInspect.pm14
-rw-r--r--lib/PublicInbox/LeiLcat.pm7
-rw-r--r--lib/PublicInbox/LeiMailSync.pm20
-rw-r--r--lib/PublicInbox/LeiRefreshMailSync.pm2
6 files changed, 34 insertions, 29 deletions
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 40530914..3c30db8d 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -37,8 +37,12 @@ sub pmdir_cb { # called via wq_io_do from LeiPmdir->each_mdir_fn
         substr($folder, 0, 0) = 'maildir:'; # add prefix
         my $lse = $self->{lse} //= $self->{lei}->{sto}->search;
         my $lms = $self->{-lms_ro} //= $self->{lei}->lms; # may be 0 or undef
-        my $oidbin = $lms ? $lms->name_oidbin($folder, $bn) : undef;
-        my @docids = defined($oidbin) ? $lse->over->oidbin_exists($oidbin) : ();
+        my @oidbin = $lms ? $lms->name_oidbin($folder, $bn) : ();
+        @oidbin > 1 and $self->{lei}->err("W: $folder/*/$$bn not unique:\n",
+                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
+        my %seen;
+        my @docids = sort { $a <=> $b } grep { !$seen{$_}++ }
+                        map { $lse->over->oidbin_exists($_) } @oidbin;
         my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
         if (scalar @docids) {
                 $lse->kw_changed(undef, $kw, \@docids) or return;
diff --git a/lib/PublicInbox/LeiImportKw.pm b/lib/PublicInbox/LeiImportKw.pm
index 379101c2..21c93515 100644
--- a/lib/PublicInbox/LeiImportKw.pm
+++ b/lib/PublicInbox/LeiImportKw.pm
@@ -34,11 +34,15 @@ sub ipc_atfork_child {
 
 sub ck_update_kw { # via wq_io_do
         my ($self, $url, $uid, $kw) = @_;
-        my $oidbin = $self->{-lms_ro}->imap_oidbin($url, $uid) // return;
-        my @docids = $self->{over}->oidbin_exists($oidbin) or return;
+        my @oidbin = $self->{-lms_ro}->num_oidbin($url, $uid);
+        my $uid_url = "$url/;UID=$uid";
+        @oidbin > 1 and $self->{lei}->err("W: $uid_url not unique:\n",
+                                map { "\t".unpack('H*', $_)."\n" } @oidbin);
+        my %seen;
+        my @docids = sort { $a <=> $b } grep { !$seen{$_}++ }
+                map { $self->{over}->oidbin_exists($_) } @oidbin;
         $self->{lse}->kw_changed(undef, $kw, \@docids) or return;
-        $self->{verbose} and
-                $self->{lei}->qerr('# '.unpack('H*', $oidbin)." => @$kw\n");
+        $self->{verbose} and $self->{lei}->qerr("# $uid_url => @$kw\n");
         $self->{sto}->wq_do('set_eml_vmd', undef, { kw => $kw }, \@docids);
 }
 
diff --git a/lib/PublicInbox/LeiInspect.pm b/lib/PublicInbox/LeiInspect.pm
index ab2c98d9..722ba5b2 100644
--- a/lib/PublicInbox/LeiInspect.pm
+++ b/lib/PublicInbox/LeiInspect.pm
@@ -32,11 +32,9 @@ sub inspect_imap_uid ($$) {
         my ($lei, $uid_uri) = @_;
         my $ent = {};
         my $lms = $lei->lms or return $ent;
-        my $oidhex = $lms->imap_oid($lei, $uid_uri);
-        if (ref(my $err = $oidhex)) { # arg2folder error
-                $lei->qerr(@{$err->{qerr}}) if $err->{qerr};
-        }
-        $ent->{$$uid_uri} = $oidhex;
+        my @oidhex = $lms->imap_oidhex($lei, $uid_uri);
+        $ent->{$$uid_uri} = @oidhex == 1 ? $oidhex[0] :
+                        ((@oidhex == 0) ? undef : \@oidhex);
         $ent;
 }
 
@@ -54,8 +52,10 @@ sub inspect_nntp_range {
         }
         $end //= $beg;
         for my $art ($beg..$end) {
-                my $oidbin = $lms->imap_oidbin($folders->[0], $art);
-                $ent->{$art} = $oidbin ? unpack('H*', $oidbin) : undef;
+                my @oidhex = map { unpack('H*', $_) }
+                        $lms->num_oidbin($folders->[0], $art);
+                $ent->{$art} = @oidhex == 1 ? $oidhex[0] :
+                                ((@oidhex == 0) ? undef : \@oidhex);
         }
         $ret;
 }
diff --git a/lib/PublicInbox/LeiLcat.pm b/lib/PublicInbox/LeiLcat.pm
index 1e54c3bf..8f8e83bc 100644
--- a/lib/PublicInbox/LeiLcat.pm
+++ b/lib/PublicInbox/LeiLcat.pm
@@ -32,11 +32,8 @@ sub lcat_imap_uri ($$) {
         my $lms = $lei->lms or return;
         # cf. LeiXsearch->lcat_dump
         if (defined $uri->uid) {
-                my $oidhex = $lms->imap_oid($lei, $uri);
-                if (ref(my $err = $oidhex)) { # art2folder error
-                        $lei->qerr(@{$err->{qerr}}) if $err->{qerr};
-                }
-                push @{$lei->{lcat_blob}}, $oidhex;
+                my @oidhex = $lms->imap_oidhex($lei, $uri);
+                push @{$lei->{lcat_blob}}, @oidhex;
         } elsif (defined(my $fid = $lms->fid_for($$uri))) {
                 push @{$lei->{lcat_fid}}, $fid;
         } else {
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index d9b9e117..3e725d30 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -66,6 +66,7 @@ CREATE TABLE IF NOT EXISTS blob2num (
         oidbin VARBINARY NOT NULL,
         fid INTEGER NOT NULL, /* folder ID */
         uid INTEGER NOT NULL, /* NNTP article number, IMAP UID, MH number */
+        /* not UNIQUE(fid, uid), since we may have broken servers */
         UNIQUE (oidbin, fid, uid)
 )
 
@@ -78,6 +79,7 @@ CREATE TABLE IF NOT EXISTS blob2name (
         oidbin VARBINARY NOT NULL,
         fid INTEGER NOT NULL, /* folder ID */
         name VARBINARY NOT NULL, /* Maildir basename, JMAP blobId */
+        /* not UNIQUE(fid, name), since we may have broken software */
         UNIQUE (oidbin, fid, name)
 )
 
@@ -522,14 +524,14 @@ EOM
         }
 }
 
-sub imap_oidbin ($$$) {
-        my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY
-        my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return;
+sub num_oidbin ($$$) {
+        my ($self, $url, $uid) = @_; # $url MUST have UIDVALIDITY if IMAP
+        my $fid = $self->{fmap}->{$url} //= fid_for($self, $url) // return ();
         my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
-SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ?
+SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
 EOM
         $sth->execute($fid, $uid);
-        $sth->fetchrow_array;
+        map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
 sub name_oidbin ($$$) {
@@ -539,10 +541,10 @@ sub name_oidbin ($$$) {
 SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
 EOM
         $sth->execute($fid, $nm);
-        $sth->fetchrow_array;
+        map { $_->[0] } @{$sth->fetchall_arrayref};
 }
 
-sub imap_oid {
+sub imap_oidhex {
         my ($self, $lei, $uid_uri) = @_;
         my $mailbox_uri = $uid_uri->clone;
         $mailbox_uri->uid(undef);
@@ -550,12 +552,10 @@ sub imap_oid {
         if (my $err = $self->arg2folder($lei, $folders)) {
                 if ($err->{fail}) {
                         $lei->qerr("# no sync information for $mailbox_uri");
-                        return;
                 }
                 $lei->qerr(@{$err->{qerr}}) if $err->{qerr};
         }
-        my $oidbin = imap_oidbin($self, $folders->[0], $uid_uri->uid);
-        $oidbin ? unpack('H*', $oidbin) : undef;
+        map { unpack('H*',$_) } num_oidbin($self, $folders->[0], $uid_uri->uid)
 }
 
 1;
diff --git a/lib/PublicInbox/LeiRefreshMailSync.pm b/lib/PublicInbox/LeiRefreshMailSync.pm
index 2f105005..92673492 100644
--- a/lib/PublicInbox/LeiRefreshMailSync.pm
+++ b/lib/PublicInbox/LeiRefreshMailSync.pm
@@ -36,7 +36,7 @@ sub pmdir_cb { # called via LeiPmdir->each_mdir_fn
         my ($folder, $bn) = ($f =~ m!\A(.+?)/(?:new|cur)/([^/]+)\z!) or
                 die "BUG: $f was not from a Maildir?";
         substr($folder, 0, 0) = 'maildir:'; # add prefix
-        return if defined($self->{lms}->name_oidbin($folder, $bn));
+        return if scalar($self->{lms}->name_oidbin($folder, $bn));
         my $eml = eml_from_path($f) // return;
         my $oidbin = $self->{lei}->git_oid($eml)->digest;
         $self->{lms}->set_src($oidbin, $folder, \$bn);