about summary refs log tree commit homepage
path: root/lib/PublicInbox/LeiMailSync.pm
diff options
context:
space:
mode:
Diffstat (limited to 'lib/PublicInbox/LeiMailSync.pm')
-rw-r--r-- lib/PublicInbox/LeiMailSync.pm 309
1 files changed, 229 insertions, 80 deletions
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index 91cd1c93..c498421c 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -1,4 +1,4 @@
-# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# Copyright (C) all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 
 # for maintaining synchronization between lei/store <=> Maildir|MH|IMAP|JMAP
@@ -6,28 +6,30 @@ package PublicInbox::LeiMailSync;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::Lock);
-use DBI;
+use PublicInbox::Compat qw(uniqstr);
+use DBI qw(:sql_types); # SQL_BLOB
 use PublicInbox::ContentHash qw(git_sha);
 use Carp ();
+use PublicInbox::Git qw(%HEXLEN2SHA);
+use PublicInbox::IO qw(read_all);
 
 sub dbh_new {
-        my ($self, $rw) = @_;
+        my ($self) = @_;
         my $f = $self->{filename};
-        my $creat = $rw && !-s $f;
+        my $creat = !-s $f;
         if ($creat) {
-                require PublicInbox::Spawn;
+                require PublicInbox::Syscall;
                 open my $fh, '+>>', $f or Carp::croak "open($f): $!";
-                PublicInbox::Spawn::nodatacow_fd(fileno($fh));
+                PublicInbox::Syscall::nodatacow_fh($fh);
         }
         my $dbh = DBI->connect("dbi:SQLite:dbname=$f",'','', {
                 AutoCommit => 1,
                 RaiseError => 1,
                 PrintError => 0,
-                ReadOnly => !$rw,
                 sqlite_use_immediate_transaction => 1,
         });
         # no sqlite_unicode, here, all strings are binary
-        create_tables($self, $dbh) if $rw;
+        create_tables($self, $dbh);
         $dbh->do('PRAGMA journal_mode = WAL') if $creat;
         $dbh->do('PRAGMA case_sensitive_like = ON');
         $dbh;
@@ -42,12 +44,13 @@ sub new {
         }, $cls;
 }
 
-sub lms_write_prepare { ($_[0]->{dbh} //= dbh_new($_[0], 1)); $_[0] }
+sub lms_write_prepare { ($_[0]->{dbh} //= dbh_new($_[0])); $_[0] }
 
 sub lms_pause {
         my ($self) = @_;
         $self->{fmap} = {};
-        delete $self->{dbh};
+        my $dbh = delete $self->{dbh};
+        eval { $dbh->do('PRAGMA optimize') } if $dbh;
 }
 
 sub create_tables {
@@ -89,38 +92,68 @@ CREATE INDEX IF NOT EXISTS idx_fid_name ON blob2name(fid,name)
 
 }
 
+# used to fixup pre-1.7.0 folders
+sub update_fid ($$$) {
+        my ($dbh, $fid, $loc) = @_;
+        my $sth = $dbh->prepare(<<'');
+UPDATE folders SET loc = ? WHERE fid = ?
+
+        $sth->bind_param(1, $loc, SQL_BLOB);
+        $sth->bind_param(2, $fid);
+        $sth->execute;
+}
+
+sub get_fid ($$$) {
+        my ($sth, $folder, $dbh) = @_;
+        $sth->bind_param(1, $folder, SQL_BLOB);
+        $sth->execute;
+        my ($fid) = $sth->fetchrow_array;
+        if (defined $fid) { # for downgrade+upgrade (1.8 -> 1.7 -> 1.8)
+                my $del = $dbh->prepare_cached(<<'');
+DELETE FROM folders WHERE loc = ? AND fid != ?
+
+                $del->execute($folder, $fid);
+        } else {
+                $sth->bind_param(1, $folder, SQL_VARCHAR);
+                $sth->execute; # fixup old stuff
+                ($fid) = $sth->fetchrow_array;
+                update_fid($dbh, $fid, $folder) if defined($fid);
+        }
+        $fid;
+}
+
 sub fid_for {
-        my ($self, $folder, $rw) = @_;
-        my $dbh = $self->{dbh} //= dbh_new($self, $rw);
-        my $sel = 'SELECT fid FROM folders WHERE loc = ? LIMIT 1';
-        my ($fid) = $dbh->selectrow_array($sel, undef, $folder);
-        return $fid if defined $fid;
+        my ($self, $folder, $creat) = @_;
+        my $dbh = $self->{dbh} //= dbh_new($self);
+        my $sth = $dbh->prepare_cached(<<'', undef, 1);
+SELECT fid FROM folders WHERE loc = ? LIMIT 1
+
+        my $fid = get_fid($sth, $folder, $dbh);
+        return $fid if defined($fid);
 
         # caller had trailing slash (LeiToMail)
         if ($folder =~ s!\A((?:maildir|mh):.*?)/+\z!$1!i) {
-                ($fid) = $dbh->selectrow_array($sel, undef, $folder);
+                $fid = get_fid($sth, $folder, $dbh);
                 if (defined $fid) {
-                        $dbh->do(<<EOM, undef, $folder, $fid) if $rw;
-UPDATE folders SET loc = ? WHERE fid = ?
-EOM
+                        update_fid($dbh, $fid, $folder);
                         return $fid;
                 }
         # sometimes we stored trailing slash..
         } elsif ($folder =~ m!\A(?:maildir|mh):!i) {
-                ($fid) = $dbh->selectrow_array($sel, undef, "$folder/");
+                $fid = get_fid($sth, $folder, $dbh);
                 if (defined $fid) {
-                        $dbh->do(<<EOM, undef, $folder, $fid) if $rw;
-UPDATE folders SET loc = ? WHERE fid = ?
-EOM
+                        update_fid($dbh, $fid, $folder);
                         return $fid;
                 }
-        } elsif ($rw && $folder =~ m!\Aimaps?://!i) {
+        } elsif ($creat && $folder =~ m!\Aimaps?://!i) {
                 require PublicInbox::URIimap;
-                PublicInbox::URIimap->new($folder)->uidvalidity //
+                my $uri = PublicInbox::URIimap->new($folder);
+                $uri->uidvalidity //
                         Carp::croak("BUG: $folder has no UIDVALIDITY");
+                defined($uri->uid) and Carp::confess("BUG: $folder has UID");
         }
-        return unless $rw;
 
+        return unless $creat;
         ($fid) = $dbh->selectrow_array('SELECT MAX(fid) FROM folders');
 
         $fid += 1;
@@ -128,8 +161,10 @@ EOM
         $dbh->do('DELETE FROM blob2name WHERE fid = ?', undef, $fid);
         $dbh->do('DELETE FROM blob2num WHERE fid = ?', undef, $fid);
 
-        my $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
-        $sth->execute($fid, $folder);
+        $sth = $dbh->prepare('INSERT INTO folders (fid, loc) VALUES (?, ?)');
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, $folder, SQL_BLOB);
+        $sth->execute;
 
         $fid;
 }
@@ -144,36 +179,60 @@ sub set_src {
         my ($self, $oidbin, $folder, $id) = @_;
         my $lk = $self->lock_for_scope;
         my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
-        my $sth;
+        my $dbh = $self->{dbh};
+        my ($sth, @param3, $del_old);
         if (ref($id)) { # scalar name
-                $id = $$id;
-                $sth = $self->{dbh}->prepare_cached(<<'');
+                @param3 = ($$id, SQL_BLOB);
+                $sth = $dbh->prepare_cached(<<'');
 INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
 
+                $del_old = $dbh->prepare_cached(<<'');
+DELETE FROM blob2name WHERE oidbin = ? AND fid = ? AND name = ?
+
         } else { # numeric ID (IMAP UID, MH number)
-                $sth = $self->{dbh}->prepare_cached(<<'');
+                @param3 = ($id);
+                $sth = $dbh->prepare_cached(<<'');
 INSERT OR IGNORE INTO blob2num (oidbin, fid, uid) VALUES (?, ?, ?)
 
+                $del_old = $dbh->prepare_cached(<<'');
+DELETE FROM blob2num WHERE oidbin = ? AND fid = ? AND uid = ?
+
         }
-        $sth->execute($oidbin, $fid, $id);
+        $sth->bind_param(1, $oidbin, SQL_BLOB);
+        $sth->bind_param(2, $fid);
+        $sth->bind_param(3, @param3);
+        my $ret = $sth->execute;
+        $del_old->execute($oidbin, $fid, $param3[0]);
+        $ret;
 }
 
 sub clear_src {
         my ($self, $folder, $id) = @_;
         my $lk = $self->lock_for_scope;
         my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
-        my $sth;
+        my ($sth, @param3);
         if (ref($id)) { # scalar name
-                $id = $$id;
+                @param3 = ($$id, SQL_BLOB);
                 $sth = $self->{dbh}->prepare_cached(<<'');
 DELETE FROM blob2name WHERE fid = ? AND name = ?
 
         } else {
+                @param3 = ($id);
                 $sth = $self->{dbh}->prepare_cached(<<'');
 DELETE FROM blob2num WHERE fid = ? AND uid = ?
 
         }
-        $sth->execute($fid, $id);
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, @param3);
+        my $ret = $sth->execute;
+
+        # older versions may not have used SQL_BLOB:
+        if (defined($ret) && $ret == 0 && scalar(@param3) == 2) {
+                $sth->bind_param(1, $fid);
+                $sth->bind_param(2, $param3[0]);
+                $ret = $sth->execute;
+        }
+        $ret;
 }
 
 # Maildir-only
@@ -181,16 +240,34 @@ sub mv_src {
         my ($self, $folder, $oidbin, $id, $newbn) = @_;
         my $lk = $self->lock_for_scope;
         my $fid = $self->{fmap}->{$folder} //= fid_for($self, $folder, 1);
+        $self->{dbh}->begin_work;
         my $sth = $self->{dbh}->prepare_cached(<<'');
 UPDATE blob2name SET name = ? WHERE fid = ? AND oidbin = ? AND name = ?
 
-        my $nr = $sth->execute($newbn, $fid, $oidbin, $$id);
-        if ($nr == 0) { # may race with a clear_src, ensure new value exists
+        # eval since unique constraint may fail due to race
+        $sth->bind_param(1, $newbn, SQL_BLOB);
+        $sth->bind_param(2, $fid);
+        $sth->bind_param(3, $oidbin, SQL_BLOB);
+        $sth->bind_param(4, $$id, SQL_BLOB);
+        my $nr = eval { $sth->execute };
+        if (!defined($nr) || $nr == 0) { # $nr may be `0E0'
+                # delete from old, pre-SQL_BLOB rows:
+                my $del_old = $self->{dbh}->prepare_cached(<<'');
+DELETE FROM blob2name WHERE fid = ? AND oidbin = ? AND name = ?
+
+                $del_old->execute($fid, $oidbin, $$id); # missing-OK
+                $del_old->execute($fid, $oidbin, $newbn); # ditto
+
+                # may race with a clear_src, ensure new value exists
                 $sth = $self->{dbh}->prepare_cached(<<'');
 INSERT OR IGNORE INTO blob2name (oidbin, fid, name) VALUES (?, ?, ?)
 
-                $sth->execute($oidbin, $fid, $newbn);
+                $sth->bind_param(1, $oidbin, SQL_BLOB);
+                $sth->bind_param(2, $fid);
+                $sth->bind_param(3, $newbn, SQL_BLOB);
+                $sth->execute;
         }
+        $self->{dbh}->commit;
 }
 
 # read-only, iterates every oidbin + UID or name for a given folder
@@ -265,35 +342,79 @@ SELECT $op(uid) FROM blob2num WHERE fid = ?
         $ret;
 }
 
+# must be called with lock
+sub _forget_fids ($;@) {
+        my $dbh = shift;
+        $dbh->begin_work;
+        for my $t (qw(blob2name blob2num folders)) {
+                my $sth = $dbh->prepare_cached("DELETE FROM $t WHERE fid = ?");
+                $sth->execute($_) for @_;
+        }
+        $dbh->commit;
+}
+
 # returns a { location => [ list-of-ids-or-names ] } mapping
 sub locations_for {
         my ($self, $oidbin) = @_;
-        my ($fid, $sth, $id, %fid2id);
+        my ($fid, $sth, $id, %fid2id, %seen);
         my $dbh = $self->{dbh} //= dbh_new($self);
         $sth = $dbh->prepare('SELECT fid,uid FROM blob2num WHERE oidbin = ?');
-        $sth->execute($oidbin);
+        $sth->bind_param(1, $oidbin, SQL_BLOB);
+        $sth->execute;
         while (my ($fid, $uid) = $sth->fetchrow_array) {
                 push @{$fid2id{$fid}}, $uid;
+                $seen{"$uid.$fid"} = 1;
         }
+
+        # deal with 1.7.0 DBs :<
+        $sth->bind_param(1, $oidbin, SQL_VARCHAR);
+        $sth->execute;
+        while (my ($fid, $uid) = $sth->fetchrow_array) {
+                next if $seen{"$uid.$fid"};
+                push @{$fid2id{$fid}}, $uid;
+        }
+
+        %seen = ();
         $sth = $dbh->prepare('SELECT fid,name FROM blob2name WHERE oidbin = ?');
-        $sth->execute($oidbin);
+        $sth->bind_param(1, $oidbin, SQL_BLOB);
+        $sth->execute;
+        while (my ($fid, $name) = $sth->fetchrow_array) {
+                push @{$fid2id{$fid}}, $name;
+                $seen{"$fid.$name"} = 1;
+        }
+
+        # deal with 1.7.0 DBs :<
+        $sth->bind_param(1, $oidbin, SQL_VARCHAR);
+        $sth->execute;
         while (my ($fid, $name) = $sth->fetchrow_array) {
+                next if $seen{"$fid.$name"};
                 push @{$fid2id{$fid}}, $name;
         }
+
         $sth = $dbh->prepare('SELECT loc FROM folders WHERE fid = ? LIMIT 1');
         my $ret = {};
+        my $drop_fids = $dbh->{ReadOnly} ? undef : {};
         while (my ($fid, $ids) = each %fid2id) {
                 $sth->execute($fid);
                 my ($loc) = $sth->fetchrow_array;
                 unless (defined $loc) {
+                        my $del = '';
+                        if ($drop_fids) {
+                                $del = ' (deleting)';
+                                $drop_fids->{$fid} = $fid;
+                        }
                         my $oidhex = unpack('H*', $oidbin);
-                        warn "E: fid=$fid for $oidhex unknown:\n", map {
-                                        'E: '.(ref() ? $$_ : "#$_")."\n";
+                        warn "E: fid=$fid for $oidhex stale/unknown:\n", map {
+                                        'E: '.(ref() ? $$_ : "#$_")."$del\n";
                                 } @$ids;
                         next;
                 }
                 $ret->{$loc} = $ids;
         }
+        if ($drop_fids && scalar(values %$drop_fids)) {
+                my $lk = $self->lock_for_scope;
+                _forget_fids($self->{dbh}, values %$drop_fids);
+        }
         scalar(keys %$ret) ? $ret : undef;
 }
 
@@ -301,29 +422,42 @@ sub locations_for {
 sub folders {
         my ($self, @pfx) = @_;
         my $sql = 'SELECT loc FROM folders';
+        my $re;
         if (defined($pfx[0])) {
-                $sql .= ' WHERE loc LIKE ? ESCAPE ?';
-                my $anywhere = !!$pfx[1];
-                $pfx[1] = '\\';
-                $pfx[0] =~ s/([%_\\])/\\$1/g; # glob chars
-                $pfx[0] .= '%';
-                substr($pfx[0], 0, 0, '%') if $anywhere;
-        } else {
-                @pfx = (); # [0] may've been undef
+                $sql .= ' WHERE loc REGEXP ?'; # DBD::SQLite uses perlre
+                if (ref($pfx[0])) { # assume qr// "Regexp"
+                        $re = $pfx[0];
+                } else {
+                        $re = !!$pfx[1] ? '.*' : '';
+                        $re .= quotemeta($pfx[0]);
+                        $re .= '.*';
+                }
         }
-        my $dbh = $self->{dbh} //= dbh_new($self);
-        map { $_->[0] } @{$dbh->selectall_arrayref($sql, undef, @pfx)};
+        my $sth = ($self->{dbh} //= dbh_new($self))->prepare($sql);
+        $sth->bind_param(1, $re) if defined($re);
+        $sth->execute;
+        map { $_->[0] } @{$sth->fetchall_arrayref};
+}
+
+sub blob_mismatch ($$$) {
+        my ($f, $oidhex, $rawref) = @_;
+        my $sha = $HEXLEN2SHA{length($oidhex)};
+        my $got = git_sha($sha, $rawref)->hexdigest;
+        $got eq $oidhex ? undef : warn("$f changed $oidhex => $got\n");
 }
 
 sub local_blob {
         my ($self, $oidhex, $vrfy) = @_;
         my $dbh = $self->{dbh} //= dbh_new($self);
+        my $oidbin = pack('H*', $oidhex);
+
         my $b2n = $dbh->prepare(<<'');
 SELECT f.loc,b.name FROM blob2name b
 LEFT JOIN folders f ON b.fid = f.fid
 WHERE b.oidbin = ?
 
-        $b2n->execute(pack('H*', $oidhex));
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
+        $b2n->execute;
         while (my ($d, $n) = $b2n->fetchrow_array) {
                 substr($d, 0, length('maildir:')) = '';
                 # n.b. both mbsync and offlineimap use ":2," as a suffix
@@ -335,16 +469,28 @@ WHERE b.oidbin = ?
                         my $f = "$d/$x/$n";
                         open my $fh, '<', $f or next;
                         # some (buggy) Maildir writers are non-atomic:
-                        next unless -s $fh;
-                        local $/;
-                        my $raw = <$fh>;
-                        if ($vrfy && git_sha(1, \$raw)->hexdigest ne $oidhex) {
-                                warn "$f changed $oidhex\n";
-                                next;
-                        }
+                        my $raw = read_all($fh, -s $fh // next);
+                        next if $vrfy && blob_mismatch $f, $oidhex, \$raw;
                         return \$raw;
                 }
         }
+
+        # MH, except `uid' is not always unique (can be packed)
+        $b2n = $dbh->prepare(<<'');
+SELECT f.loc,b.uid FROM blob2num b
+LEFT JOIN folders f ON b.fid = f.fid
+WHERE b.oidbin = ? AND f.loc REGEXP '^mh:/'
+
+        $b2n->bind_param(1, $oidbin, SQL_BLOB);
+        $b2n->execute;
+        while (my ($f, $n) = $b2n->fetchrow_array) {
+                $f =~ s/\Amh://s or die "BUG: not MH: $f";
+                $f .= "/$n";
+                open my $fh, '<', $f or next;
+                my $raw = read_all($fh, -s $fh // next);
+                next if blob_mismatch $f, $oidhex, \$raw;
+                return \$raw;
+        }
         undef;
 }
 
@@ -420,20 +566,19 @@ EOM
 --all=@no not accepted (must be `local' and/or `remote')
 EOM
         }
-        my (%seen, @inc);
         my @all = $self->folders;
         for my $ok (@ok) {
                 if ($ok eq 'local') {
-                        @inc = grep(!m!\A[a-z0-9\+]+://!i, @all);
+                        push @$folders, grep(!m!\A[a-z0-9\+]+://!i, @all);
                 } elsif ($ok eq 'remote') {
-                        @inc = grep(m!\A[a-z0-9\+]+://!i, @all);
+                        push @$folders, grep(m!\A[a-z0-9\+]+://!i, @all);
                 } elsif ($ok ne '') {
                         return $lei->fail("--all=$all not understood");
                 } else {
-                        @inc = @all;
+                        push @$folders, @all;
                 }
-                push(@$folders, (grep { !$seen{$_}++ } @inc));
         }
+        @$folders = uniqstr @$folders;
         scalar(@$folders) || $lei->fail(<<EOM);
 no --mail-sync folders known to lei
 EOM
@@ -465,7 +610,7 @@ sub arg2folder {
 # using `$res' instead of `$orig'
 EOM
                         } else {
-                                $lei->err($res) if defined $res;
+                                warn($res, "\n") if defined $res;
                                 push @no, $orig;
                         }
                 } elsif (m!\A(?:nntps?|s?news)://!i) {
@@ -477,7 +622,7 @@ EOM
 # using `$res' instead of `$orig'
 EOM
                         } else {
-                                $lei->err($res) if defined $res;
+                                warn($res, "\n") if defined $res;
                                 push @no, $orig;
                         }
                 } else {
@@ -496,14 +641,10 @@ EOF
 sub forget_folders {
         my ($self, @folders) = @_;
         my $lk = $self->lock_for_scope;
-        for my $folder (@folders) {
-                my $fid = delete($self->{fmap}->{$folder}) //
-                        fid_for($self, $folder) // next;
-                for my $t (qw(blob2name blob2num folders)) {
-                        $self->{dbh}->do("DELETE FROM $t WHERE fid = ?",
-                                        undef, $fid);
-                }
-        }
+        _forget_fids($self->{dbh}, map {
+                delete($self->{fmap}->{$_}) //
+                        fid_for($self, $_) // ();
+        } @folders);
 }
 
 # only used for changing canonicalization errors
@@ -537,7 +678,8 @@ sub num_oidbin ($$$) {
 SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? ORDER BY _rowid_
 EOM
         $sth->execute($fid, $uid);
-        map { $_->[0] } @{$sth->fetchall_arrayref};
+        # for public-inbox <= 1.7.0:
+        uniqstr(map { $_->[0] } @{$sth->fetchall_arrayref});
 }
 
 sub name_oidbin ($$$) {
@@ -546,8 +688,15 @@ sub name_oidbin ($$$) {
         my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
 SELECT oidbin FROM blob2name WHERE fid = ? AND name = ?
 EOM
-        $sth->execute($fid, $nm);
-        map { $_->[0] } @{$sth->fetchall_arrayref};
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, $nm, SQL_BLOB);
+        $sth->execute;
+        my @bin = map { $_->[0] } @{$sth->fetchall_arrayref};
+        $sth->bind_param(1, $fid);
+        $sth->bind_param(2, $nm, SQL_VARCHAR);
+        $sth->execute;
+        my @old = map { $_->[0] } @{$sth->fetchall_arrayref};
+        uniqstr @bin, @old # for public-inbox <= 1.7.0
 }
 
 sub imap_oidhex {