about summary refs log tree commit homepage
diff options
context:
space:
mode:
-rw-r--r--lib/PublicInbox/LeiImport.pm18
-rw-r--r--lib/PublicInbox/LeiMailSync.pm21
-rw-r--r--lib/PublicInbox/NetReader.pm68
3 files changed, 88 insertions, 19 deletions
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 01e6c93c..f9a46ec5 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -45,7 +45,16 @@ sub input_net_cb { # imap_each / nntp_each
         my ($uri, $uid, $kw, $eml, $self) = @_;
         my $vmd = $self->{-import_kw} ? { kw => $kw } : undef;
         $vmd->{sync_info} = [ $$uri, $uid ] if $self->{-mail_sync};
-        $self->input_eml_cb($eml, $vmd);
+        if (defined $eml) {
+                $self->input_eml_cb($eml, $vmd);
+        } elsif ($vmd) { # old message, kw only
+                my $oid = $self->{-lms_ro}->imap_oid2($uri, $uid) // return;
+                my @docids = $self->{lse}->over->blob_exists($oid) or return;
+                my $lei = $self->{lei};
+                $lei->qerr("# $oid => @$kw\n") if $lei->{opt}->{verbose};
+                $self->{lei}->{sto}->ipc_do('set_eml_vmd', undef,
+                                                $vmd, \@docids);
+        }
 }
 
 sub do_import_index ($$@) {
@@ -65,7 +74,12 @@ sub do_import_index ($$@) {
                 # $j = $net->net_concurrency($j); TODO
                 if ($lei->{opt}->{incremental} // 1) {
                         $net->{incremental} = 1;
-                        $net->{-lms_ro} = $lei->_lei_store->search->lms // 0;
+                        $net->{-lms_ro} = $sto->search->lms // 0;
+                        if ($self->{-import_kw}) {
+                                $net->{each_old} = 1;
+                                $self->{-lms_ro} = $net->{-lms_ro};
+                                $self->{lse} = $sto->search;
+                        }
                 }
         } else {
                 my $nproc = $self->detect_nproc;
diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm
index c7f78239..36cd564c 100644
--- a/lib/PublicInbox/LeiMailSync.pm
+++ b/lib/PublicInbox/LeiMailSync.pm
@@ -361,6 +361,17 @@ sub forget_folder {
         $dbh->do('DELETE FROM folders WHERE fid = ?', undef, $fid);
 }
 
+sub imap_oid2 ($$$) {
+        my ($self, $uri, $uid) = @_; # $uri MUST have UIDVALIDITY
+        my $fid = $self->{fmap}->{"$uri"} //= fid_for($self, "$uri") // return;
+        my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
+SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ?
+EOM
+        $sth->execute($fid, $uid);
+        my ($oidbin) = $sth->fetchrow_array;
+        $oidbin ? unpack('H*', $oidbin) : undef;
+}
+
 sub imap_oid {
         my ($self, $lei, $uid_uri) = @_;
         my $mailbox_uri = $uid_uri->clone;
@@ -373,16 +384,10 @@ sub imap_oid {
                 }
                 $lei->qerr(@{$err->{qerr}}) if $err->{qerr};
         }
-        my $fid = $self->{fmap}->{$folders->[0]} //=
-                fid_for($self, $folders->[0]) // return;
-        my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1);
-SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ?
-EOM
-        $sth->execute($fid, $uid_uri->uid);
-        my ($oidbin) = $sth->fetchrow_array;
-        $oidbin ? unpack('H*', $oidbin) : undef;
+        imap_oid2($self, $folders->[0], $uid_uri->uid);
 }
 
+
 # FIXME: something with "lei <up|q>" is causing uncommitted transaction
 # warnings, not sure what...
 sub DESTROY {
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 54c6b082..b97444fd 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -396,10 +396,8 @@ sub errors {
         undef;
 }
 
-sub _imap_do_msg ($$$$$) {
-        my ($self, $uri, $uid, $raw, $flags) = @_;
-        # our target audience expects LF-only, save storage
-        $$raw =~ s/\r\n/\n/sg;
+sub flags2kw ($$$$) {
+        my ($self, $uri, $uid, $flags) = @_;
         my $kw = [];
         for my $f (split(/ /, $flags)) {
                 if (my $k = $IMAPflags2kw{$f}) {
@@ -412,6 +410,14 @@ sub _imap_do_msg ($$$$$) {
                 }
         }
         @$kw = sort @$kw; # for all UI/UX purposes
+        $kw;
+}
+
+sub _imap_do_msg ($$$$$) {
+        my ($self, $uri, $uid, $raw, $flags) = @_;
+        # our target audience expects LF-only, save storage
+        $$raw =~ s/\r\n/\n/sg;
+        my $kw = flags2kw($self, $uri, $uid, $flags) // return;
         my ($eml_cb, @args) = @{$self->{eml_each}};
         $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
 }
@@ -447,17 +453,56 @@ sub itrk_last ($$;$$) {
         ($itrk, $l_uid, $l_uidval //= $r_uidval);
 }
 
+# import flags of already-seen messages
+sub each_old_flags ($$$$) {
+        my ($self, $mic, $uri, $l_uid) = @_;
+        $l_uid ||= 1;
+        my $sec = uri_section($uri);
+        my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 10000;
+        my ($eml_cb, @args) = @{$self->{eml_each}};
+        for (my $n = 1; $n <= $l_uid; $n += $bs) {
+                my $end = $n + $bs;
+                $end = $l_uid if $end > $l_uid;
+                my $r = $mic->fetch_hash("$n:$end", 'FLAGS');
+                if (!$r) {
+                        return if $!{EINTR} && $self->{quit};
+                        return "E: $uri UID FETCH $n:$end error: $!";
+                }
+                while (my ($uid, $per_uid) = each %$r) {
+                        my $kw = flags2kw($self, $uri, $uid, $per_uid->{FLAGS})
+                                // next;
+                        $eml_cb->($uri, $uid, $kw, undef, @args);
+                }
+        }
+}
+
+# returns true if PERMANENTFLAGS indicates FLAGS of already imported
+# messages are meaningful
+sub perm_fl_ok ($) {
+        my ($perm_fl) = @_;
+        return if !defined($perm_fl);
+        for my $f (split(/[ \t]+/, $perm_fl)) {
+                return 1 if $IMAPflags2kw{$f};
+        }
+        undef;
+}
+
 sub _imap_fetch_all ($$$) {
         my ($self, $mic, $orig_uri) = @_;
         my $sec = uri_section($orig_uri);
         my $mbx = $orig_uri->mailbox;
         $mic->Clear(1); # trim results history
-        $mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!";
-        my ($r_uidval, $r_uidnext);
+
+        # we need to check for mailbox writability to see if we care about
+        # FLAGS from already-imported messages.
+        my $cmd = $self->{each_old} ? 'select' : 'examine';
+        $mic->$cmd($mbx) or return "E: \U$cmd\E $mbx ($sec) failed: $!";
+
+        my ($r_uidval, $r_uidnext, $perm_fl);
         for ($mic->Results) {
+                /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and $perm_fl = $1;
                 /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1;
                 /^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1;
-                last if $r_uidval && $r_uidnext;
         }
         $r_uidval //= $mic->uidvalidity($mbx) //
                 return "E: $orig_uri cannot get UIDVALIDITY";
@@ -486,6 +531,13 @@ EOF
 E: $uri local UID exceeds remote ($l_uid > $r_uid)
 E: $uri strangely, UIDVALIDLITY matches ($l_uidval)
 EOF
+        $mic->Uid(1); # the default, we hope
+        my $err;
+        if (!defined($single_uid) && $self->{each_old} &&
+                                perm_fl_ok($perm_fl)) {
+                $err = each_old_flags($self, $mic, $uri, $l_uid);
+                return $err if $err;
+        }
         return if $l_uid >= $r_uid; # nothing to do
         $l_uid ||= 1;
         my ($mod, $shard) = @{$self->{shard_info} // []};
@@ -493,13 +545,11 @@ EOF
                 my $m = $mod ? " [(UID % $mod) == $shard]" : '';
                 warn "# $uri fetching UID $l_uid:$r_uid$m\n";
         }
-        $mic->Uid(1); # the default, we hope
         my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
         my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
         my $key = $req;
         $key =~ s/\.PEEK//;
         my ($uids, $batch);
-        my $err;
         do {
                 # I wish "UID FETCH $START:*" could work, but:
                 # 1) servers do not need to return results in any order