diff options
-rw-r--r-- | lib/PublicInbox/LeiImport.pm | 18 | ||||
-rw-r--r-- | lib/PublicInbox/LeiMailSync.pm | 21 | ||||
-rw-r--r-- | lib/PublicInbox/NetReader.pm | 68 |
3 files changed, 88 insertions, 19 deletions
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 01e6c93c..f9a46ec5 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -45,7 +45,16 @@ sub input_net_cb { # imap_each / nntp_each my ($uri, $uid, $kw, $eml, $self) = @_; my $vmd = $self->{-import_kw} ? { kw => $kw } : undef; $vmd->{sync_info} = [ $$uri, $uid ] if $self->{-mail_sync}; - $self->input_eml_cb($eml, $vmd); + if (defined $eml) { + $self->input_eml_cb($eml, $vmd); + } elsif ($vmd) { # old message, kw only + my $oid = $self->{-lms_ro}->imap_oid2($uri, $uid) // return; + my @docids = $self->{lse}->over->blob_exists($oid) or return; + my $lei = $self->{lei}; + $lei->qerr("# $oid => @$kw\n") if $lei->{opt}->{verbose}; + $self->{lei}->{sto}->ipc_do('set_eml_vmd', undef, + $vmd, \@docids); + } } sub do_import_index ($$@) { @@ -65,7 +74,12 @@ sub do_import_index ($$@) { # $j = $net->net_concurrency($j); TODO if ($lei->{opt}->{incremental} // 1) { $net->{incremental} = 1; - $net->{-lms_ro} = $lei->_lei_store->search->lms // 0; + $net->{-lms_ro} = $sto->search->lms // 0; + if ($self->{-import_kw}) { + $net->{each_old} = 1; + $self->{-lms_ro} = $net->{-lms_ro}; + $self->{lse} = $sto->search; + } } } else { my $nproc = $self->detect_nproc; diff --git a/lib/PublicInbox/LeiMailSync.pm b/lib/PublicInbox/LeiMailSync.pm index c7f78239..36cd564c 100644 --- a/lib/PublicInbox/LeiMailSync.pm +++ b/lib/PublicInbox/LeiMailSync.pm @@ -361,6 +361,17 @@ sub forget_folder { $dbh->do('DELETE FROM folders WHERE fid = ?', undef, $fid); } +sub imap_oid2 ($$$) { + my ($self, $uri, $uid) = @_; # $uri MUST have UIDVALIDITY + my $fid = $self->{fmap}->{"$uri"} //= fid_for($self, "$uri") // return; + my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1); +SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? +EOM + $sth->execute($fid, $uid); + my ($oidbin) = $sth->fetchrow_array; + $oidbin ? unpack('H*', $oidbin) : undef; +} + sub imap_oid { my ($self, $lei, $uid_uri) = @_; my $mailbox_uri = $uid_uri->clone; @@ -373,16 +384,10 @@ sub imap_oid { } $lei->qerr(@{$err->{qerr}}) if $err->{qerr}; } - my $fid = $self->{fmap}->{$folders->[0]} //= - fid_for($self, $folders->[0]) // return; - my $sth = $self->{dbh}->prepare_cached(<<EOM, undef, 1); -SELECT oidbin FROM blob2num WHERE fid = ? AND uid = ? -EOM - $sth->execute($fid, $uid_uri->uid); - my ($oidbin) = $sth->fetchrow_array; - $oidbin ? unpack('H*', $oidbin) : undef; + imap_oid2($self, $folders->[0], $uid_uri->uid); } + # FIXME: something with "lei <up|q>" is causing uncommitted transaction # warnings, not sure what... sub DESTROY { diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm index 54c6b082..b97444fd 100644 --- a/lib/PublicInbox/NetReader.pm +++ b/lib/PublicInbox/NetReader.pm @@ -396,10 +396,8 @@ sub errors { undef; } -sub _imap_do_msg ($$$$$) { - my ($self, $uri, $uid, $raw, $flags) = @_; - # our target audience expects LF-only, save storage - $$raw =~ s/\r\n/\n/sg; +sub flags2kw ($$$$) { + my ($self, $uri, $uid, $flags) = @_; my $kw = []; for my $f (split(/ /, $flags)) { if (my $k = $IMAPflags2kw{$f}) { @@ -412,6 +410,14 @@ sub _imap_do_msg ($$$$$) { } } @$kw = sort @$kw; # for all UI/UX purposes + $kw; +} + +sub _imap_do_msg ($$$$$) { + my ($self, $uri, $uid, $raw, $flags) = @_; + # our target audience expects LF-only, save storage + $$raw =~ s/\r\n/\n/sg; + my $kw = flags2kw($self, $uri, $uid, $flags) // return; my ($eml_cb, @args) = @{$self->{eml_each}}; $eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args); } @@ -447,17 +453,56 @@ sub itrk_last ($$;$$) { ($itrk, $l_uid, $l_uidval //= $r_uidval); } +# import flags of already-seen messages +sub each_old_flags ($$$$) { + my ($self, $mic, $uri, $l_uid) = @_; + $l_uid ||= 1; + my $sec = uri_section($uri); + my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 10000; + my ($eml_cb, @args) = @{$self->{eml_each}}; + for (my $n = 1; $n <= $l_uid; $n += $bs) { + my $end = $n + $bs; + $end = $l_uid if $end > $l_uid; + my $r = $mic->fetch_hash("$n:$end", 'FLAGS'); + if (!$r) { + return if $!{EINTR} && $self->{quit}; + return "E: $uri UID FETCH $n:$end error: $!"; + } + while (my ($uid, $per_uid) = each %$r) { + my $kw = flags2kw($self, $uri, $uid, $per_uid->{FLAGS}) + // next; + $eml_cb->($uri, $uid, $kw, undef, @args); + } + } +} + +# returns true if PERMANENTFLAGS indicates FLAGS of already imported +# messages are meaningful +sub perm_fl_ok ($) { + my ($perm_fl) = @_; + return if !defined($perm_fl); + for my $f (split(/[ \t]+/, $perm_fl)) { + return 1 if $IMAPflags2kw{$f}; + } + undef; +} + sub _imap_fetch_all ($$$) { my ($self, $mic, $orig_uri) = @_; my $sec = uri_section($orig_uri); my $mbx = $orig_uri->mailbox; $mic->Clear(1); # trim results history - $mic->examine($mbx) or return "E: EXAMINE $mbx ($sec) failed: $!"; - my ($r_uidval, $r_uidnext); + + # we need to check for mailbox writability to see if we care about + # FLAGS from already-imported messages. + my $cmd = $self->{each_old} ? 'select' : 'examine'; + $mic->$cmd($mbx) or return "E: \U$cmd\E $mbx ($sec) failed: $!"; + + my ($r_uidval, $r_uidnext, $perm_fl); for ($mic->Results) { + /^\* OK \[PERMANENTFLAGS \(([^\)]*)\)\].*/ and $perm_fl = $1; /^\* OK \[UIDVALIDITY ([0-9]+)\].*/ and $r_uidval = $1; /^\* OK \[UIDNEXT ([0-9]+)\].*/ and $r_uidnext = $1; - last if $r_uidval && $r_uidnext; } $r_uidval //= $mic->uidvalidity($mbx) // return "E: $orig_uri cannot get UIDVALIDITY"; @@ -486,6 +531,13 @@ EOF E: $uri local UID exceeds remote ($l_uid > $r_uid) E: $uri strangely, UIDVALIDLITY matches ($l_uidval) EOF + $mic->Uid(1); # the default, we hope + my $err; + if (!defined($single_uid) && $self->{each_old} && + perm_fl_ok($perm_fl)) { + $err = each_old_flags($self, $mic, $uri, $l_uid); + return $err if $err; + } return if $l_uid >= $r_uid; # nothing to do $l_uid ||= 1; my ($mod, $shard) = @{$self->{shard_info} // []}; @@ -493,13 +545,11 @@ EOF my $m = $mod ? " [(UID % $mod) == $shard]" : ''; warn "# $uri fetching UID $l_uid:$r_uid$m\n"; } - $mic->Uid(1); # the default, we hope my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1; my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK'; my $key = $req; $key =~ s/\.PEEK//; my ($uids, $batch); - my $err; do { # I wish "UID FETCH $START:*" could work, but: # 1) servers do not need to return results in any order |