about summary refs log tree commit homepage
path: root/lib/PublicInbox/WatchMaildir.pm
diff options
context:
space:
mode:
authorEric Wong <e@yhbt.net>2020-06-27 10:03:43 +0000
committerEric Wong <e@yhbt.net>2020-06-28 22:27:19 +0000
commitbb7b738bc49558f869a14a2eaecbc5e7f401de51 (patch)
treea3a13161965221ed159807a5552279748fdb3f16 /lib/PublicInbox/WatchMaildir.pm
parent4c2d7dce2526d978e43021707ecb60a4bc277138 (diff)
downloadpublic-inbox-bb7b738bc49558f869a14a2eaecbc5e7f401de51.tar.gz
For mailboxes with many gaps in the UID sequence,
performing a UID SEARCH beforehand can reduce the
number of articles to fetch.

However, the downside to this is we may end up with
an arbitrarily large list of UIDs from the server.
Diffstat (limited to 'lib/PublicInbox/WatchMaildir.pm')
-rw-r--r--lib/PublicInbox/WatchMaildir.pm88
1 files changed, 54 insertions, 34 deletions
diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 24989130..b82b5102 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -335,6 +335,27 @@ sub mic_for ($$$) { # mic = Mail::IMAPClient
         $mic;
 }
 
+sub imap_import_msg ($$$$$$) {
+        my ($self, $itrk, $url, $r_uidval, $uid, $raw) = @_;
+        # our target audience expects LF-only, save storage
+        $$raw =~ s/\r\n/\n/sg;
+
+        my $inboxes = $self->{imap}->{$url};
+        if (ref($inboxes)) {
+                for my $ibx (@$inboxes) {
+                        my $eml = PublicInbox::Eml->new($$raw);
+                        my $x = import_eml($self, $ibx, $eml);
+                }
+        } elsif ($inboxes eq 'watchspam') {
+                my $eml = PublicInbox::Eml->new($raw);
+                my $arg = [ $self, $eml, "$url UID:$uid" ];
+                $self->{config}->each_inbox(\&remove_eml_i, $arg);
+        } else {
+                die "BUG: destination unknown $inboxes";
+        }
+        $itrk->update_last($url, $r_uidval, $uid);
+}
+
 sub imap_fetch_all ($$$) {
         my ($self, $mic, $uri) = @_;
         my $sec = imap_section($uri);
@@ -367,52 +388,51 @@ sub imap_fetch_all ($$$) {
         }
         return if $l_uid >= $r_uid; # nothing to do
 
+        warn "I: $url fetching UID $l_uid:$r_uid\n";
         $mic->Uid(1); # the default, we hope
+        my $uids;
         my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
         my $key = $req;
         $key =~ s/\.PEEK//;
-        my $inboxes = $self->{imap}->{$url};
-        warn "I: $url fetching $l_uid..$r_uid\n";
-        my $uid = -1;
+        my $uid;
         my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
         local $SIG{__WARN__} = sub {
+                $uid //= -1;
                 $warn_cb->("$url UID:$uid\n");
                 $warn_cb->(@_);
         };
         my $err;
-        $itrk->{dbh}->begin_work;
-        for my $u ($l_uid..$r_uid) {
-                $uid = $u;
-                local $0 = "UID:$uid $mbx $sec";
-                my $r = $mic->fetch_hash($uid, $req);
-                unless ($r) { # network error?
-                        $err = "E: $url UID FETCH $uid error: $!\n";
-                        last;
-                }
-
-                # messages get deleted, so holes appear
-                defined(my $raw = delete $r->{$uid}->{$key}) or next;
-
-                # our target audience expects LF-only, save storage
-                $raw =~ s/\r\n/\n/sg;
-
-                if (ref($inboxes)) {
-                        for my $ibx (@$inboxes) {
-                                my $eml = PublicInbox::Eml->new($raw);
-                                my $x = import_eml($self, $ibx, $eml);
+        do {
+                $uids = $mic->search("UID $l_uid:*") or
+                        return "E: $url UID SEARCH $l_uid:* error: $!";
+                return if scalar(@$uids) == 0;
+
+                # RFC 3501 doesn't seem to indicate order of UID SEARCH
+                # responses, so sort it ourselves
+                @$uids = sort { $a <=> $b } @$uids;
+
+                # Did we actually get new messages?
+                return if $uids->[0] < $l_uid;
+
+                $l_uid = $uids->[-1] + 1; # for next search
+
+                $itrk->{dbh}->begin_work;
+                while (defined(($uid = shift(@$uids)))) {
+                        local $0 = "UID:$uid $mbx $sec";
+                        my $r = $mic->fetch_hash($uid, $req);
+                        unless ($r) { # network error?
+                                $err = "E: $url UID FETCH $uid error: $!";
+                                last;
                         }
-                } elsif ($inboxes eq 'watchspam') {
-                        my $eml = PublicInbox::Eml->new($raw);
-                        my $arg = [ $self, $eml, "$uri UID:$uid" ];
-                        $self->{config}->each_inbox(\&remove_eml_i, $arg);
-                } else {
-                        die "BUG: destination unknown $inboxes";
+                        # messages get deleted, so holes appear
+                        defined(my $raw = delete $r->{$uid}->{$key}) or next;
+                        imap_import_msg($self, $itrk, $url, $r_uidval, $uid,
+                                        \$raw);
+                        last if $self->{quit};
                 }
-                $itrk->update_last($url, $r_uidval, $uid);
-                last if $self->{quit};
-        }
-        _done_for_now($self);
-        $itrk->{dbh}->commit;
+                _done_for_now($self);
+                $itrk->{dbh}->commit;
+        } until ($err || $self->{quit});
         $err;
 }