about summary refs log tree commit homepage
path: root/lib/PublicInbox/NetReader.pm
diff options
context:
space:
mode:
authorEric Wong <e@80x24.org>2021-10-08 22:00:19 +0000
committerEric Wong <e@80x24.org>2021-10-09 08:44:05 +0000
commit9f64411551e971251d17b3d346a7f250df5ef350 (patch)
tree93275a1d1341007036991e64865e1a067c752af0 /lib/PublicInbox/NetReader.pm
parentdd5a4f6497d4a5d94f61186a85516a56d7db8c29 (diff)
downloadpublic-inbox-9f64411551e971251d17b3d346a7f250df5ef350.tar.gz
We'll be supporting pipelining in a future commit, since
Tor is too slow and increasing batch size can use too much
memory.
Diffstat (limited to 'lib/PublicInbox/NetReader.pm')
-rw-r--r--lib/PublicInbox/NetReader.pm68
1 files changed, 39 insertions, 29 deletions
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 2b74af41..4da19ab9 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -539,6 +539,41 @@ sub perm_fl_ok ($) {
 # may be overridden in NetWriter or Watch
 sub folder_select { $_[0]->{each_old} ? 'select' : 'examine' }
 
+sub _imap_fetch_bodies ($$$$) {
+        my ($self, $mic, $uri, $uids) = @_;
+        my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
+        my $key = $req;
+        $key =~ s/\.PEEK//;
+        my $sec = uri_section($uri);
+        my $mbx = $uri->mailbox;
+        my $bs = $self->{cfg_opt}->{$sec}->{batch_size} // 1;
+        my ($last_uid, $err);
+        my $use_fl = $self->{-use_fl};
+
+        while (scalar @$uids) {
+                my @batch = splice(@$uids, 0, $bs);
+                my $batch = join(',', @batch);
+                local $0 = "UID:$batch $mbx $sec";
+                my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
+                unless ($r) { # network error?
+                        last if $!{EINTR} && $self->{quit};
+                        $err = "E: $uri UID FETCH $batch error: $!";
+                        last;
+                }
+                for my $uid (@batch) {
+                        # messages get deleted, so holes appear
+                        my $per_uid = delete $r->{$uid} // next;
+                        my $raw = delete($per_uid->{$key}) // next;
+                        my $fl = $use_fl ? $per_uid->{FLAGS} : undef;
+                        _imap_do_msg($self, $uri, $uid, \$raw, $fl);
+                        $last_uid = $uid;
+                        last if $self->{quit};
+                }
+                last if $self->{quit};
+        }
+        ($last_uid, $err);
+}
+
 sub _imap_fetch_all ($$$) {
         my ($self, $mic, $orig_uri) = @_;
         my $sec = uri_section($orig_uri);
@@ -586,6 +621,7 @@ EOF
         $mic->Uid(1); # the default, we hope
         my $err;
         my $use_fl = perm_fl_ok($perm_fl);
+        local $self->{-use_fl} = $use_fl;
         if (!defined($single_uid) && $self->{each_old} && $use_fl) {
                 $err = each_old_flags($self, $mic, $uri, $l_uid);
                 return $err if $err;
@@ -597,15 +633,12 @@ EOF
                 my $m = $mod ? " [(UID % $mod) == $shard]" : '';
                 warn "# $uri fetching UID $l_uid:$r_uid$m\n";
         }
-        my $bs = $self->{cfg_opt}->{$sec}->{batch_size} // 1;
-        my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
-        my $key = $req;
-        $key =~ s/\.PEEK//;
-        my ($uids, $batch);
+        my $fetch_cb = \&_imap_fetch_bodies;
         do {
                 # I wish "UID FETCH $START:*" could work, but:
                 # 1) servers do not need to return results in any order
                 # 2) Mail::IMAPClient doesn't offer a streaming API
+                my $uids;
                 if (defined $single_uid) {
                         $uids = [ $single_uid ];
                 } elsif (!($uids = $mic->search("UID $l_uid:*"))) {
@@ -623,31 +656,8 @@ EOF
                 return if $uids->[0] < $l_uid;
 
                 $l_uid = $uids->[-1] + 1; # for next search
-                my $last_uid;
-                my $n = $self->{max_batch};
-
                 @$uids = grep { ($_ % $mod) == $shard } @$uids if $mod;
-                while (scalar @$uids) {
-                        my @batch = splice(@$uids, 0, $bs);
-                        $batch = join(',', @batch);
-                        local $0 = "UID:$batch $mbx $sec";
-                        my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
-                        unless ($r) { # network error?
-                                last if $!{EINTR} && $self->{quit};
-                                $err = "E: $uri UID FETCH $batch error: $!";
-                                last;
-                        }
-                        for my $uid (@batch) {
-                                # messages get deleted, so holes appear
-                                my $per_uid = delete $r->{$uid} // next;
-                                my $raw = delete($per_uid->{$key}) // next;
-                                my $fl = $use_fl ? $per_uid->{FLAGS} : undef;
-                                _imap_do_msg($self, $uri, $uid, \$raw, $fl);
-                                $last_uid = $uid;
-                                last if $self->{quit};
-                        }
-                        last if $self->{quit};
-                }
+                (my $last_uid, $err) = $fetch_cb->($self, $mic, $uri, $uids);
                 run_commit_cb($self);
                 $itrk->update_last($r_uidval, $last_uid) if $itrk;
         } until ($err || $self->{quit} || defined($single_uid));