From mboxrd@z Thu Jan  1 00:00:00 1970
Return-Path: <e@yhbt.net>
X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net
X-Spam-Level: 
X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00
	shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2
Received: from localhost (dcvr.yhbt.net [127.0.0.1])
	by dcvr.yhbt.net (Postfix) with ESMTP id 5A7D41F922
	for <meta@public-inbox.org>; Sat, 27 Jun 2020 10:04:04 +0000 (UTC)
From: Eric Wong <e@yhbt.net>
To: meta@public-inbox.org
Subject: [PATCH 22/34] watch: support imap.fetchBatchSize parameter
Date: Sat, 27 Jun 2020 10:03:48 +0000
Message-Id: <20200627100400.9871-23-e@yhbt.net>
In-Reply-To: <20200627100400.9871-1-e@yhbt.net>
References: <20200627100400.9871-1-e@yhbt.net>
MIME-Version: 1.0
Content-Transfer-Encoding: 8bit
List-Id: <meta.public-inbox.org>

IMAP allows retrieving multiple messages with a single command,
and Mail::IMAPClient supports that.  Unfortunately, doing so means
we slurp multiple messages into memory at once.  This option lets
users trade higher memory usage for fewer network round-trips.

Ideally, we'd support pipelining; but AFAIK no widely installed
Perl IMAP library supports it.
---
 lib/PublicInbox/WatchMaildir.pm | 47 ++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 13 deletions(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index d492e5d65b7..05aa6594147 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -274,6 +274,15 @@ sub imap_common_init ($) {
 		$self->{imap_opt}->{$sec}->{poll_intvl} = $to if $to;
 		$to = cfg_intvl($cfg, 'imap', 'IdleInterval', $sec, $url);
 		$self->{imap_opt}->{$sec}->{idle_intvl} = $to if $to;
+
+		my $key = lc("imap.$sec.fetchBatchSize");
+		my $bs = $cfg->{lc($key)} //
+			$cfg->urlmatch('imap.fetchBatchSize', $url) // next;
+		if ($bs =~ /\A([0-9]+)\z/) {
+			$self->{imap_opt}->{$sec}->{batch_size} = $bs;
+		} else {
+			warn "W: $key=$bs is not an integer\n";
+		}
 	}
 	$mic_args;
 }
@@ -389,25 +398,31 @@ sub imap_fetch_all ($$$) {
 
 	warn "I: $url fetching UID $l_uid:$r_uid\n";
 	$mic->Uid(1); # the default, we hope
-	my $uids;
+	my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
 	my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
+
+	# TODO: FLAGS may be useful for personal use
 	my $key = $req;
 	$key =~ s/\.PEEK//;
-	my $uid;
+	my ($uids, $batch);
 	my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
 	local $SIG{__WARN__} = sub {
-		$uid //= -1;
-		$warn_cb->("$url UID:$uid\n");
+		$batch //= '?';
+		$warn_cb->("$url UID:$batch\n");
 		$warn_cb->(@_);
 	};
 	my $err;
 	do {
+		# I wish "UID FETCH $START:*" could work, but:
+		# 1) servers do not need to return results in any order
+		# 2) Mail::IMAPClient doesn't offer a streaming API
 		$uids = $mic->search("UID $l_uid:*") or
 			return "E: $url UID SEARCH $l_uid:* error: $!";
 		return if scalar(@$uids) == 0;
 
 		# RFC 3501 doesn't seem to indicate order of UID SEARCH
-		# responses, so sort it ourselves
+		# responses, so sort it ourselves.  Order matters so
+		# IMAPTracker can store the newest UID.
 		@$uids = sort { $a <=> $b } @$uids;
 
 		# Did we actually get new messages?
@@ -416,17 +431,23 @@ sub imap_fetch_all ($$$) {
 		$l_uid = $uids->[-1] + 1; # for next search
 		my $last_uid;
 
-		while (defined(($uid = shift(@$uids)))) {
-			local $0 = "UID:$uid $mbx $sec";
-			my $r = $mic->fetch_hash($uid, $req);
+		while (scalar @$uids) {
+			my @batch = splice(@$uids, 0, $bs);
+			$batch = join(',', @batch);
+			local $0 = "UID:$batch $mbx $sec";
+			my $r = $mic->fetch_hash($batch, $req);
 			unless ($r) { # network error?
-				$err = "E: $url UID FETCH $uid error: $!";
+				$err = "E: $url UID FETCH $batch error: $!";
 				last;
 			}
-			# messages get deleted, so holes appear
-			defined(my $raw = delete $r->{$uid}->{$key}) or next;
-			imap_import_msg($self, $url, $uid, \$raw);
-			$last_uid = $uid;
+			for my $uid (@batch) {
+				# messages get deleted, so holes appear
+				my $per_uid = delete $r->{$uid} // next;
+				my $raw = delete($per_uid->{$key}) // next;
+				imap_import_msg($self, $url, $uid, \$raw);
+				$last_uid = $uid;
+				last if $self->{quit};
+			}
 			last if $self->{quit};
 		}
 		_done_for_now($self);