user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 17/34] watch: use UID SEARCH to avoid empty UID FETCH
  2020-06-27 10:03  6% [PATCH 00/34] watch: add IMAP and NNTP support Eric Wong
@ 2020-06-27 10:03  7% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2020-06-27 10:03 UTC (permalink / raw)
  To: meta

For mailboxes with many gaps in the UID sequence,
performing a UID SEARCH beforehand can reduce the
number of articles to fetch.

However, the downside to this is we may end up with
an arbitrarly large list of UIDs from the server.
---
 lib/PublicInbox/WatchMaildir.pm | 88 ++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 34 deletions(-)

diff --git a/lib/PublicInbox/WatchMaildir.pm b/lib/PublicInbox/WatchMaildir.pm
index 24989130979..b82b51025e6 100644
--- a/lib/PublicInbox/WatchMaildir.pm
+++ b/lib/PublicInbox/WatchMaildir.pm
@@ -335,6 +335,27 @@ sub mic_for ($$$) { # mic = Mail::IMAPClient
 	$mic;
 }
 
+sub imap_import_msg ($$$$$$) {
+	my ($self, $itrk, $url, $r_uidval, $uid, $raw) = @_;
+	# our target audience expects LF-only, save storage
+	$$raw =~ s/\r\n/\n/sg;
+
+	my $inboxes = $self->{imap}->{$url};
+	if (ref($inboxes)) {
+		for my $ibx (@$inboxes) {
+			my $eml = PublicInbox::Eml->new($$raw);
+			my $x = import_eml($self, $ibx, $eml);
+		}
+	} elsif ($inboxes eq 'watchspam') {
+		my $eml = PublicInbox::Eml->new($raw);
+		my $arg = [ $self, $eml, "$url UID:$uid" ];
+		$self->{config}->each_inbox(\&remove_eml_i, $arg);
+	} else {
+		die "BUG: destination unknown $inboxes";
+	}
+	$itrk->update_last($url, $r_uidval, $uid);
+}
+
 sub imap_fetch_all ($$$) {
 	my ($self, $mic, $uri) = @_;
 	my $sec = imap_section($uri);
@@ -367,52 +388,51 @@ sub imap_fetch_all ($$$) {
 	}
 	return if $l_uid >= $r_uid; # nothing to do
 
+	warn "I: $url fetching UID $l_uid:$r_uid\n";
 	$mic->Uid(1); # the default, we hope
+	my $uids;
 	my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
 	my $key = $req;
 	$key =~ s/\.PEEK//;
-	my $inboxes = $self->{imap}->{$url};
-	warn "I: $url fetching $l_uid..$r_uid\n";
-	my $uid = -1;
+	my $uid;
 	my $warn_cb = $SIG{__WARN__} || sub { print STDERR @_ };
 	local $SIG{__WARN__} = sub {
+		$uid //= -1;
 		$warn_cb->("$url UID:$uid\n");
 		$warn_cb->(@_);
 	};
 	my $err;
-	$itrk->{dbh}->begin_work;
-	for my $u ($l_uid..$r_uid) {
-		$uid = $u;
-		local $0 = "UID:$uid $mbx $sec";
-		my $r = $mic->fetch_hash($uid, $req);
-		unless ($r) { # network error?
-			$err = "E: $url UID FETCH $uid error: $!\n";
-			last;
-		}
-
-		# messages get deleted, so holes appear
-		defined(my $raw = delete $r->{$uid}->{$key}) or next;
-
-		# our target audience expects LF-only, save storage
-		$raw =~ s/\r\n/\n/sg;
-
-		if (ref($inboxes)) {
-			for my $ibx (@$inboxes) {
-				my $eml = PublicInbox::Eml->new($raw);
-				my $x = import_eml($self, $ibx, $eml);
+	do {
+		$uids = $mic->search("UID $l_uid:*") or
+			return "E: $url UID SEARCH $l_uid:* error: $!";
+		return if scalar(@$uids) == 0;
+
+		# RFC 3501 doesn't seem to indicate order of UID SEARCH
+		# responses, so sort it ourselves
+		@$uids = sort { $a <=> $b } @$uids;
+
+		# Did we actually get new messages?
+		return if $uids->[0] < $l_uid;
+
+		$l_uid = $uids->[-1] + 1; # for next search
+
+		$itrk->{dbh}->begin_work;
+		while (defined(($uid = shift(@$uids)))) {
+			local $0 = "UID:$uid $mbx $sec";
+			my $r = $mic->fetch_hash($uid, $req);
+			unless ($r) { # network error?
+				$err = "E: $url UID FETCH $uid error: $!";
+				last;
 			}
-		} elsif ($inboxes eq 'watchspam') {
-			my $eml = PublicInbox::Eml->new($raw);
-			my $arg = [ $self, $eml, "$uri UID:$uid" ];
-			$self->{config}->each_inbox(\&remove_eml_i, $arg);
-		} else {
-			die "BUG: destination unknown $inboxes";
+			# messages get deleted, so holes appear
+			defined(my $raw = delete $r->{$uid}->{$key}) or next;
+			imap_import_msg($self, $itrk, $url, $r_uidval, $uid,
+					\$raw);
+			last if $self->{quit};
 		}
-		$itrk->update_last($url, $r_uidval, $uid);
-		last if $self->{quit};
-	}
-	_done_for_now($self);
-	$itrk->{dbh}->commit;
+		_done_for_now($self);
+		$itrk->{dbh}->commit;
+	} until ($err || $self->{quit});
 	$err;
 }
 

^ permalink raw reply related	[relevance 7%]

* [PATCH 00/34] watch: add IMAP and NNTP support
@ 2020-06-27 10:03  6% Eric Wong
  2020-06-27 10:03  7% ` [PATCH 17/34] watch: use UID SEARCH to avoid empty UID FETCH Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2020-06-27 10:03 UTC (permalink / raw)
  To: meta

Some fairly major changes to -watch.  Filesys::Notify::Simple is
no longer used, and -watch now uses inotify, signalfd or kevent
like the read-only daemons.

Credentials are handled via Net::Netrc (Perl standard library)
or "git-credential", so we do no password storage on our own.

NNTP (and non-IDLE IMAP) may allow more parallelization in the
future.

One significant project-wide change is getting rid of "use
fields".  It gets in my way more than it helps, and it's
probably alien to a fair amount of Perl hackers.  AFAIK, it's
never really been popular outside of Danga::Socket-based
projects.


Eric W. Biederman (1):
  IMAPTracker: Add a helper to track our place in reading imap mailboxes

Eric Wong (33):
  inboxwritable: ensure ssoma.lock exists on init
  inbox: warn on ->on_inbox_unlock exception
  imaptracker: use ~/.local/share/public-inbox/imap.sqlite3
  watchmaildir: hoist out compile_watchheaders
  watchmaildir: fix check for spam vs ham inbox conflicts
  URI IMAP support
  watch: preliminary IMAP support
  kqnotify|fake_inotify: detect Maildir write ops
  watch: remove Filesys::Notify::Simple dependency
  watch: use signalfd for Maildir watching
  ds: remove fields.pm usage
  watch: wire up IMAP IDLE reapers to DS
  watch: support IMAP polling
  config: support ->urlmatch method for -watch
  watch: stop importers before forking
  watch: use UID SEARCH to avoid empty UID FETCH
  ds: add_timer: allow passing arg to callback.
  imaptracker: add {url} field to reduce args
  imaptracker: drop {dbname} field
  watch: avoid long transaction when writing to IMAPTracker
  watch: support imap.fetchBatchSize parameter
  watch: imap: be quieter about disconnecting on quit
  watch: support multiple watch: directives per-inbox
  watch: remove {mdir} array
  watch: just use ->urlmatch
  testcommon: $ENV{TAIL} supports non-@ARGV redirects
  watch: add NNTP support
  watch: show user-specified URL consistently.
  watch: enable autoflush for STDOUT and STDERR
  watch: use our own "git credential" wrapper
  watch: support ~/.netrc via Net::Netrc
  imaptracker: use flock(2) around writes
  watch: simplify internal structures

 Documentation/public-inbox-watch.pod |   3 +-
 INSTALL                              |   8 -
 MANIFEST                             |  11 +
 Makefile.PL                          |   4 -
 ci/deps.perl                         |   1 -
 lib/PublicInbox/Config.pm            |  21 +-
 lib/PublicInbox/DS.pm                |  29 +-
 lib/PublicInbox/Daemon.pm            |  19 +-
 lib/PublicInbox/DirIdle.pm           |  49 ++
 lib/PublicInbox/FakeInotify.pm       |  56 +-
 lib/PublicInbox/GitAsyncCat.pm       |   4 +-
 lib/PublicInbox/GitCredential.pm     |  55 ++
 lib/PublicInbox/HTTP.pm              |  23 +-
 lib/PublicInbox/HTTPD/Async.pm       |  22 +-
 lib/PublicInbox/IMAP.pm              |  19 +-
 lib/PublicInbox/IMAPTracker.pm       |  82 +++
 lib/PublicInbox/In2Tie.pm            |  13 +
 lib/PublicInbox/Inbox.pm             |   1 +
 lib/PublicInbox/InboxIdle.pm         |  20 +-
 lib/PublicInbox/InboxWritable.pm     |   3 +
 lib/PublicInbox/KQNotify.pm          |  38 +-
 lib/PublicInbox/Listener.pm          |   8 +-
 lib/PublicInbox/NNTP.pm              |  12 +-
 lib/PublicInbox/NNTPdeflate.pm       |   5 +-
 lib/PublicInbox/ParentPipe.pm        |   8 +-
 lib/PublicInbox/Sigfd.pm             |  21 +-
 lib/PublicInbox/TestCommon.pm        |  40 +-
 lib/PublicInbox/URIimap.pm           | 113 +++
 lib/PublicInbox/WatchMaildir.pm      | 998 +++++++++++++++++++++++----
 script/public-inbox-watch            |  33 +-
 t/config.t                           |  18 +
 t/dir_idle.t                         |   6 +
 t/fake_inotify.t                     |  45 ++
 t/imap_tracker.t                     |  54 ++
 t/imapd.t                            |  74 ++
 t/kqnotify.t                         |  41 ++
 t/nntpd.t                            |  52 ++
 t/uri_imap.t                         |  65 ++
 t/watch_filter_rubylang.t            |   2 +-
 t/watch_imap.t                       |  21 +
 t/watch_maildir.t                    |  96 ++-
 t/watch_maildir_v2.t                 |   4 +-
 t/watch_multiple_headers.t           |   2 +-
 t/watch_nntp.t                       |  17 +
 xt/mem-imapd-tls.t                   |  18 +-
 45 files changed, 1944 insertions(+), 290 deletions(-)
 create mode 100644 lib/PublicInbox/DirIdle.pm
 create mode 100644 lib/PublicInbox/GitCredential.pm
 create mode 100644 lib/PublicInbox/IMAPTracker.pm
 create mode 100644 lib/PublicInbox/URIimap.pm
 create mode 100644 t/dir_idle.t
 create mode 100644 t/fake_inotify.t
 create mode 100644 t/imap_tracker.t
 create mode 100644 t/kqnotify.t
 create mode 100644 t/uri_imap.t
 create mode 100644 t/watch_imap.t
 create mode 100644 t/watch_nntp.t


^ permalink raw reply	[relevance 6%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2020-06-27 10:03  6% [PATCH 00/34] watch: add IMAP and NNTP support Eric Wong
2020-06-27 10:03  7% ` [PATCH 17/34] watch: use UID SEARCH to avoid empty UID FETCH Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).