user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
Search results ordered by [date|relevance]  view[summary|nested|Atom feed]
thread overview below | download mbox.gz: |
* [PATCH 0/7] "lei q -o imaps://..." support
@ 2021-02-21  7:41  7% Eric Wong
  2021-02-21  7:41  5% ` [PATCH 6/7] net_reader: use and accept URIimap objects in more places Eric Wong
  0 siblings, 1 reply; 2+ results
From: Eric Wong @ 2021-02-21  7:41 UTC (permalink / raw)
  To: meta

-a/--augment dedupe is now parallel for both Maildirs and IMAP
stores (probably not worth the serialization cost for mbox*).

LeiAuth remains inefficient, unfortunately, but wq_broadcast
has been added to address it in the future.

The parallelization work for IMAP for "lei q" can also be done
for "lei convert" and "lei import", but it'll probably be opt-in
in case people care about preserving UID order.

Eric Wong (7):
  inbox_writable: require PublicInbox::MdirReader
  lei q: support IMAP/IMAPS --output destinations
  ipc: add wq_broadcast
  lei q: move augment into lei2mail workers
  ipc: support setting a locked number of WQ workers
  net_reader: use and accept URIimap objects in more places
  lei2mail: parallel augment for lock-free stores

 lib/PublicInbox/IPC.pm           |  35 +++++++--
 lib/PublicInbox/InboxWritable.pm |   1 +
 lib/PublicInbox/LeiAuth.pm       |   2 +-
 lib/PublicInbox/LeiOverview.pm   |   7 +-
 lib/PublicInbox/LeiQuery.pm      |  24 +++++--
 lib/PublicInbox/LeiToMail.pm     |  93 ++++++++++++++++++++++--
 lib/PublicInbox/LeiXSearch.pm    |  48 ++++++-------
 lib/PublicInbox/NetReader.pm     |  75 +++++++++++---------
 lib/PublicInbox/NetWriter.pm     |  12 ++++
 lib/PublicInbox/WQWorker.pm      |   8 +--
 lib/PublicInbox/Watch.pm         |  11 +--
 t/ipc.t                          |  39 +++++-----
 t/lei-externals.t                |   3 +-
 xt/net_writer-imap.t             | 118 ++++++++++++++++++++++++++++---
 14 files changed, 362 insertions(+), 114 deletions(-)


^ permalink raw reply	[relevance 7%]

* [PATCH 6/7] net_reader: use and accept URIimap objects in more places
  2021-02-21  7:41  7% [PATCH 0/7] "lei q -o imaps://..." support Eric Wong
@ 2021-02-21  7:41  5% ` Eric Wong
  0 siblings, 0 replies; 2+ results
From: Eric Wong @ 2021-02-21  7:41 UTC (permalink / raw)
  To: meta

This flexibility should save us some code down the line.
---
 lib/PublicInbox/LeiToMail.pm |  7 ++--
 lib/PublicInbox/NetReader.pm | 63 +++++++++++++++++-------------------
 lib/PublicInbox/Watch.pm     | 11 ++++---
 xt/net_writer-imap.t         | 10 +++---
 4 files changed, 44 insertions(+), 47 deletions(-)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index e5398912..b5d560c7 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -337,7 +337,7 @@ sub _imap_write_cb ($$) {
 	my $dedupe = $lei->{dedupe};
 	$dedupe->prepare_dedupe if $dedupe;
 	my $imap_append = $lei->{nwr}->can('imap_append');
-	my $mic = $lei->{nwr}->mic_get($lei->{ovv}->{dst});
+	my $mic = $lei->{nwr}->mic_get($self->{uri});
 	my $folder = $self->{uri}->mailbox;
 	sub { # for git_to_mail
 		my ($bref, $smsg, $eml) = @_;
@@ -436,16 +436,15 @@ sub _augment_imap { # PublicInbox::NetReader::imap_each cb
 
 sub _do_augment_imap {
 	my ($self, $lei) = @_;
-	my $dst = $lei->{ovv}->{dst};
 	my $nwr = $lei->{nwr};
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
-			$nwr->imap_each($dst, \&_augment_imap, $lei);
+			$nwr->imap_each($self->{uri}, \&_augment_imap, $lei);
 			$dedupe->pause_dedupe;
 		}
 	} else { # clobber existing IMAP folder
-		$nwr->imap_delete_all($dst);
+		$nwr->imap_delete_all($self->{uri});
 	}
 }
 
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 541094a0..4c412491 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -12,8 +12,8 @@ our %IMAPflags2kw = map {; "\\\u$_" => $_ } qw(seen answered flagged draft);
 
 # TODO: trim this down, this is huge
 our @EXPORT = qw(uri_new uri_scheme uri_section
-		mic_for nn_new nn_for
-		imap_url nntp_url
+		nn_new nn_for
+		imap_uri nntp_url
 		cfg_bool cfg_intvl imap_common_init
 		);
 
@@ -213,11 +213,11 @@ W: see https://rt.cpan.org/Ticket/Display.html?id=129967 for updates
 	$nn;
 }
 
-sub imap_url {
+sub imap_uri {
 	my ($url) = @_;
 	require PublicInbox::URIimap;
 	my $uri = PublicInbox::URIimap->new($url);
-	$uri ? $uri->canonical->as_string : undef;
+	$uri ? $uri->canonical : undef;
 }
 
 my %IS_NNTP = (news => 1, snews => 1, nntp => 1);
@@ -262,21 +262,20 @@ sub imap_common_init ($;$) {
 	require PublicInbox::URIimap;
 	my $cfg = $self->{pi_cfg} // $lei->_lei_cfg;
 	my $mic_args = {}; # scheme://authority => Mail:IMAPClient arg
-	for my $url (@{$self->{imap_order}}) {
-		my $uri = PublicInbox::URIimap->new($url);
+	for my $uri (@{$self->{imap_order}}) {
 		my $sec = uri_section($uri);
 		for my $k (qw(Starttls Debug Compress)) {
-			my $bool = cfg_bool($cfg, "imap.$k", $url) // next;
+			my $bool = cfg_bool($cfg, "imap.$k", $$uri) // next;
 			$mic_args->{$sec}->{$k} = $bool;
 		}
-		my $to = cfg_intvl($cfg, 'imap.timeout', $url);
+		my $to = cfg_intvl($cfg, 'imap.timeout', $$uri);
 		$mic_args->{$sec}->{Timeout} = $to if $to;
 		for my $k (qw(pollInterval idleInterval)) {
-			$to = cfg_intvl($cfg, "imap.$k", $url) // next;
+			$to = cfg_intvl($cfg, "imap.$k", $$uri) // next;
 			$self->{imap_opt}->{$sec}->{$k} = $to;
 		}
 		my $k = 'imap.fetchBatchSize';
-		my $bs = $cfg->urlmatch($k, $url) // next;
+		my $bs = $cfg->urlmatch($k, $$uri) // next;
 		if ($bs =~ /\A([0-9]+)\z/) {
 			$self->{imap_opt}->{$sec}->{batch_size} = $bs;
 		} else {
@@ -286,23 +285,22 @@ sub imap_common_init ($;$) {
 	# make sure we can connect and cache the credentials in memory
 	$self->{mic_arg} = {}; # schema://authority => IMAPClient->new args
 	my $mics = {}; # schema://authority => IMAPClient obj
-	for my $url (@{$self->{imap_order}}) {
-		my $uri = PublicInbox::URIimap->new($url);
+	for my $uri (@{$self->{imap_order}}) {
 		my $sec = uri_section($uri);
 		$mics->{$sec} //= mic_for($self, "$sec/", $mic_args, $lei);
 		next unless $self->isa('PublicInbox::NetWriter');
 		my $dst = $uri->mailbox // next;
 		my $mic = $mics->{$sec};
 		next if $mic->exists($dst); # already exists
-		$mic->create($dst) or die "CREATE $dst failed <$url>: $@";
+		$mic->create($dst) or die "CREATE $dst failed <$uri>: $@";
 	}
 	$mics;
 }
 
 sub add_url {
 	my ($self, $arg) = @_;
-	if (my $url = imap_url($arg)) {
-		push @{$self->{imap_order}}, $url;
+	if (my $uri = imap_uri($arg)) {
+		push @{$self->{imap_order}}, $uri;
 	} else {
 		push @{$self->{unsupported_url}}, $arg;
 	}
@@ -321,7 +319,7 @@ sub errors {
 }
 
 sub _imap_do_msg ($$$$$) {
-	my ($self, $url, $uid, $raw, $flags) = @_;
+	my ($self, $uri, $uid, $raw, $flags) = @_;
 	# our target audience expects LF-only, save storage
 	$$raw =~ s/\r\n/\n/sg;
 	my $kw = [];
@@ -330,12 +328,11 @@ sub _imap_do_msg ($$$$$) {
 		push @$kw, $k;
 	}
 	my ($eml_cb, @args) = @{$self->{eml_each}};
-	$eml_cb->($url, $uid, $kw, PublicInbox::Eml->new($raw), @args);
+	$eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
 }
 
 sub _imap_fetch_all ($$$) {
-	my ($self, $mic, $url) = @_;
-	my $uri = PublicInbox::URIimap->new($url);
+	my ($self, $mic, $uri) = @_;
 	my $sec = uri_section($uri);
 	my $mbx = $uri->mailbox;
 	$mic->Clear(1); # trim results history
@@ -347,27 +344,27 @@ sub _imap_fetch_all ($$$) {
 		last if $r_uidval && $r_uidnext;
 	}
 	$r_uidval //= $mic->uidvalidity($mbx) //
-		return "E: $url cannot get UIDVALIDITY";
+		return "E: $uri cannot get UIDVALIDITY";
 	$r_uidnext //= $mic->uidnext($mbx) //
-		return "E: $url cannot get UIDNEXT";
+		return "E: $uri cannot get UIDNEXT";
 	my $itrk = $self->{incremental} ?
-			PublicInbox::IMAPTracker->new($url) : 0;
+			PublicInbox::IMAPTracker->new($$uri) : 0;
 	my ($l_uidval, $l_uid) = $itrk ? $itrk->get_last : ();
 	$l_uidval //= $r_uidval; # first time
 	$l_uid //= 0;
 	if ($l_uidval != $r_uidval) {
-		return "E: $url UIDVALIDITY mismatch\n".
+		return "E: $uri UIDVALIDITY mismatch\n".
 			"E: local=$l_uidval != remote=$r_uidval";
 	}
 	my $r_uid = $r_uidnext - 1;
 	if ($l_uid > $r_uid) {
-		return "E: $url local UID exceeds remote ($l_uid > $r_uid)\n".
-			"E: $url strangely, UIDVALIDLITY matches ($l_uidval)\n";
+		return "E: $uri local UID exceeds remote ($l_uid > $r_uid)\n".
+			"E: $uri strangely, UIDVALIDLITY matches ($l_uidval)\n";
 	}
 	return if $l_uid >= $r_uid; # nothing to do
 	$l_uid ||= 1;
 
-	warn "# $url fetching UID $l_uid:$r_uid\n" unless $self->{quiet};
+	warn "# $uri fetching UID $l_uid:$r_uid\n" unless $self->{quiet};
 	$mic->Uid(1); # the default, we hope
 	my $bs = $self->{imap_opt}->{$sec}->{batch_size} // 1;
 	my $req = $mic->imap4rev1 ? 'BODY.PEEK[]' : 'RFC822.PEEK';
@@ -380,7 +377,7 @@ sub _imap_fetch_all ($$$) {
 		# 1) servers do not need to return results in any order
 		# 2) Mail::IMAPClient doesn't offer a streaming API
 		$uids = $mic->search("UID $l_uid:*") or
-			return "E: $url UID SEARCH $l_uid:* error: $!";
+			return "E: $uri UID SEARCH $l_uid:* error: $!";
 		return if scalar(@$uids) == 0;
 
 		# RFC 3501 doesn't seem to indicate order of UID SEARCH
@@ -400,14 +397,14 @@ sub _imap_fetch_all ($$$) {
 			local $0 = "UID:$batch $mbx $sec";
 			my $r = $mic->fetch_hash($batch, $req, 'FLAGS');
 			unless ($r) { # network error?
-				$err = "E: $url UID FETCH $batch error: $!";
+				$err = "E: $uri UID FETCH $batch error: $!";
 				last;
 			}
 			for my $uid (@batch) {
 				# messages get deleted, so holes appear
 				my $per_uid = delete $r->{$uid} // next;
 				my $raw = delete($per_uid->{$key}) // next;
-				_imap_do_msg($self, $url, $uid, \$raw,
+				_imap_do_msg($self, $uri, $uid, \$raw,
 						$per_uid->{FLAGS});
 				$last_uid = $uid;
 				last if $self->{quit};
@@ -424,7 +421,7 @@ sub mic_get {
 	my ($self, $sec) = @_;
 	my $mic_arg = $self->{mic_arg}->{$sec};
 	unless ($mic_arg) {
-		my $uri = PublicInbox::URIimap->new($sec);
+		my $uri = ref $sec ? $sec : PublicInbox::URIimap->new($sec);
 		$sec = uri_section($uri);
 		$mic_arg = $self->{mic_arg}->{$sec} or
 			die "BUG: no Mail::IMAPClient->new arg for $sec";
@@ -440,14 +437,14 @@ sub mic_get {
 
 sub imap_each {
 	my ($self, $url, $eml_cb, @args) = @_;
-	my $uri = PublicInbox::URIimap->new($url);
+	my $uri = ref($url) ? $url : PublicInbox::URIimap->new($url);
 	my $sec = uri_section($uri);
 	local $0 = $uri->mailbox." $sec";
-	my $mic = mic_get($self, $sec);
+	my $mic = mic_get($self, $uri);
 	my $err;
 	if ($mic) {
 		local $self->{eml_each} = [ $eml_cb, @args ];
-		$err = _imap_fetch_all($self, $mic, $url);
+		$err = _imap_fetch_all($self, $mic, $uri);
 	} else {
 		$err = "E: not connected: $!";
 	}
diff --git a/lib/PublicInbox/Watch.pm b/lib/PublicInbox/Watch.pm
index c64689a1..8d13ea35 100644
--- a/lib/PublicInbox/Watch.pm
+++ b/lib/PublicInbox/Watch.pm
@@ -60,9 +60,9 @@ sub new {
 			if (is_maildir($dir)) {
 				# skip "new", no MUA has seen it, yet.
 				$mdmap{"$dir/cur"} = 'watchspam';
-			} elsif ($url = imap_url($dir)) {
-				$imap{$url} = 'watchspam';
-				push @imap, $url;
+			} elsif (my $uri = imap_uri($dir)) {
+				$imap{$$uri} = 'watchspam';
+				push @imap, $uri;
 			} elsif ($url = nntp_url($dir)) {
 				$nntp{$url} = 'watchspam';
 				push @nntp, $url;
@@ -92,11 +92,12 @@ sub new {
 				return if is_watchspam($cur, $cur_dst, $ibx);
 				push @{$mdmap{$new} //= []}, $ibx;
 				push @$cur_dst, $ibx;
-			} elsif ($url = imap_url($watch)) {
+			} elsif (my $uri = imap_uri($watch)) {
+				my $url = $$uri;
 				return if is_watchspam($url, $imap{$url}, $ibx);
 				compile_watchheaders($ibx);
 				my $n = push @{$imap{$url} ||= []}, $ibx;
-				push @imap, $url if $n == 1;
+				push @imap, $uri if $n == 1;
 			} elsif ($url = nntp_url($watch)) {
 				return if is_watchspam($url, $nntp{$url}, $ibx);
 				compile_watchheaders($ibx);
diff --git a/xt/net_writer-imap.t b/xt/net_writer-imap.t
index 4832245a..64f822cf 100644
--- a/xt/net_writer-imap.t
+++ b/xt/net_writer-imap.t
@@ -79,7 +79,7 @@ my $mics = do {
 };
 my $mic = (values %$mics)[0];
 my $cleanup = PublicInbox::OnDestroy->new($$, sub {
-	my $mic = $nwr->mic_get($imap_url);
+	my $mic = $nwr->mic_get($uri);
 	$mic->delete($folder) or fail "delete $folder <$folder_uri>: $@";
 	if ($tmpdir && -f "$tmpdir/.gitconfig") {
 		local $ENV{HOME} = $tmpdir;
@@ -94,7 +94,7 @@ my $imap_slurp_all = sub {
 	my ($u, $uid, $kw, $eml, $res) = @_;
 	push @$res, [ $kw, $eml ];
 };
-$nwr->imap_each($$folder_uri, $imap_slurp_all, my $res = []);
+$nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []);
 is(scalar(@$res), 1, 'got appended message');
 my $plack_qp_eml = eml_load('t/plack-qp.eml');
 is_deeply($res, [ [ [ 'seen' ], $plack_qp_eml ] ],
@@ -119,13 +119,13 @@ test_lei(sub {
 	$set_cred_helper->("$ENV{HOME}/.gitconfig", $cred_set) if $cred_set;
 
 	lei_ok qw(q f:qp@example.com -o), $$folder_uri;
-	$nwr->imap_each($$folder_uri, $imap_slurp_all, my $res = []);
+	$nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []);
 	is(scalar(@$res), 1, 'got one deduped result') or diag explain($res);
 	is_deeply($res->[0]->[1], $plack_qp_eml,
 			'lei q wrote expected result');
 
 	lei_ok qw(q f:matz -a -o), $$folder_uri;
-	$nwr->imap_each($$folder_uri, $imap_slurp_all, my $aug = []);
+	$nwr->imap_each($folder_uri, $imap_slurp_all, my $aug = []);
 	is(scalar(@$aug), 2, '2 results after augment') or diag explain($aug);
 	my $exp = $res->[0]->[1]->as_string;
 	is(scalar(grep { $_->[1]->as_string eq $exp } @$aug), 1,
@@ -135,7 +135,7 @@ test_lei(sub {
 			'new result shown after augment');
 
 	lei_ok qw(q s:thisbetternotgiveanyresult -o), $folder_uri->as_string;
-	$nwr->imap_each($$folder_uri, $imap_slurp_all, my $empty = []);
+	$nwr->imap_each($folder_uri, $imap_slurp_all, my $empty = []);
 	is(scalar(@$empty), 0, 'no results w/o augment');
 
 });

^ permalink raw reply related	[relevance 5%]

Results 1-2 of 2 | reverse | options above
-- pct% links below jump to the message on this page, permalinks otherwise --
2021-02-21  7:41  7% [PATCH 0/7] "lei q -o imaps://..." support Eric Wong
2021-02-21  7:41  5% ` [PATCH 6/7] net_reader: use and accept URIimap objects in more places Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox; see mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).