user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 1/6] lei q: support --import-augment for IMAP
Date: Thu,  4 Mar 2021 17:03:11 +0800	[thread overview]
Message-ID: <20210304090316.9568-2-e@80x24.org> (raw)
In-Reply-To: <20210304090316.9568-1-e@80x24.org>

IMAP is similar to Maildir and we can now preserve keyword
updates done on IMAP folders.
---
 lib/PublicInbox/LeiToMail.pm | 48 ++++++++++++++++++++++--------------
 lib/PublicInbox/NetReader.pm |  9 +++++--
 lib/PublicInbox/NetWriter.pm | 41 ++++++++++++++++++++++++++----
 xt/net_writer-imap.t         | 36 ++++++++++++++++++++++++---
 4 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 3420b06e..b3228a59 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -267,6 +267,17 @@ sub _mbox_write_cb ($$) {
 	}
 }
 
+sub update_kw_maybe ($$$$) {
+	my ($lei, $lse, $eml, $kw) = @_;
+	return unless $lse;
+	my $x = $lse->kw_changed($eml, $kw);
+	if ($x) {
+		$lei->{sto}->ipc_do('set_eml', $eml, @$kw);
+	} elsif (!defined($x)) {
+		# TODO: xkw
+	}
+}
+
 sub _augment_or_unlink { # maildir_each_eml cb
 	my ($f, $kw, $eml, $lei, $lse, $mod, $shard, $unlink) = @_;
 	if ($mod) {
@@ -276,14 +287,7 @@ sub _augment_or_unlink { # maildir_each_eml cb
 				$1 : sha256_hex($f);
 		my $recno = hex(substr($hex, 0, 8));
 		return if ($recno % $mod) != $shard;
-		if ($lse) {
-			my $x = $lse->kw_changed($eml, $kw);
-			if ($x) {
-				$lei->{sto}->ipc_do('set_eml', $eml, @$kw);
-			} elsif (!defined($x)) {
-				# TODO: xkw
-			}
-		}
+		update_kw_maybe($lei, $lse, $eml, $kw);
 	}
 	$unlink ? unlink($f) : _augment($eml, $lei);
 }
@@ -446,26 +450,32 @@ sub _do_augment_maildir {
 	}
 }
 
-sub _post_augment_maildir {
-	my ($self, $lei) = @_;
-	$lei->{opt}->{'import-augment'} or return;
-	my $wait = $lei->{sto}->ipc_do('checkpoint', 1);
-}
-
-sub _augment_imap { # PublicInbox::NetReader::imap_each cb
-	my ($url, $uid, $kw, $eml, $lei) = @_;
-	_augment($eml, $lei);
+sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb
+	my ($url, $uid, $kw, $eml, $lei, $lse, $delete_mic) = @_;
+	update_kw_maybe($lei, $lse, $eml, $kw);
+	if ($delete_mic) {
+		$lei->{net}->imap_delete_1($url, $uid, $delete_mic);
+	} else {
+		_augment($eml, $lei);
+	}
 }
 
 sub _do_augment_imap {
 	my ($self, $lei) = @_;
 	my $net = $lei->{net};
+	my $lse = $lei->{sto}->search if $lei->{opt}->{'import-augment'};
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
-			$net->imap_each($self->{uri}, \&_augment_imap, $lei);
+			$net->imap_each($self->{uri}, \&_imap_augment_or_delete,
+					$lei, $lse);
 			$dedupe->pause_dedupe;
 		}
+	} elsif ($lse) {
+		my $delete_mic;
+		$net->imap_each($self->{uri}, \&_imap_augment_or_delete,
+					$lei, $lse, \$delete_mic);
+		$delete_mic->expunge if $delete_mic;
 	} elsif (!$self->{-wq_worker_nr}) { # undef or 0
 		# clobber existing IMAP folder
 		$net->imap_delete_all($self->{uri});
@@ -539,6 +549,8 @@ sub do_augment { # slow, runs in wq worker
 # fast (spawn compressor or mkdir), runs in same process as pre_augment
 sub post_augment {
 	my ($self, $lei, @args) = @_;
+	my $wait = $lei->{opt}->{'import-augment'} ?
+			$lei->{sto}->ipc_do('checkpoint', 1) : 0;
 	# _post_augment_mbox
 	my $m = $self->can("_post_augment_$self->{base_type}") or return;
 	$m->($self, $lei, @args);
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 96d3b2ed..f5f71005 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -346,9 +346,14 @@ sub _imap_do_msg ($$$$$) {
 	$$raw =~ s/\r\n/\n/sg;
 	my $kw = [];
 	for my $f (split(/ /, $flags)) {
-		my $k = $IMAPflags2kw{$f} // next; # TODO: X-Label?
-		push @$kw, $k;
+		if (my $k = $IMAPflags2kw{$f}) {
+			push @$kw, $k;
+		} elsif ($f eq "\\Recent") { # not in JMAP
+		} elsif ($self->{verbose}) {
+			warn "# unknown IMAP flag $f <$uri;uid=$uid>\n";
+		}
 	}
+	@$kw = sort @$kw; # for all UI/UX purposes
 	my ($eml_cb, @args) = @{$self->{eml_each}};
 	$eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
 }
diff --git a/lib/PublicInbox/NetWriter.pm b/lib/PublicInbox/NetWriter.pm
index e26e9815..49ac02a6 100644
--- a/lib/PublicInbox/NetWriter.pm
+++ b/lib/PublicInbox/NetWriter.pm
@@ -13,27 +13,58 @@ my %IMAPkw2flags;
 @IMAPkw2flags{values %PublicInbox::NetReader::IMAPflags2kw} =
 				keys %PublicInbox::NetReader::IMAPflags2kw;
 
+sub kw2flags ($) { join(' ', map { $IMAPkw2flags{$_} } @{$_[0]}) }
+
 sub imap_append {
 	my ($mic, $folder, $bref, $smsg, $eml) = @_;
 	$bref //= \($eml->as_string);
 	$smsg //= bless {}, 'PublicInbox::Smsg';
 	bless($smsg, 'PublicInbox::Smsg') if ref($smsg) eq 'HASH';
 	$smsg->{ts} //= msg_timestamp($eml // PublicInbox::Eml->new($$bref));
-	my @f = map { $IMAPkw2flags{$_} } @{$smsg->{kw}};
-	$mic->append_string($folder, $$bref, "@f", $smsg->internaldate) or
+	my $f = kw2flags($smsg->{kw});
+	$mic->append_string($folder, $$bref, $f, $smsg->internaldate) or
 		die "APPEND $folder: $@";
 }
 
+sub mic_for_folder {
+	my ($self, $uri) = @_;
+	if (!ref($uri)) {
+		my $u = PublicInbox::URIimap->new($uri);
+		$_[1] = $uri = $u;
+	}
+	my $mic = $self->mic_get($uri) or die "E: not connected: $@";
+	$mic->select($uri->mailbox) or return;
+	$mic;
+}
+
 sub imap_delete_all {
 	my ($self, $url) = @_;
-	my $uri = PublicInbox::URIimap->new($url);
+	my $mic = mic_for_folder($self, my $uri = $url) or return;
 	my $sec = $self->can('uri_section')->($uri);
 	local $0 = $uri->mailbox." $sec";
-	my $mic = $self->mic_get($uri) or die "E: not connected: $@";
-	$mic->select($uri->mailbox) or return; # non-existent
 	if ($mic->delete_message('1:*')) {
 		$mic->expunge;
 	}
 }
 
+sub imap_delete_1 {
+	my ($self, $url, $uid, $delete_mic) = @_;
+	$$delete_mic //= mic_for_folder($self, my $uri = $url) or return;
+	$$delete_mic->delete_message($uid);
+}
+
+sub imap_set_kw {
+	my ($self, $url, $uid, $kw) = @_;
+	my $mic = mic_for_folder($self, my $uri = $url) or return;
+	$mic->set_flag(kw2flags($kw), $uid);
+	$mic; # caller must ->expunge
+}
+
+sub imap_unset_kw {
+	my ($self, $url, $uid, $kw) = @_;
+	my $mic = mic_for_folder($self, my $uri = $url) or return;
+	$mic->unset_flag(kw2flags($kw), $uid);
+	$mic; # caller must ->expunge
+}
+
 1;
diff --git a/xt/net_writer-imap.t b/xt/net_writer-imap.t
index da435926..c24fa993 100644
--- a/xt/net_writer-imap.t
+++ b/xt/net_writer-imap.t
@@ -91,7 +91,7 @@ my $smsg = bless { kw => [ 'seen' ] }, 'PublicInbox::Smsg';
 $imap_append->($mic, $folder, undef, $smsg, eml_load('t/plack-qp.eml'));
 $nwr->{quiet} = 1;
 my $imap_slurp_all = sub {
-	my ($u, $uid, $kw, $eml, $res) = @_;
+	my ($url, $uid, $kw, $eml, $res) = @_;
 	push @$res, [ $kw, $eml ];
 };
 $nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []);
@@ -138,10 +138,38 @@ test_lei(sub {
 	$nwr->imap_each($folder_uri, $imap_slurp_all, my $empty = []);
 	is(scalar(@$empty), 0, 'no results w/o augment');
 
-	lei_ok qw(convert -F eml t/msg_iter-order.eml -o), $$folder_uri;
+	my $f = 't/utf8.eml'; # <testmessage@example.com>
+	$exp = eml_load($f);
+	lei_ok qw(convert -F eml -o), $$folder_uri, $f;
+	my (@uid, @res);
+	$nwr->imap_each($folder_uri, sub {
+		my ($u, $uid, $kw, $eml) = @_;
+		push @uid, $uid;
+		push @res, [ $kw, $eml ];
+	});
+	is_deeply(\@res, [ [ [], $exp ] ], 'converted to IMAP destination');
+	is(scalar(@uid), 1, 'got one UID back');
+	lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external);
+	is_deeply(json_utf8->decode($lei_out), [undef],
+		'no results before import');
+
+	lei_ok qw(import -F eml), $f, \'import local copy w/o keywords';
+
+	$nwr->imap_set_kw($folder_uri, $uid[0], [ 'seen' ])->expunge
+		or BAIL_OUT "expunge $@";
+	@res = ();
+	$nwr->imap_each($folder_uri, $imap_slurp_all, \@res);
+	is_deeply(\@res, [ [ ['seen'], $exp ] ], 'seen flag set') or
+		diag explain(\@res);
+
+	lei_ok qw(q s:thisbetternotgiveanyresult -o), $folder_uri->as_string,
+		\'clobber folder but import flag';
 	$nwr->imap_each($folder_uri, $imap_slurp_all, $empty = []);
-	is_deeply($empty, [ [ [], eml_load('t/msg_iter-order.eml') ] ],
-		'converted to IMAP destination');
+	is_deeply($empty, [], 'clobbered folder');
+	lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external);
+	$res = json_utf8->decode($lei_out)->[0];
+	is_deeply([@$res{qw(m kw)}], ['<testmessage@example.com>', ['seen']],
+		'kw set');
 });
 
 undef $cleanup; # remove temporary folder

  reply	other threads:[~2021-03-04  9:03 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
2021-03-04  9:03 ` Eric Wong [this message]
2021-03-04  9:03 ` [PATCH 2/6] lei: dclose: do not EPOLL_CTL_DEL w/o event_init Eric Wong
2021-03-04  9:03 ` [PATCH 3/6] lei_xsearch: cleanup {pkt_op_p} on exceptions Eric Wong
2021-03-04  9:03 ` [PATCH 4/6] lei q: --import-augment for mbox and mbox.gz Eric Wong
2021-03-04  9:03 ` [PATCH 5/6] t/lei_to_mail: no need to cat in FIFO test Eric Wong
2021-03-04  9:03 ` [PATCH 6/6] lei q: s/import-augment/import-before/g Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210304090316.9568-2-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line
before the message body.

Code repositories for project(s) associated with this public inbox:

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).