user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP
@ 2021-03-04  9:03 Eric Wong
  2021-03-04  9:03 ` [PATCH 1/6] lei q: support --import-augment for IMAP Eric Wong
                   ` (5 more replies)
  0 siblings, 6 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

mbox support was the trickiest, and necessitated
PATCH 2/6 and 3/6 in addition to
https://public-inbox.org/meta/20210304012039.26900-1-e@80x24.org/
("ds: import croak properly")

6/6 completes the renaming.

Eric Wong (6):
  lei q: support --import-augment for IMAP
  lei: dclose: do not EPOLL_CTL_DEL w/o event_init
  lei_xsearch: cleanup {pkt_op_p} on exceptions
  lei q: --import-augment for mbox and mbox.gz
  t/lei_to_mail: no need to cat in FIFO test
  lei q: s/import-augment/import-before/g

 lib/PublicInbox/LEI.pm        |   4 +-
 lib/PublicInbox/LeiQuery.pm   |   2 +-
 lib/PublicInbox/LeiToMail.pm  | 115 +++++++++++++++++++++++-----------
 lib/PublicInbox/LeiXSearch.pm |   6 ++
 lib/PublicInbox/NetReader.pm  |   9 ++-
 lib/PublicInbox/NetWriter.pm  |  41 ++++++++++--
 t/lei-q-kw.t                  |  80 +++++++++++++++++++++--
 t/lei_to_mail.t               |   7 ++-
 xt/net_writer-imap.t          |  36 +++++++++--
 9 files changed, 241 insertions(+), 59 deletions(-)

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 1/6] lei q: support --import-augment for IMAP
  2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
@ 2021-03-04  9:03 ` Eric Wong
  2021-03-04  9:03 ` [PATCH 2/6] lei: dclose: do not EPOLL_CTL_DEL w/o event_init Eric Wong
                   ` (4 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

IMAP is similar to Maildir and we can now preserve keyword
updates done on IMAP folders.
---
 lib/PublicInbox/LeiToMail.pm | 48 ++++++++++++++++++++++--------------
 lib/PublicInbox/NetReader.pm |  9 +++++--
 lib/PublicInbox/NetWriter.pm | 41 ++++++++++++++++++++++++++----
 xt/net_writer-imap.t         | 36 ++++++++++++++++++++++++---
 4 files changed, 105 insertions(+), 29 deletions(-)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 3420b06e..b3228a59 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -267,6 +267,17 @@ sub _mbox_write_cb ($$) {
 	}
 }
 
+sub update_kw_maybe ($$$$) {
+	my ($lei, $lse, $eml, $kw) = @_;
+	return unless $lse;
+	my $x = $lse->kw_changed($eml, $kw);
+	if ($x) {
+		$lei->{sto}->ipc_do('set_eml', $eml, @$kw);
+	} elsif (!defined($x)) {
+		# TODO: xkw
+	}
+}
+
 sub _augment_or_unlink { # maildir_each_eml cb
 	my ($f, $kw, $eml, $lei, $lse, $mod, $shard, $unlink) = @_;
 	if ($mod) {
@@ -276,14 +287,7 @@ sub _augment_or_unlink { # maildir_each_eml cb
 				$1 : sha256_hex($f);
 		my $recno = hex(substr($hex, 0, 8));
 		return if ($recno % $mod) != $shard;
-		if ($lse) {
-			my $x = $lse->kw_changed($eml, $kw);
-			if ($x) {
-				$lei->{sto}->ipc_do('set_eml', $eml, @$kw);
-			} elsif (!defined($x)) {
-				# TODO: xkw
-			}
-		}
+		update_kw_maybe($lei, $lse, $eml, $kw);
 	}
 	$unlink ? unlink($f) : _augment($eml, $lei);
 }
@@ -446,26 +450,32 @@ sub _do_augment_maildir {
 	}
 }
 
-sub _post_augment_maildir {
-	my ($self, $lei) = @_;
-	$lei->{opt}->{'import-augment'} or return;
-	my $wait = $lei->{sto}->ipc_do('checkpoint', 1);
-}
-
-sub _augment_imap { # PublicInbox::NetReader::imap_each cb
-	my ($url, $uid, $kw, $eml, $lei) = @_;
-	_augment($eml, $lei);
+sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb
+	my ($url, $uid, $kw, $eml, $lei, $lse, $delete_mic) = @_;
+	update_kw_maybe($lei, $lse, $eml, $kw);
+	if ($delete_mic) {
+		$lei->{net}->imap_delete_1($url, $uid, $delete_mic);
+	} else {
+		_augment($eml, $lei);
+	}
 }
 
 sub _do_augment_imap {
 	my ($self, $lei) = @_;
 	my $net = $lei->{net};
+	my $lse = $lei->{sto}->search if $lei->{opt}->{'import-augment'};
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
-			$net->imap_each($self->{uri}, \&_augment_imap, $lei);
+			$net->imap_each($self->{uri}, \&_imap_augment_or_delete,
+					$lei, $lse);
 			$dedupe->pause_dedupe;
 		}
+	} elsif ($lse) {
+		my $delete_mic;
+		$net->imap_each($self->{uri}, \&_imap_augment_or_delete,
+					$lei, $lse, \$delete_mic);
+		$delete_mic->expunge if $delete_mic;
 	} elsif (!$self->{-wq_worker_nr}) { # undef or 0
 		# clobber existing IMAP folder
 		$net->imap_delete_all($self->{uri});
@@ -539,6 +549,8 @@ sub do_augment { # slow, runs in wq worker
 # fast (spawn compressor or mkdir), runs in same process as pre_augment
 sub post_augment {
 	my ($self, $lei, @args) = @_;
+	my $wait = $lei->{opt}->{'import-augment'} ?
+			$lei->{sto}->ipc_do('checkpoint', 1) : 0;
 	# _post_augment_mbox
 	my $m = $self->can("_post_augment_$self->{base_type}") or return;
 	$m->($self, $lei, @args);
diff --git a/lib/PublicInbox/NetReader.pm b/lib/PublicInbox/NetReader.pm
index 96d3b2ed..f5f71005 100644
--- a/lib/PublicInbox/NetReader.pm
+++ b/lib/PublicInbox/NetReader.pm
@@ -346,9 +346,14 @@ sub _imap_do_msg ($$$$$) {
 	$$raw =~ s/\r\n/\n/sg;
 	my $kw = [];
 	for my $f (split(/ /, $flags)) {
-		my $k = $IMAPflags2kw{$f} // next; # TODO: X-Label?
-		push @$kw, $k;
+		if (my $k = $IMAPflags2kw{$f}) {
+			push @$kw, $k;
+		} elsif ($f eq "\\Recent") { # not in JMAP
+		} elsif ($self->{verbose}) {
+			warn "# unknown IMAP flag $f <$uri;uid=$uid>\n";
+		}
 	}
+	@$kw = sort @$kw; # for all UI/UX purposes
 	my ($eml_cb, @args) = @{$self->{eml_each}};
 	$eml_cb->($uri, $uid, $kw, PublicInbox::Eml->new($raw), @args);
 }
diff --git a/lib/PublicInbox/NetWriter.pm b/lib/PublicInbox/NetWriter.pm
index e26e9815..49ac02a6 100644
--- a/lib/PublicInbox/NetWriter.pm
+++ b/lib/PublicInbox/NetWriter.pm
@@ -13,27 +13,58 @@ my %IMAPkw2flags;
 @IMAPkw2flags{values %PublicInbox::NetReader::IMAPflags2kw} =
 				keys %PublicInbox::NetReader::IMAPflags2kw;
 
+sub kw2flags ($) { join(' ', map { $IMAPkw2flags{$_} } @{$_[0]}) }
+
 sub imap_append {
 	my ($mic, $folder, $bref, $smsg, $eml) = @_;
 	$bref //= \($eml->as_string);
 	$smsg //= bless {}, 'PublicInbox::Smsg';
 	bless($smsg, 'PublicInbox::Smsg') if ref($smsg) eq 'HASH';
 	$smsg->{ts} //= msg_timestamp($eml // PublicInbox::Eml->new($$bref));
-	my @f = map { $IMAPkw2flags{$_} } @{$smsg->{kw}};
-	$mic->append_string($folder, $$bref, "@f", $smsg->internaldate) or
+	my $f = kw2flags($smsg->{kw});
+	$mic->append_string($folder, $$bref, $f, $smsg->internaldate) or
 		die "APPEND $folder: $@";
 }
 
+sub mic_for_folder {
+	my ($self, $uri) = @_;
+	if (!ref($uri)) {
+		my $u = PublicInbox::URIimap->new($uri);
+		$_[1] = $uri = $u;
+	}
+	my $mic = $self->mic_get($uri) or die "E: not connected: $@";
+	$mic->select($uri->mailbox) or return;
+	$mic;
+}
+
 sub imap_delete_all {
 	my ($self, $url) = @_;
-	my $uri = PublicInbox::URIimap->new($url);
+	my $mic = mic_for_folder($self, my $uri = $url) or return;
 	my $sec = $self->can('uri_section')->($uri);
 	local $0 = $uri->mailbox." $sec";
-	my $mic = $self->mic_get($uri) or die "E: not connected: $@";
-	$mic->select($uri->mailbox) or return; # non-existent
 	if ($mic->delete_message('1:*')) {
 		$mic->expunge;
 	}
 }
 
+sub imap_delete_1 {
+	my ($self, $url, $uid, $delete_mic) = @_;
+	$$delete_mic //= mic_for_folder($self, my $uri = $url) or return;
+	$$delete_mic->delete_message($uid);
+}
+
+sub imap_set_kw {
+	my ($self, $url, $uid, $kw) = @_;
+	my $mic = mic_for_folder($self, my $uri = $url) or return;
+	$mic->set_flag(kw2flags($kw), $uid);
+	$mic; # caller must ->expunge
+}
+
+sub imap_unset_kw {
+	my ($self, $url, $uid, $kw) = @_;
+	my $mic = mic_for_folder($self, my $uri = $url) or return;
+	$mic->unset_flag(kw2flags($kw), $uid);
+	$mic; # caller must ->expunge
+}
+
 1;
diff --git a/xt/net_writer-imap.t b/xt/net_writer-imap.t
index da435926..c24fa993 100644
--- a/xt/net_writer-imap.t
+++ b/xt/net_writer-imap.t
@@ -91,7 +91,7 @@ my $smsg = bless { kw => [ 'seen' ] }, 'PublicInbox::Smsg';
 $imap_append->($mic, $folder, undef, $smsg, eml_load('t/plack-qp.eml'));
 $nwr->{quiet} = 1;
 my $imap_slurp_all = sub {
-	my ($u, $uid, $kw, $eml, $res) = @_;
+	my ($url, $uid, $kw, $eml, $res) = @_;
 	push @$res, [ $kw, $eml ];
 };
 $nwr->imap_each($folder_uri, $imap_slurp_all, my $res = []);
@@ -138,10 +138,38 @@ test_lei(sub {
 	$nwr->imap_each($folder_uri, $imap_slurp_all, my $empty = []);
 	is(scalar(@$empty), 0, 'no results w/o augment');
 
-	lei_ok qw(convert -F eml t/msg_iter-order.eml -o), $$folder_uri;
+	my $f = 't/utf8.eml'; # <testmessage@example.com>
+	$exp = eml_load($f);
+	lei_ok qw(convert -F eml -o), $$folder_uri, $f;
+	my (@uid, @res);
+	$nwr->imap_each($folder_uri, sub {
+		my ($u, $uid, $kw, $eml) = @_;
+		push @uid, $uid;
+		push @res, [ $kw, $eml ];
+	});
+	is_deeply(\@res, [ [ [], $exp ] ], 'converted to IMAP destination');
+	is(scalar(@uid), 1, 'got one UID back');
+	lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external);
+	is_deeply(json_utf8->decode($lei_out), [undef],
+		'no results before import');
+
+	lei_ok qw(import -F eml), $f, \'import local copy w/o keywords';
+
+	$nwr->imap_set_kw($folder_uri, $uid[0], [ 'seen' ])->expunge
+		or BAIL_OUT "expunge $@";
+	@res = ();
+	$nwr->imap_each($folder_uri, $imap_slurp_all, \@res);
+	is_deeply(\@res, [ [ ['seen'], $exp ] ], 'seen flag set') or
+		diag explain(\@res);
+
+	lei_ok qw(q s:thisbetternotgiveanyresult -o), $folder_uri->as_string,
+		\'clobber folder but import flag';
 	$nwr->imap_each($folder_uri, $imap_slurp_all, $empty = []);
-	is_deeply($empty, [ [ [], eml_load('t/msg_iter-order.eml') ] ],
-		'converted to IMAP destination');
+	is_deeply($empty, [], 'clobbered folder');
+	lei_ok qw(q -o /dev/stdout m:testmessage@example.com --no-external);
+	$res = json_utf8->decode($lei_out)->[0];
+	is_deeply([@$res{qw(m kw)}], ['<testmessage@example.com>', ['seen']],
+		'kw set');
 });
 
 undef $cleanup; # remove temporary folder

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 2/6] lei: dclose: do not EPOLL_CTL_DEL w/o event_init
  2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
  2021-03-04  9:03 ` [PATCH 1/6] lei q: support --import-augment for IMAP Eric Wong
@ 2021-03-04  9:03 ` Eric Wong
  2021-03-04  9:03 ` [PATCH 3/6] lei_xsearch: cleanup {pkt_op_p} on exceptions Eric Wong
                   ` (3 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

It's possible we'll hit a die() statement which triggers
lei->dclose before we have entered the event loop.
---
 lib/PublicInbox/LEI.pm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 1e5b04ca..fdd9f8c8 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -973,7 +973,7 @@ sub dclose {
 	if (my $sto = delete $self->{sto}) {
 		$sto->ipc_do('done');
 	}
-	$self->close if $self->{sock}; # PublicInbox::DS::close
+	$self->close if $self->{-event_init_done}; # PublicInbox::DS::close
 }
 
 # for long-running results

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 3/6] lei_xsearch: cleanup {pkt_op_p} on exceptions
  2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
  2021-03-04  9:03 ` [PATCH 1/6] lei q: support --import-augment for IMAP Eric Wong
  2021-03-04  9:03 ` [PATCH 2/6] lei: dclose: do not EPOLL_CTL_DEL w/o event_init Eric Wong
@ 2021-03-04  9:03 ` Eric Wong
  2021-03-04  9:03 ` [PATCH 4/6] lei q: --import-augment for mbox and mbox.gz Eric Wong
                   ` (2 subsequent siblings)
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

We must ensure pkt_op_p doesn't live beyond the scope of
->do_query in the top-level lei-daemon, otherwise it can leave a
stray socket hanging around in case of exceptions.
---
 lib/PublicInbox/LeiXSearch.pm | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 45815180..059aa284 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -416,6 +416,11 @@ sub ipc_atfork_child {
 	$self->SUPER::ipc_atfork_child;
 }
 
+sub delete_pkt_op { # OnDestroy callback
+	my $unclosed_after_die = delete($_[0])->{pkt_op_p} or return;
+	close $unclosed_after_die;
+}
+
 sub do_query {
 	my ($self, $lei) = @_;
 	my $l2m = $lei->{l2m};
@@ -431,6 +436,7 @@ sub do_query {
 		'incr_start_query' => [ \&incr_start_query, $self, $l2m ],
 	};
 	$lei->{auth}->op_merge($ops, $l2m) if $l2m && $lei->{auth};
+	my $od = PublicInbox::OnDestroy->new($$, \&delete_pkt_op, $lei);
 	($lei->{pkt_op_c}, $lei->{pkt_op_p}) = PublicInbox::PktOp->pair($ops);
 	$lei->{1}->autoflush(1);
 	$lei->start_pager if delete $lei->{need_pager};

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 4/6] lei q: --import-augment for mbox and mbox.gz
  2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
                   ` (2 preceding siblings ...)
  2021-03-04  9:03 ` [PATCH 3/6] lei_xsearch: cleanup {pkt_op_p} on exceptions Eric Wong
@ 2021-03-04  9:03 ` Eric Wong
  2021-03-04  9:03 ` [PATCH 5/6] t/lei_to_mail: no need to cat in FIFO test Eric Wong
  2021-03-04  9:03 ` [PATCH 6/6] lei q: s/import-augment/import-before/g Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

These are the trickiest output formats we support due to the
possibility of filesystem FIFOs and pipes for <gzip|xz|bzip2>.

This completes another phase of keyword sync support.
---
 lib/PublicInbox/LeiToMail.pm | 65 ++++++++++++++++++++++---------
 t/lei-q-kw.t                 | 74 +++++++++++++++++++++++++++++++++++-
 2 files changed, 119 insertions(+), 20 deletions(-)

diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index b3228a59..6290f35e 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -246,6 +246,13 @@ sub _augment { # MboxReader eml_cb
 	$lei->{dedupe}->is_dup($eml);
 }
 
+sub _mbox_augment_kw_maybe {
+	my ($eml, $lei, $lse, $augment) = @_;
+	my @kw = PublicInbox::LeiStore::mbox_keywords($eml);
+	update_kw_maybe($lei, $lse, $eml, \@kw);
+	_augment($eml, $lei) if $augment;
+}
+
 sub _mbox_write_cb ($$) {
 	my ($self, $lei) = @_;
 	my $ovv = $lei->{ovv};
@@ -391,7 +398,7 @@ sub new {
 				"$dst exists and is not a directory\n";
 		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
 	} elsif (substr($fmt, 0, 4) eq 'mbox') {
-		require PublicInbox::MboxReader if $lei->{opt}->{augment};
+		require PublicInbox::MboxReader;
 		(-d $dst || (-e _ && !-w _)) and die
 			"$dst exists and is not a writable file\n";
 		$self->can("eml2$fmt") or die "bad mbox format: $fmt\n";
@@ -485,8 +492,8 @@ sub _do_augment_imap {
 sub _pre_augment_mbox {
 	my ($self, $lei) = @_;
 	my $dst = $lei->{ovv}->{dst};
+	my $out = $lei->{1};
 	if ($dst ne '/dev/stdout') {
-		my $out;
 		if (-p $dst) {
 			open $out, '>', $dst or die "open($dst): $!";
 		} elsif (-f _ || !-e _) {
@@ -495,36 +502,56 @@ sub _pre_augment_mbox {
 					PublicInbox::MboxLock->defaults;
 			$self->{mbl} = PublicInbox::MboxLock->acq($dst, 1, $m);
 			$out = $self->{mbl}->{fh};
-			if (!$lei->{opt}->{augment} and !truncate($out, 0)) {
-				die "truncate($dst): $!";
-			}
 		}
 		$lei->{old_1} = $lei->{1}; # keep for spawning MUA
-		$lei->{1} = $out;
 	}
 	# Perl does SEEK_END even with O_APPEND :<
-	$self->{seekable} = seek($lei->{1}, 0, SEEK_SET);
+	$self->{seekable} = seek($out, 0, SEEK_SET);
 	if (!$self->{seekable} && $! != ESPIPE && $dst ne '/dev/stdout') {
 		die "seek($dst): $!\n";
 	}
+	if (!$self->{seekable}) {
+		my $ia = $lei->{opt}->{'import-augment'};
+		die "--import-augment specified but $dst is not seekable\n"
+			if $ia && !ref($ia);
+		die "--augment specified but $dst is not seekable\n" if
+			$lei->{opt}->{augment};
+	}
 	state $zsfx_allow = join('|', keys %zsfx2cmd);
-	($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/) or return;
-	pipe(my ($r, $w)) or die "pipe: $!";
-	$lei->{zpipe} = [ $r, $w ];
+	if (($self->{zsfx}) = ($dst =~ /\.($zsfx_allow)\z/)) {
+		pipe(my ($r, $w)) or die "pipe: $!";
+		$lei->{zpipe} = [ $r, $w ];
+	}
+	$lei->{1} = $out;
+	undef;
 }
 
 sub _do_augment_mbox {
 	my ($self, $lei) = @_;
-	return if !$lei->{opt}->{augment};
-	my $dedupe = $lei->{dedupe};
-	my $dst = $lei->{ovv}->{dst};
-	die "cannot augment $dst, not seekable\n" if !$self->{seekable};
+	return unless $self->{seekable};
+	my $opt = $lei->{opt};
 	my $out = $lei->{1};
-	if (-s $out && $dedupe && $dedupe->prepare_dedupe) {
-		my $zsfx = $self->{zsfx};
-		my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) :
-				dup_src($out);
-		my $fmt = $lei->{ovv}->{fmt};
+	my ($fmt, $dst) = @{$lei->{ovv}}{qw(fmt dst)};
+	return unless -s $out;
+	unless ($opt->{augment} || $opt->{'import-augment'}) {
+		truncate($out, 0) or die "truncate($dst): $!";
+		return;
+	}
+	my $zsfx = $self->{zsfx};
+	my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) : dup_src($out);
+	my $dedupe;
+	if ($opt->{augment}) {
+		$dedupe = $lei->{dedupe};
+		$dedupe->prepare_dedupe if $dedupe;
+	}
+	if ($opt->{'import-augment'}) { # the default
+		my $lse = $lei->{sto}->search;
+		PublicInbox::MboxReader->$fmt($rd, \&_mbox_augment_kw_maybe,
+						$lei, $lse, $opt->{augment});
+		if (!$opt->{augment} and !truncate($out, 0)) {
+			die "truncate($dst): $!";
+		}
+	} else { # --augment --no-import-augment
 		PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei);
 	}
 	# maybe some systems don't honor O_APPEND, Perl does this:
diff --git a/t/lei-q-kw.t b/t/lei-q-kw.t
index 97b2e08f..babe9749 100644
--- a/t/lei-q-kw.t
+++ b/t/lei-q-kw.t
@@ -2,6 +2,12 @@
 # Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict; use v5.10.1; use PublicInbox::TestCommon;
+use POSIX qw(mkfifo);
+use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK);
+use IO::Uncompress::Gunzip qw(gunzip);
+use IO::Compress::Gzip qw(gzip);
+use PublicInbox::MboxReader;
+use PublicInbox::Spawn qw(popen_rd);
 test_lei(sub {
 lei_ok(qw(import -F eml t/plack-qp.eml));
 my $o = "$ENV{HOME}/dst";
@@ -28,6 +34,72 @@ lei_ok(qw(q -o), "maildir:$o", qw(m:qp@example.com));
 @fn = glob("$o/cur/*:2,S");
 is(scalar(@fn), 1, "`seen' flag (but not `replied') set on Maildir file");
 
-# TODO: other destination types
+SKIP: {
+	$o = "$ENV{HOME}/fifo";
+	mkfifo($o, 0600) or skip("mkfifo not supported: $!", 1);
+	# cat(1) since lei() may not execve for FD_CLOEXEC to work
+	my $cat = popen_rd(['cat', $o]);
+	ok(!lei(qw(q --import-augment bogus -o), "mboxrd:$o"),
+		'--import-augment fails on non-seekable output');
+	is(do { local $/; <$cat> }, '', 'no output on FIFO');
+};
+
+lei_ok qw(import -F eml t/utf8.eml), \'for augment test';
+my $read_file = sub {
+	if ($_[0] =~ /\.gz\z/) {
+		gunzip($_[0] => \(my $buf = ''), MultiStream => 1) or
+			BAIL_OUT 'gunzip';
+		$buf;
+	} else {
+		open my $fh, '+<', $_[0] or BAIL_OUT $!;
+		do { local $/; <$fh> };
+	}
+};
+
+my $write_file = sub {
+	if ($_[0] =~ /\.gz\z/) {
+		gzip(\($_[1]), $_[0]) or BAIL_OUT 'gzip';
+	} else {
+		open my $fh, '>', $_[0] or BAIL_OUT $!;
+		print $fh $_[1] or BAIL_OUT $!;
+		close $fh or BAIL_OUT;
+	}
+};
+
+my $exp = {
+	'<qp@example.com>' => eml_load('t/plack-qp.eml'),
+	'<testmessage@example.com>' => eml_load('t/utf8.eml'),
+};
+$exp->{'<qp@example.com>'}->header_set('Status', 'OR');
+$exp->{'<testmessage@example.com>'}->header_set('Status', 'O');
+for my $sfx ('', '.gz') {
+	$o = "$ENV{HOME}/dst.mboxrd$sfx";
+	lei_ok(qw(q -o), "mboxrd:$o", qw(m:qp@example.com));
+	my $buf = $read_file->($o);
+	$buf =~ s/^Status: [^\n]*\n//sm or BAIL_OUT "no status in $buf";
+	$write_file->($o, $buf);
+	lei_ok(qw(q -o), "mboxrd:$o", qw(rereadandimportkwchange));
+	$buf = $read_file->($o);
+	is($buf, '', 'emptied');
+	lei_ok(qw(q -o), "mboxrd:$o", qw(m:qp@example.com));
+	$buf = $read_file->($o);
+	$buf =~ s/\nStatus: O\n\n/\nStatus: OR\n\n/s or
+		BAIL_OUT "no Status in $buf";
+	$write_file->($o, $buf);
+	lei_ok(qw(q -a -o), "mboxrd:$o", qw(m:testmessage@example.com));
+	$buf = $read_file->($o);
+	open my $fh, '<', \$buf or BAIL_OUT "PerlIO::scalar $!";
+	my %res;
+	PublicInbox::MboxReader->mboxrd($fh, sub {
+		my ($eml) = @_;
+		$res{$eml->header_raw('Message-ID')} = $eml;
+	});
+	is_deeply(\%res, $exp, '--augment worked');
+
+	lei_ok(qw(q -o), "mboxrd:/dev/stdout", qw(m:qp@example.com)) or
+		diag $lei_err;
+	like($lei_out, qr/^Status: OR\n/sm, 'Status set by previous augment');
+}
+
 });
 done_testing;

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 5/6] t/lei_to_mail: no need to cat in FIFO test
  2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
                   ` (3 preceding siblings ...)
  2021-03-04  9:03 ` [PATCH 4/6] lei q: --import-augment for mbox and mbox.gz Eric Wong
@ 2021-03-04  9:03 ` Eric Wong
  2021-03-04  9:03 ` [PATCH 6/6] lei q: s/import-augment/import-before/g Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

We're not forking here, so there's no need to rely on FD_CLOEXEC
to resolve deadlock issues.
---
 t/lei_to_mail.t | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index 7898cc48..585db689 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -6,8 +6,8 @@ use v5.10.1;
 use Test::More;
 use PublicInbox::TestCommon;
 use PublicInbox::Eml;
-use Fcntl qw(SEEK_SET);
-use PublicInbox::Spawn qw(popen_rd which);
+use Fcntl qw(SEEK_SET O_RDONLY O_NONBLOCK);
+use PublicInbox::Spawn qw(popen_rd);
 use List::Util qw(shuffle);
 require_mods(qw(DBD::SQLite));
 require PublicInbox::MdirReader;
@@ -242,11 +242,12 @@ SKIP: { # FIFO support
 	use POSIX qw(mkfifo);
 	my $fn = "$tmpdir/fifo";
 	mkfifo($fn, 0600) or skip("mkfifo not supported: $!", 1);
-	my $cat = popen_rd([which('cat'), $fn]);
+	sysopen(my $cat, $fn, O_RDONLY|O_NONBLOCK) or BAIL_OUT $!;
 	my $wcb = $wcb_get->('mboxo', $fn);
 	$wcb->(\(my $x = $buf), $deadbeef);
 	$commit->($wcb);
 	my $cmp = '';
+	$cat->blocking(1);
 	PublicInbox::MboxReader->mboxo($cat, sub { $cmp .= $as_orig->(@_) });
 	is($cmp, $buf, 'message written to FIFO');
 }

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [PATCH 6/6] lei q: s/import-augment/import-before/g
  2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
                   ` (4 preceding siblings ...)
  2021-03-04  9:03 ` [PATCH 5/6] t/lei_to_mail: no need to cat in FIFO test Eric Wong
@ 2021-03-04  9:03 ` Eric Wong
  5 siblings, 0 replies; 7+ messages in thread
From: Eric Wong @ 2021-03-04  9:03 UTC (permalink / raw)
  To: meta

Since this importing of keywords is active even when --augment
isn't specified, calling it --import-before seems more
appropriate.

In the future, this will likely default to adding unseen emails
to lei/store, not just updating keywords.

Link: https://public-inbox.org/meta/20210303222930.GA18597@dcvr/T/
---
 lib/PublicInbox/LEI.pm       |  2 +-
 lib/PublicInbox/LeiQuery.pm  |  2 +-
 lib/PublicInbox/LeiToMail.pm | 16 ++++++++--------
 t/lei-q-kw.t                 | 10 +++++-----
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index fdd9f8c8..50276a50 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -113,7 +113,7 @@ our %CMD = ( # sorted in order of importance/use:
 	qw(save-as=s output|mfolder|o=s format|f=s dedupe|d=s threads|t+
 	sort|s=s reverse|r offset=i remote! local! external! pretty
 	include|I=s@ exclude=s@ only=s@ jobs|j=s globoff|g augment|a
-	import-remote! import-augment! lock=s@
+	import-remote! import-before! lock=s@
 	alert=s@ mua=s no-torsocks torsocks=s verbose|v+ quiet|q C=s@),
 	PublicInbox::LeiQuery::curl_opt(), opt_dash('limit|n=i', '[0-9]+') ],
 
diff --git a/lib/PublicInbox/LeiQuery.pm b/lib/PublicInbox/LeiQuery.pm
index c630d628..493a8382 100644
--- a/lib/PublicInbox/LeiQuery.pm
+++ b/lib/PublicInbox/LeiQuery.pm
@@ -52,7 +52,7 @@ sub lei_q {
 	my $sto = $self->_lei_store(1);
 	my $lse = $sto->search;
 	if (($opt->{'import-remote'} //= 1) |
-			($opt->{'import-augment'} //= 1)) {
+			($opt->{'import-before'} //= 1)) {
 		$sto->write_prepare($self);
 	}
 	if ($opt->{'local'} //= scalar(@only) ? 0 : 1) {
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 6290f35e..1e2060fe 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -438,7 +438,7 @@ sub _pre_augment_maildir {
 sub _do_augment_maildir {
 	my ($self, $lei) = @_;
 	my $dst = $lei->{ovv}->{dst};
-	my $lse = $lei->{sto}->search if $lei->{opt}->{'import-augment'};
+	my $lse = $lei->{sto}->search if $lei->{opt}->{'import-before'};
 	my ($mod, $shard) = @{$self->{shard_info} // []};
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
@@ -470,7 +470,7 @@ sub _imap_augment_or_delete { # PublicInbox::NetReader::imap_each cb
 sub _do_augment_imap {
 	my ($self, $lei) = @_;
 	my $net = $lei->{net};
-	my $lse = $lei->{sto}->search if $lei->{opt}->{'import-augment'};
+	my $lse = $lei->{sto}->search if $lei->{opt}->{'import-before'};
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
@@ -511,8 +511,8 @@ sub _pre_augment_mbox {
 		die "seek($dst): $!\n";
 	}
 	if (!$self->{seekable}) {
-		my $ia = $lei->{opt}->{'import-augment'};
-		die "--import-augment specified but $dst is not seekable\n"
+		my $ia = $lei->{opt}->{'import-before'};
+		die "--import-before specified but $dst is not seekable\n"
 			if $ia && !ref($ia);
 		die "--augment specified but $dst is not seekable\n" if
 			$lei->{opt}->{augment};
@@ -533,7 +533,7 @@ sub _do_augment_mbox {
 	my $out = $lei->{1};
 	my ($fmt, $dst) = @{$lei->{ovv}}{qw(fmt dst)};
 	return unless -s $out;
-	unless ($opt->{augment} || $opt->{'import-augment'}) {
+	unless ($opt->{augment} || $opt->{'import-before'}) {
 		truncate($out, 0) or die "truncate($dst): $!";
 		return;
 	}
@@ -544,14 +544,14 @@ sub _do_augment_mbox {
 		$dedupe = $lei->{dedupe};
 		$dedupe->prepare_dedupe if $dedupe;
 	}
-	if ($opt->{'import-augment'}) { # the default
+	if ($opt->{'import-before'}) { # the default
 		my $lse = $lei->{sto}->search;
 		PublicInbox::MboxReader->$fmt($rd, \&_mbox_augment_kw_maybe,
 						$lei, $lse, $opt->{augment});
 		if (!$opt->{augment} and !truncate($out, 0)) {
 			die "truncate($dst): $!";
 		}
-	} else { # --augment --no-import-augment
+	} else { # --augment --no-import-before
 		PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei);
 	}
 	# maybe some systems don't honor O_APPEND, Perl does this:
@@ -576,7 +576,7 @@ sub do_augment { # slow, runs in wq worker
 # fast (spawn compressor or mkdir), runs in same process as pre_augment
 sub post_augment {
 	my ($self, $lei, @args) = @_;
-	my $wait = $lei->{opt}->{'import-augment'} ?
+	my $wait = $lei->{opt}->{'import-before'} ?
 			$lei->{sto}->ipc_do('checkpoint', 1) : 0;
 	# _post_augment_mbox
 	my $m = $self->can("_post_augment_$self->{base_type}") or return;
diff --git a/t/lei-q-kw.t b/t/lei-q-kw.t
index babe9749..9daeb5b1 100644
--- a/t/lei-q-kw.t
+++ b/t/lei-q-kw.t
@@ -23,13 +23,13 @@ lei_ok(qw(q -o), "maildir:$o", qw(m:qp@example.com));
 @fn = glob("$o/cur/*:2,S");
 is(scalar(@fn), 1, "`seen' flag set on Maildir file");
 
-# ensure --no-import-augment works
+# ensure --no-import-before works
 my $n = $fn[0];
 $n =~ s/,S\z/,RS/;
 rename($fn[0], $n) or BAIL_OUT "rename $!";
-lei_ok(qw(q --no-import-augment -o), "maildir:$o",
+lei_ok(qw(q --no-import-before -o), "maildir:$o",
 	qw(m:bogus-noresults@example.com));
-ok(!glob("$o/cur/*"), '--no-import-augment cleared destination');
+ok(!glob("$o/cur/*"), '--no-import-before cleared destination');
 lei_ok(qw(q -o), "maildir:$o", qw(m:qp@example.com));
 @fn = glob("$o/cur/*:2,S");
 is(scalar(@fn), 1, "`seen' flag (but not `replied') set on Maildir file");
@@ -39,8 +39,8 @@ SKIP: {
 	mkfifo($o, 0600) or skip("mkfifo not supported: $!", 1);
 	# cat(1) since lei() may not execve for FD_CLOEXEC to work
 	my $cat = popen_rd(['cat', $o]);
-	ok(!lei(qw(q --import-augment bogus -o), "mboxrd:$o"),
-		'--import-augment fails on non-seekable output');
+	ok(!lei(qw(q --import-before bogus -o), "mboxrd:$o"),
+		'--import-before fails on non-seekable output');
 	is(do { local $/; <$cat> }, '', 'no output on FIFO');
 };
 

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-03-04  9:03 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-04  9:03 [PATCH 0/6] lei q --import-augment => --import-before; mbox + IMAP Eric Wong
2021-03-04  9:03 ` [PATCH 1/6] lei q: support --import-augment for IMAP Eric Wong
2021-03-04  9:03 ` [PATCH 2/6] lei: dclose: do not EPOLL_CTL_DEL w/o event_init Eric Wong
2021-03-04  9:03 ` [PATCH 3/6] lei_xsearch: cleanup {pkt_op_p} on exceptions Eric Wong
2021-03-04  9:03 ` [PATCH 4/6] lei q: --import-augment for mbox and mbox.gz Eric Wong
2021-03-04  9:03 ` [PATCH 5/6] t/lei_to_mail: no need to cat in FIFO test Eric Wong
2021-03-04  9:03 ` [PATCH 6/6] lei q: s/import-augment/import-before/g Eric Wong

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git