user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 17/19] lei import: support Maildirs
Date: Sun,  7 Feb 2021 08:51:59 +0000	[thread overview]
Message-ID: <20210207085201.13871-18-e@80x24.org> (raw)
In-Reply-To: <20210207085201.13871-1-e@80x24.org>

Importing Maildirs seems to work with only trivial changes,
though I'm probably going to split out Maildir reading into a
separate package rather than reusing the code in LeiToMail.
---
 MANIFEST                     |  1 +
 lib/PublicInbox/LeiImport.pm | 20 +++++++++++++++++---
 lib/PublicInbox/LeiStore.pm  |  8 +++++++-
 lib/PublicInbox/LeiToMail.pm | 11 ++++++-----
 t/lei-import-maildir.t       | 33 +++++++++++++++++++++++++++++++++
 t/lei_to_mail.t              |  6 +++---
 6 files changed, 67 insertions(+), 12 deletions(-)
 create mode 100644 t/lei-import-maildir.t

diff --git a/MANIFEST b/MANIFEST
index 521f1f68..7f417743 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -359,6 +359,7 @@ t/iso-2202-jp.eml
 t/kqnotify.t
 t/lei-daemon.t
 t/lei-externals.t
+t/lei-import-maildir.t
 t/lei-import.t
 t/lei-mirror.t
 t/lei.t
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 2b2dc2f7..a63bfdfd 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -8,6 +8,8 @@ use v5.10.1;
 use parent qw(PublicInbox::IPC);
 use PublicInbox::MboxReader;
 use PublicInbox::Eml;
+use PublicInbox::InboxWritable qw(eml_from_path);
+use PublicInbox::PktOp;
 
 sub _import_eml { # MboxReader callback
 	my ($eml, $sto, $set_kw) = @_;
@@ -35,7 +37,9 @@ sub call { # the main "lei import" method
 	$lei->{opt}->{kw} //= 1;
 	my $fmt = $lei->{opt}->{'format'};
 	my $self = $lei->{imp} = bless {}, $cls;
-	return $lei->fail('--format unspecified') if !$fmt;
+	if (my @f = grep { -f } @argv && !$fmt) {
+		return $lei->fail("--format unset for regular files:\n@f");
+	}
 	$self->{0} = $lei->{0} if $lei->{opt}->{stdin};
 	my $ops = {
 		'!' => [ $lei->can('fail_handler'), $lei ],
@@ -75,14 +79,14 @@ sub _import_fh {
 		if ($fmt eq 'eml') {
 			my $buf = do { local $/; <$fh> } //
 				return $lei->child_error(1 >> 8, <<"");
-		error reading $x: $!
+error reading $x: $!
 
 			my $eml = PublicInbox::Eml->new(\$buf);
 			_import_eml($eml, $lei->{sto}, $set_kw);
 		} else { # some mbox
 			my $cb = PublicInbox::MboxReader->can($fmt);
 			$cb or return $lei->child_error(1 >> 8, <<"");
-	--format $fmt unsupported for $x
+--format $fmt unsupported for $x
 
 			$cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw);
 		}
@@ -90,6 +94,11 @@ sub _import_fh {
 	$lei->child_error(1 >> 8, "<stdin>: $@") if $@;
 }
 
+sub _import_maildir { # maildir_each_file cb
+	my ($f, $sto, $set_kw) = @_;
+	$sto->ipc_do('set_eml_from_maildir', $f, $set_kw);
+}
+
 sub import_path_url {
 	my ($self, $x) = @_;
 	my $lei = $self->{lei};
@@ -99,6 +108,11 @@ sub import_path_url {
 unable to open $x: $!
 
 		_import_fh($lei, $fh, $x);
+	} elsif (-d _ && (-d "$x/cur" || -d "$x/new")) {
+		require PublicInbox::LeiToMail;
+		PublicInbox::LeiToMail::maildir_each_file($x,
+					\&_import_maildir,
+					$lei->{sto}, $lei->{opt}->{kw});
 	} else {
 		$lei->fail("$x unsupported (TODO)");
 	}
diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm
index 3a215973..546d500b 100644
--- a/lib/PublicInbox/LeiStore.pm
+++ b/lib/PublicInbox/LeiStore.pm
@@ -12,7 +12,7 @@ use v5.10.1;
 use parent qw(PublicInbox::Lock PublicInbox::IPC);
 use PublicInbox::ExtSearchIdx;
 use PublicInbox::Import;
-use PublicInbox::InboxWritable;
+use PublicInbox::InboxWritable qw(eml_from_path);
 use PublicInbox::V2Writable;
 use PublicInbox::ContentHash qw(content_hash content_digest);
 use PublicInbox::MID qw(mids mids_in);
@@ -224,6 +224,12 @@ sub set_eml {
 	add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw);
 }
 
+sub set_eml_from_maildir {
+	my ($self, $f, $set_kw) = @_;
+	my $eml = eml_from_path($f) or return;
+	set_eml($self, $eml, $set_kw ? maildir_keywords($f) : ());
+}
+
 sub done {
 	my ($self) = @_;
 	my $err = '';
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index 857aeb63..a5a196db 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -266,8 +266,9 @@ sub _mbox_write_cb ($$) {
 	}
 }
 
-sub _maildir_each_file ($$;@) {
+sub maildir_each_file ($$;@) {
 	my ($dir, $cb, @arg) = @_;
+	$dir .= '/' unless substr($dir, -1) eq '/';
 	for my $d (qw(new/ cur/)) {
 		my $pfx = $dir.$d;
 		opendir my $dh, $pfx or next;
@@ -277,13 +278,13 @@ sub _maildir_each_file ($$;@) {
 	}
 }
 
-sub _augment_file { # _maildir_each_file cb
+sub _augment_file { # maildir_each_file cb
 	my ($f, $lei) = @_;
 	my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return;
 	_augment($eml, $lei);
 }
 
-# _maildir_each_file callback, \&CORE::unlink doesn't work with it
+# maildir_each_file callback, \&CORE::unlink doesn't work with it
 sub _unlink { unlink($_[0]) }
 
 sub _rand () {
@@ -389,11 +390,11 @@ sub _do_augment_maildir {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
 			require PublicInbox::InboxWritable; # eml_from_path
-			_maildir_each_file($dst, \&_augment_file, $lei);
+			maildir_each_file($dst, \&_augment_file, $lei);
 			$dedupe->pause_dedupe;
 		}
 	} else { # clobber existing Maildir
-		_maildir_each_file($dst, \&_unlink);
+		maildir_each_file($dst, \&_unlink);
 	}
 }
 
diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t
new file mode 100644
index 00000000..5842e19e
--- /dev/null
+++ b/t/lei-import-maildir.t
@@ -0,0 +1,33 @@
+#!perl -w
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+use strict; use v5.10.1; use PublicInbox::TestCommon;
+use Cwd qw(abs_path);
+test_lei(sub {
+	my $md = "$ENV{HOME}/md";
+	for ($md, "$md/new", "$md/cur", "$md/tmp") {
+		mkdir($_) or BAIL_OUT("mkdir $_: $!");
+	}
+	symlink(abs_path('t/data/0001.patch'), "$md/cur/x:2,S") or
+		BAIL_OUT "symlink $md $!";
+	ok($lei->(qw(import), $md), 'import Maildir');
+	ok($lei->(qw(q s:boolean)), 'lei q');
+	my $res = json_utf8->decode($lei_out);
+	like($res->[0]->{'s'}, qr/use boolean/, 'got expected result');
+	is_deeply($res->[0]->{kw}, ['seen'], 'keyword set');
+	is($res->[1], undef, 'only got one result');
+
+	ok($lei->(qw(import), $md), 'import Maildir again');
+	ok($lei->(qw(q -d none s:boolean)), 'lei q w/o dedupe');
+	my $r2 = json_utf8->decode($lei_out);
+	is_deeply($r2, $res, 'idempotent import');
+
+	rename("$md/cur/x:2,S", "$md/cur/x:2,SR") or BAIL_OUT "rename: $!";
+	ok($lei->(qw(import), $md), 'import Maildir after +answered');
+	ok($lei->(qw(q -d none s:boolean)), 'lei q after +answered');
+	$res = json_utf8->decode($lei_out);
+	like($res->[0]->{'s'}, qr/use boolean/, 'got expected result');
+	is_deeply($res->[0]->{kw}, ['answered', 'seen'], 'keywords set');
+	is($res->[1], undef, 'only got one result');
+});
+done_testing;
diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index f7535687..a25795ca 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -237,7 +237,7 @@ SKIP: { # FIFO support
 	$wcb->(\(my $x = $buf), $b4dc0ffee);
 
 	my @f;
-	PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @f, shift });
+	PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift });
 	open my $fh, $f[0] or BAIL_OUT $!;
 	is(do { local $/; <$fh> }, $buf, 'wrote to Maildir');
 
@@ -246,7 +246,7 @@ SKIP: { # FIFO support
 	$wcb->(\($x = $buf."\nx\n"), $deadcafe);
 
 	my @x = ();
-	PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @x, shift });
+	PublicInbox::LeiToMail::maildir_each_file($md, sub { push @x, shift });
 	is(scalar(@x), 1, 'wrote one new file');
 	ok(!-f $f[0], 'old file clobbered');
 	open $fh, $x[0] or BAIL_OUT $!;
@@ -257,7 +257,7 @@ SKIP: { # FIFO support
 	$wcb->(\($x = $buf."\ny\n"), $deadcafe);
 	$wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe
 	@f = ();
-	PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @f, shift });
+	PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift });
 	is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there');
 	my @new = grep(!/\A\Q$x[0]\E\z/, @f);
 	is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');

  parent reply	other threads:[~2021-02-07  8:52 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-07  8:51 [PATCH 00/19] lei import Maildir, remote mboxrd fixes Eric Wong
2021-02-07  8:51 ` [PATCH 01/19] spawn: pi_fork_exec: restore parent sigmask in child Eric Wong
2021-02-07  8:51 ` [PATCH 02/19] spawn: pi_fork_exec: support "pgid" Eric Wong
2021-02-07 23:10   ` dprintf(3) portability? [was [02/19] spawn: pi_fork_exec: support "pgid"] Eric Wong
2021-02-07  8:51 ` [PATCH 03/19] lei add-external: handle interrupts with --mirror Eric Wong
2021-02-07  8:51 ` [PATCH 04/19] spawn_pp: die more consistently in child Eric Wong
2021-02-07  8:51 ` [PATCH 05/19] ipc: do not die inside wq_worker child process Eric Wong
2021-02-07  8:51 ` [PATCH 06/19] ipc: trim down the Storable checks Eric Wong
2021-02-07  8:51 ` [PATCH 07/19] Makefile.PL: depend on IO::Uncompress::Gunzip Eric Wong
2021-02-07  8:51 ` [PATCH 08/19] xapcmd: avoid potential die surprise in children Eric Wong
2021-02-07  8:51 ` [PATCH 09/19] tests: guard setup_public_inboxes for SQLite and Xapian Eric Wong
2021-02-07  8:51 ` [PATCH 10/19] Revert "ipc: add support for asynchronous callbacks" Eric Wong
2021-02-07  8:51 ` [PATCH 11/19] ipc: wq_do => wq_io_do Eric Wong
2021-02-07  8:51 ` [PATCH 12/19] lei: more consistent IPC exit and error handling Eric Wong
2021-02-07  8:51 ` [PATCH 13/19] lei: remove --mua-cmd alias for --mua Eric Wong
2021-02-07  8:51 ` [PATCH 14/19] lei: replace --thread with --threads Eric Wong
2021-02-07  8:51 ` [PATCH 15/19] lei q: improve remote mboxrd UX Eric Wong
2021-02-07  9:32   ` [PATCH 20/19] lei_xsearch: allow quieting regular mset progress, too Eric Wong
2021-02-07  8:51 ` [PATCH 16/19] lei q: SIGWINCH process group with the terminal Eric Wong
2021-02-07  8:51 ` Eric Wong [this message]
2021-02-07  8:52 ` [PATCH 18/19] imap: avoid unnecessary on-stack delete Eric Wong
2021-02-07  8:52 ` [PATCH 19/19] httpd/async: " Eric Wong
2021-02-07 10:40 ` [PATCH 21/19] lei q: fix arbitrary --mua command handling Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210207085201.13871-18-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 17/19] lei import: support Maildirs' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://7fh6tueqddpjyxjmgtdiueylzoqt6pt7hec3pukyptlmohoowvhde4yd.onion/inbox.comp.mail.public-inbox.meta
	nntp://ie5yzdi7fg72h7s4sdcztq5evakq23rdt33mfyfcddc5u3ndnw24ogqd.onion/inbox.comp.mail.public-inbox.meta
	nntp://4uok3hntl7oi7b4uf4rtfwefqeexfzil2w6kgk2jn5z2f764irre7byd.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git