user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier
Date: Tue,  9 Feb 2021 07:09:31 -0100
Message-ID: <20210209080937.4678-6-e@80x24.org> (raw)
In-Reply-To: <20210209080937.4678-1-e@80x24.org>

We'll do more requires in the top-level lei-daemon process to
save work in workers.  We can also work towards aborting on
user errors in lei-daemon rather than worker processes.

"lei import -f mbox*" is finally tested inside t/lei_to_mail.t
---
 MANIFEST                      |  1 +
 lib/PublicInbox/LeiImport.pm  | 25 +++++++++++++++----------
 lib/PublicInbox/LeiToMail.pm  | 26 ++++++++++----------------
 lib/PublicInbox/MdirReader.pm | 21 +++++++++++++++++++++
 lib/PublicInbox/TestCommon.pm |  4 +++-
 t/lei-import.t                |  5 ++++-
 t/lei_to_mail.t               | 19 ++++++++++++++++---
 7 files changed, 70 insertions(+), 31 deletions(-)
 create mode 100644 lib/PublicInbox/MdirReader.pm

diff --git a/MANIFEST b/MANIFEST
index 7f417743..6b3fc812 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -199,6 +199,7 @@ lib/PublicInbox/ManifestJsGz.pm
 lib/PublicInbox/Mbox.pm
 lib/PublicInbox/MboxGz.pm
 lib/PublicInbox/MboxReader.pm
+lib/PublicInbox/MdirReader.pm
 lib/PublicInbox/MiscIdx.pm
 lib/PublicInbox/MiscSearch.pm
 lib/PublicInbox/MsgIter.pm
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index a63bfdfd..8358d9d4 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -6,7 +6,6 @@ package PublicInbox::LeiImport;
 use strict;
 use v5.10.1;
 use parent qw(PublicInbox::IPC);
-use PublicInbox::MboxReader;
 use PublicInbox::Eml;
 use PublicInbox::InboxWritable qw(eml_from_path);
 use PublicInbox::PktOp;
@@ -37,8 +36,17 @@ sub call { # the main "lei import" method
 	$lei->{opt}->{kw} //= 1;
 	my $fmt = $lei->{opt}->{'format'};
 	my $self = $lei->{imp} = bless {}, $cls;
-	if (my @f = grep { -f } @argv && !$fmt) {
-		return $lei->fail("--format unset for regular files:\n@f");
+	my @f;
+	for my $x (@argv) {
+		if (-f $x) { push @f, $x }
+		elsif (-d _) { require PublicInbox::MdirReader }
+	}
+	(@f && !$fmt) and
+		return $lei->fail("--format unset for regular file(s):\n@f");
+	if (@f && $fmt ne 'eml') {
+		require PublicInbox::MboxReader;
+		PublicInbox::MboxReader->can($fmt) or
+			return $lei->fail( "--format=$fmt unrecognized\n");
 	}
 	$self->{0} = $lei->{0} if $lei->{opt}->{stdin};
 	my $ops = {
@@ -83,11 +91,9 @@ error reading $x: $!
 
 			my $eml = PublicInbox::Eml->new(\$buf);
 			_import_eml($eml, $lei->{sto}, $set_kw);
-		} else { # some mbox
-			my $cb = PublicInbox::MboxReader->can($fmt);
-			$cb or return $lei->child_error(1 >> 8, <<"");
---format $fmt unsupported for $x
-
+		} else { # some mbox (->can already checked in call);
+			my $cb = PublicInbox::MboxReader->can($fmt) //
+				die "BUG: bad fmt=$fmt";
 			$cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw);
 		}
 	};
@@ -109,8 +115,7 @@ unable to open $x: $!
 
 		_import_fh($lei, $fh, $x);
 	} elsif (-d _ && (-d "$x/cur" || -d "$x/new")) {
-		require PublicInbox::LeiToMail;
-		PublicInbox::LeiToMail::maildir_each_file($x,
+		PublicInbox::MdirReader::maildir_each_file($x,
 					\&_import_maildir,
 					$lei->{sto}, $lei->{opt}->{kw});
 	} else {
diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm
index a5a196db..e3e512be 100644
--- a/lib/PublicInbox/LeiToMail.pm
+++ b/lib/PublicInbox/LeiToMail.pm
@@ -18,6 +18,7 @@ use Symbol qw(gensym);
 use IO::Handle; # ->autoflush
 use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY);
 use Errno qw(EEXIST ESPIPE ENOENT EPIPE);
+my ($maildir_each_file);
 
 # struggles with short-lived repos, Gcf2Client makes little sense with lei;
 # but we may use in-process libgit2 in the future.
@@ -266,18 +267,6 @@ sub _mbox_write_cb ($$) {
 	}
 }
 
-sub maildir_each_file ($$;@) {
-	my ($dir, $cb, @arg) = @_;
-	$dir .= '/' unless substr($dir, -1) eq '/';
-	for my $d (qw(new/ cur/)) {
-		my $pfx = $dir.$d;
-		opendir my $dh, $pfx or next;
-		while (defined(my $fn = readdir($dh))) {
-			$cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
-		}
-	}
-}
-
 sub _augment_file { # maildir_each_file cb
 	my ($f, $lei) = @_;
 	my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return;
@@ -354,11 +343,18 @@ sub new {
 	my $dst = $lei->{ovv}->{dst};
 	my $self = bless {}, $cls;
 	if ($fmt eq 'maildir') {
+		$maildir_each_file //= do {
+			require PublicInbox::MdirReader;
+			PublicInbox::MdirReader->can('maildir_each_file');
+		};
+		$lei->{opt}->{augment} and
+			require PublicInbox::InboxWritable; # eml_from_path
 		$self->{base_type} = 'maildir';
 		-e $dst && !-d _ and die
 				"$dst exists and is not a directory\n";
 		$lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/';
 	} elsif (substr($fmt, 0, 4) eq 'mbox') {
+		require PublicInbox::MboxReader if $lei->{opt}->{augment};
 		(-d $dst || (-e _ && !-w _)) and die
 			"$dst exists and is not a writable file\n";
 		$self->can("eml2$fmt") or die "bad mbox --format=$fmt\n";
@@ -389,12 +385,11 @@ sub _do_augment_maildir {
 	if ($lei->{opt}->{augment}) {
 		my $dedupe = $lei->{dedupe};
 		if ($dedupe && $dedupe->prepare_dedupe) {
-			require PublicInbox::InboxWritable; # eml_from_path
-			maildir_each_file($dst, \&_augment_file, $lei);
+			$maildir_each_file->($dst, \&_augment_file, $lei);
 			$dedupe->pause_dedupe;
 		}
 	} else { # clobber existing Maildir
-		maildir_each_file($dst, \&_unlink);
+		$maildir_each_file->($dst, \&_unlink);
 	}
 }
 
@@ -435,7 +430,6 @@ sub _do_augment_mbox {
 		my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) :
 				dup_src($out);
 		my $fmt = $lei->{ovv}->{fmt};
-		require PublicInbox::MboxReader;
 		PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei);
 	}
 	# maybe some systems don't honor O_APPEND, Perl does this:
diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm
new file mode 100644
index 00000000..c6a0e7a8
--- /dev/null
+++ b/lib/PublicInbox/MdirReader.pm
@@ -0,0 +1,21 @@
+# Copyright (C) 2020-2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# Maildirs for now, MH eventually
+package PublicInbox::MdirReader;
+use strict;
+use v5.10.1;
+
+sub maildir_each_file ($$;@) {
+	my ($dir, $cb, @arg) = @_;
+	$dir .= '/' unless substr($dir, -1) eq '/';
+	for my $d (qw(new/ cur/)) {
+		my $pfx = $dir.$d;
+		opendir my $dh, $pfx or next;
+		while (defined(my $fn = readdir($dh))) {
+			$cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/;
+		}
+	}
+}
+
+1;
diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm
index ec9191b6..53f13437 100644
--- a/lib/PublicInbox/TestCommon.pm
+++ b/lib/PublicInbox/TestCommon.pm
@@ -14,7 +14,7 @@ BEGIN {
 	@EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods
 		run_script start_script key2sub xsys xsys_e xqx eml_load tick
 		have_xapian_compact json_utf8 setup_public_inboxes
-		tcp_host_port test_lei $lei $lei_out $lei_err $lei_opt);
+		tcp_host_port test_lei lei $lei $lei_out $lei_err $lei_opt);
 	require Test::More;
 	my @methods = grep(!/\W/, @Test::More::EXPORT);
 	eval(join('', map { "*$_=\\&Test::More::$_;" } @methods));
@@ -457,6 +457,8 @@ our $lei = sub {
 	$res;
 };
 
+sub lei (@) { $lei->(@_) }
+
 sub json_utf8 () {
 	state $x = ref(PublicInbox::Config->json)->new->utf8->canonical;
 }
diff --git a/t/lei-import.t b/t/lei-import.t
index 709d89fa..b691798a 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -3,12 +3,14 @@
 # License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
 use strict; use v5.10.1; use PublicInbox::TestCommon;
 test_lei(sub {
+ok(!$lei->(qw(import -f bogus), 't/plack-qp.eml'), 'fails with bogus format');
+like($lei_err, qr/\bbogus unrecognized/, 'gave error message');
 
 ok($lei->(qw(q s:boolean)), 'search miss before import');
 unlike($lei_out, qr/boolean/i, 'no results, yet');
 open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!;
 ok($lei->([qw(import -f eml -)], undef, { %$lei_opt, 0 => $fh }),
-	'import single file from stdin');
+	'import single file from stdin') or diag $lei_err;
 close $fh;
 ok($lei->(qw(q s:boolean)), 'search hit after import');
 ok($lei->(qw(import -f eml), 't/data/message_embed.eml'),
@@ -35,5 +37,6 @@ $res = json_utf8->decode($lei_out);
 is($res->[1], undef, 'only one result');
 is_deeply($res->[0]->{kw}, [], 'no keywords set');
 
+# see t/lei_to_mail.t for "import -f mbox*"
 });
 done_testing;
diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t
index a25795ca..77e9902e 100644
--- a/t/lei_to_mail.t
+++ b/t/lei_to_mail.t
@@ -10,6 +10,7 @@ use Fcntl qw(SEEK_SET);
 use PublicInbox::Spawn qw(popen_rd which);
 use List::Util qw(shuffle);
 require_mods(qw(DBD::SQLite));
+require PublicInbox::MdirReader;
 require PublicInbox::MboxReader;
 require PublicInbox::LeiOverview;
 require PublicInbox::LEI;
@@ -127,6 +128,17 @@ my $orig = do {
 	is(do { local $/; <$fh> }, $raw, 'jobs > 1');
 	$raw;
 };
+
+test_lei(sub {
+	ok(lei(qw(import -f), $mbox, $fn), 'imported mbox');
+	ok(lei(qw(q s:x)), 'lei q works') or diag $lei_err;
+	my $res = json_utf8->decode($lei_out);
+	my $x = $res->[0];
+	is($x->{'s'}, 'x', 'subject imported') or diag $lei_out;
+	is_deeply($x->{'kw'}, ['seen'], 'kw imported') or diag $lei_out;
+	is($res->[1], undef, 'only one result');
+});
+
 for my $zsfx (qw(gz bz2 xz)) { # XXX should we support zst, zz, lzo, lzma?
 	my $zsfx2cmd = PublicInbox::LeiToMail->can('zsfx2cmd');
 	SKIP: {
@@ -230,6 +242,7 @@ SKIP: { # FIFO support
 }
 
 { # Maildir support
+	my $each_file = PublicInbox::MdirReader->can('maildir_each_file');
 	my $md = "$tmpdir/maildir/";
 	my $wcb = $wcb_get->('maildir', $md);
 	is(ref($wcb), 'CODE', 'got Maildir callback');
@@ -237,7 +250,7 @@ SKIP: { # FIFO support
 	$wcb->(\(my $x = $buf), $b4dc0ffee);
 
 	my @f;
-	PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift });
+	$each_file->($md, sub { push @f, shift });
 	open my $fh, $f[0] or BAIL_OUT $!;
 	is(do { local $/; <$fh> }, $buf, 'wrote to Maildir');
 
@@ -246,7 +259,7 @@ SKIP: { # FIFO support
 	$wcb->(\($x = $buf."\nx\n"), $deadcafe);
 
 	my @x = ();
-	PublicInbox::LeiToMail::maildir_each_file($md, sub { push @x, shift });
+	$each_file->($md, sub { push @x, shift });
 	is(scalar(@x), 1, 'wrote one new file');
 	ok(!-f $f[0], 'old file clobbered');
 	open $fh, $x[0] or BAIL_OUT $!;
@@ -257,7 +270,7 @@ SKIP: { # FIFO support
 	$wcb->(\($x = $buf."\ny\n"), $deadcafe);
 	$wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe
 	@f = ();
-	PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift });
+	$each_file->($md, sub { push @f, shift });
 	is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there');
 	my @new = grep(!/\A\Q$x[0]\E\z/, @f);
 	is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');

  parent reply	other threads:[~2021-02-09  8:09 UTC|newest]

Thread overview: 12+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-09  8:09 [PATCH 00/11] Maildir code consolidation, test updates Eric Wong
2021-02-09  8:09 ` [PATCH 01/11] t/thread-index-gap.t: avoid unnecessary map Eric Wong
2021-02-09  8:09 ` [PATCH 02/11] test_common: disable fsync on the CLI where possible Eric Wong
2021-02-09  8:09 ` [PATCH 03/11] t/cgi.t: modernizations and style updates Eric Wong
2021-02-09  8:09 ` [PATCH 04/11] git: ->qx: respect caller's $/ in array context Eric Wong
2021-02-09  8:09 ` Eric Wong [this message]
2021-02-09  8:09 ` [PATCH 06/11] t/run.perl: fix for >128 tests Eric Wong
2021-02-09  8:09 ` [PATCH 07/11] use MdirReader in -watch and InboxWritable Eric Wong
2021-02-09  8:09 ` [PATCH 08/11] lei q: prefix --alert ops with ':' instead of '-' Eric Wong
2021-02-09  8:09 ` [PATCH 09/11] t/run.perl: drop Cwd dependency Eric Wong
2021-02-09  8:09 ` [PATCH 10/11] lei: replace "I:"-prefixed info messages with "#" Eric Wong
2021-02-09  8:09 ` [PATCH 11/11] tests|lei: fixes for TEST_RUN_MODE=0 and lei oneshot Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210209080937.4678-6-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

user/dev discussion of public-inbox itself

This inbox may be cloned and mirrored by anyone:

	git clone --mirror https://public-inbox.org/meta
	git clone --mirror http://czquwvybam4bgbro.onion/meta
	git clone --mirror http://hjrcffqmbrq6wope.onion/meta
	git clone --mirror http://ou63pmih66umazou.onion/meta

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V1 meta meta/ https://public-inbox.org/meta \
		meta@public-inbox.org
	public-inbox-index meta

Example config snippet for mirrors.
Newsgroups are available over NNTP:
	nntp://news.public-inbox.org/inbox.comp.mail.public-inbox.meta
	nntp://ou63pmih66umazou.onion/inbox.comp.mail.public-inbox.meta
	nntp://czquwvybam4bgbro.onion/inbox.comp.mail.public-inbox.meta
	nntp://hjrcffqmbrq6wope.onion/inbox.comp.mail.public-inbox.meta
	nntp://news.gmane.io/gmane.mail.public-inbox.general
 note: .onion URLs require Tor: https://www.torproject.org/

code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

AGPL code for this site: git clone https://public-inbox.org/public-inbox.git