From 550c69496caa3c61188c645b536ec3c4c3ade70a Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Tue, 9 Feb 2021 07:09:31 -0100 Subject: lei: split out MdirReader package, lazy-require earlier We'll do more requires in the top-level lei-daemon process to save work in workers. We can also work towards aborting on user errors in lei-daemon rather than worker processes. "lei import -f mbox*" is finally tested inside t/lei_to_mail.t --- lib/PublicInbox/LeiImport.pm | 25 +++++++++++++++---------- lib/PublicInbox/LeiToMail.pm | 26 ++++++++++---------------- lib/PublicInbox/MdirReader.pm | 21 +++++++++++++++++++++ lib/PublicInbox/TestCommon.pm | 4 +++- 4 files changed, 49 insertions(+), 27 deletions(-) create mode 100644 lib/PublicInbox/MdirReader.pm (limited to 'lib') diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index a63bfdfd..8358d9d4 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -6,7 +6,6 @@ package PublicInbox::LeiImport; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); -use PublicInbox::MboxReader; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::PktOp; @@ -37,8 +36,17 @@ sub call { # the main "lei import" method $lei->{opt}->{kw} //= 1; my $fmt = $lei->{opt}->{'format'}; my $self = $lei->{imp} = bless {}, $cls; - if (my @f = grep { -f } @argv && !$fmt) { - return $lei->fail("--format unset for regular files:\n@f"); + my @f; + for my $x (@argv) { + if (-f $x) { push @f, $x } + elsif (-d _) { require PublicInbox::MdirReader } + } + (@f && !$fmt) and + return $lei->fail("--format unset for regular file(s):\n@f"); + if (@f && $fmt ne 'eml') { + require PublicInbox::MboxReader; + PublicInbox::MboxReader->can($fmt) or + return $lei->fail( "--format=$fmt unrecognized\n"); } $self->{0} = $lei->{0} if $lei->{opt}->{stdin}; my $ops = { @@ -83,11 +91,9 @@ error reading $x: $! my $eml = PublicInbox::Eml->new(\$buf); _import_eml($eml, $lei->{sto}, $set_kw); - } else { # some mbox - my $cb = PublicInbox::MboxReader->can($fmt); - $cb or return $lei->child_error(1 >> 8, <<""); ---format $fmt unsupported for $x - + } else { # some mbox (->can already checked in call); + my $cb = PublicInbox::MboxReader->can($fmt) // + die "BUG: bad fmt=$fmt"; $cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw); } }; @@ -109,8 +115,7 @@ unable to open $x: $! _import_fh($lei, $fh, $x); } elsif (-d _ && (-d "$x/cur" || -d "$x/new")) { - require PublicInbox::LeiToMail; - PublicInbox::LeiToMail::maildir_each_file($x, + PublicInbox::MdirReader::maildir_each_file($x, \&_import_maildir, $lei->{sto}, $lei->{opt}->{kw}); } else { diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index a5a196db..e3e512be 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -18,6 +18,7 @@ use Symbol qw(gensym); use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); use Errno qw(EEXIST ESPIPE ENOENT EPIPE); +my ($maildir_each_file); # struggles with short-lived repos, Gcf2Client makes little sense with lei; # but we may use in-process libgit2 in the future. @@ -266,18 +267,6 @@ sub _mbox_write_cb ($$) { } } -sub maildir_each_file ($$;@) { - my ($dir, $cb, @arg) = @_; - $dir .= '/' unless substr($dir, -1) eq '/'; - for my $d (qw(new/ cur/)) { - my $pfx = $dir.$d; - opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; - } - } -} - sub _augment_file { # maildir_each_file cb my ($f, $lei) = @_; my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return; @@ -354,11 +343,18 @@ sub new { my $dst = $lei->{ovv}->{dst}; my $self = bless {}, $cls; if ($fmt eq 'maildir') { + $maildir_each_file //= do { + require PublicInbox::MdirReader; + PublicInbox::MdirReader->can('maildir_each_file'); + }; + $lei->{opt}->{augment} and + require PublicInbox::InboxWritable; # eml_from_path $self->{base_type} = 'maildir'; -e $dst && !-d _ and die "$dst exists and is not a directory\n"; $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/'; } elsif (substr($fmt, 0, 4) eq 'mbox') { + require PublicInbox::MboxReader if $lei->{opt}->{augment}; (-d $dst || (-e _ && !-w _)) and die "$dst exists and is not a writable file\n"; $self->can("eml2$fmt") or die "bad mbox --format=$fmt\n"; @@ -389,12 +385,11 @@ sub _do_augment_maildir { if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { - require PublicInbox::InboxWritable; # eml_from_path - maildir_each_file($dst, \&_augment_file, $lei); + $maildir_each_file->($dst, \&_augment_file, $lei); $dedupe->pause_dedupe; } } else { # clobber existing Maildir - maildir_each_file($dst, \&_unlink); + $maildir_each_file->($dst, \&_unlink); } } @@ -435,7 +430,6 @@ sub _do_augment_mbox { my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) : dup_src($out); my $fmt = $lei->{ovv}->{fmt}; - require PublicInbox::MboxReader; PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei); } # maybe some systems don't honor O_APPEND, Perl does this: diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm new file mode 100644 index 00000000..c6a0e7a8 --- /dev/null +++ b/lib/PublicInbox/MdirReader.pm @@ -0,0 +1,21 @@ +# Copyright (C) 2020-2021 all contributors +# License: AGPL-3.0+ + +# Maildirs for now, MH eventually +package PublicInbox::MdirReader; +use strict; +use v5.10.1; + +sub maildir_each_file ($$;@) { + my ($dir, $cb, @arg) = @_; + $dir .= '/' unless substr($dir, -1) eq '/'; + for my $d (qw(new/ cur/)) { + my $pfx = $dir.$d; + opendir my $dh, $pfx or next; + while (defined(my $fn = readdir($dh))) { + $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + } + } +} + +1; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index ec9191b6..53f13437 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -14,7 +14,7 @@ BEGIN { @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods run_script start_script key2sub xsys xsys_e xqx eml_load tick have_xapian_compact json_utf8 setup_public_inboxes - tcp_host_port test_lei $lei $lei_out $lei_err $lei_opt); + tcp_host_port test_lei lei $lei $lei_out $lei_err $lei_opt); require Test::More; my @methods = grep(!/\W/, @Test::More::EXPORT); eval(join('', map { "*$_=\\&Test::More::$_;" } @methods)); @@ -457,6 +457,8 @@ our $lei = sub { $res; }; +sub lei (@) { $lei->(@_) } + sub json_utf8 () { state $x = ref(PublicInbox::Config->json)->new->utf8->canonical; } -- cgit v1.2.3-24-ge0c7