From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-4.0 required=3.0 tests=ALL_TRUSTED,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 843181FA01 for ; Tue, 9 Feb 2021 08:09:38 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 05/11] lei: split out MdirReader package, lazy-require earlier Date: Tue, 9 Feb 2021 07:09:31 -0100 Message-Id: <20210209080937.4678-6-e@80x24.org> In-Reply-To: <20210209080937.4678-1-e@80x24.org> References: <20210209080937.4678-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: We'll do more requires in the top-level lei-daemon process to save work in workers. We can also work towards aborting on user errors in lei-daemon rather than worker processes. "lei import -f mbox*" is finally tested inside t/lei_to_mail.t --- MANIFEST | 1 + lib/PublicInbox/LeiImport.pm | 25 +++++++++++++++---------- lib/PublicInbox/LeiToMail.pm | 26 ++++++++++---------------- lib/PublicInbox/MdirReader.pm | 21 +++++++++++++++++++++ lib/PublicInbox/TestCommon.pm | 4 +++- t/lei-import.t | 5 ++++- t/lei_to_mail.t | 19 ++++++++++++++++--- 7 files changed, 70 insertions(+), 31 deletions(-) create mode 100644 lib/PublicInbox/MdirReader.pm diff --git a/MANIFEST b/MANIFEST index 7f417743..6b3fc812 100644 --- a/MANIFEST +++ b/MANIFEST @@ -199,6 +199,7 @@ lib/PublicInbox/ManifestJsGz.pm lib/PublicInbox/Mbox.pm lib/PublicInbox/MboxGz.pm lib/PublicInbox/MboxReader.pm +lib/PublicInbox/MdirReader.pm lib/PublicInbox/MiscIdx.pm lib/PublicInbox/MiscSearch.pm lib/PublicInbox/MsgIter.pm diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index a63bfdfd..8358d9d4 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -6,7 +6,6 @@ package PublicInbox::LeiImport; use strict; use v5.10.1; use parent qw(PublicInbox::IPC); -use PublicInbox::MboxReader; use PublicInbox::Eml; use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::PktOp; @@ -37,8 +36,17 @@ sub call { # the main "lei import" method $lei->{opt}->{kw} //= 1; my $fmt = $lei->{opt}->{'format'}; my $self = $lei->{imp} = bless {}, $cls; - if (my @f = grep { -f } @argv && !$fmt) { - return $lei->fail("--format unset for regular files:\n@f"); + my @f; + for my $x (@argv) { + if (-f $x) { push @f, $x } + elsif (-d _) { require PublicInbox::MdirReader } + } + (@f && !$fmt) and + return $lei->fail("--format unset for regular file(s):\n@f"); + if (@f && $fmt ne 'eml') { + require PublicInbox::MboxReader; + PublicInbox::MboxReader->can($fmt) or + return $lei->fail( "--format=$fmt unrecognized\n"); } $self->{0} = $lei->{0} if $lei->{opt}->{stdin}; my $ops = { @@ -83,11 +91,9 @@ error reading $x: $! my $eml = PublicInbox::Eml->new(\$buf); _import_eml($eml, $lei->{sto}, $set_kw); - } else { # some mbox - my $cb = PublicInbox::MboxReader->can($fmt); - $cb or return $lei->child_error(1 >> 8, <<""); ---format $fmt unsupported for $x - + } else { # some mbox (->can already checked in call); + my $cb = PublicInbox::MboxReader->can($fmt) // + die "BUG: bad fmt=$fmt"; $cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw); } }; @@ -109,8 +115,7 @@ unable to open $x: $! _import_fh($lei, $fh, $x); } elsif (-d _ && (-d "$x/cur" || -d "$x/new")) { - require PublicInbox::LeiToMail; - PublicInbox::LeiToMail::maildir_each_file($x, + PublicInbox::MdirReader::maildir_each_file($x, \&_import_maildir, $lei->{sto}, $lei->{opt}->{kw}); } else { diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index a5a196db..e3e512be 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -18,6 +18,7 @@ use Symbol qw(gensym); use IO::Handle; # ->autoflush use Fcntl qw(SEEK_SET SEEK_END O_CREAT O_EXCL O_WRONLY); use Errno qw(EEXIST ESPIPE ENOENT EPIPE); +my ($maildir_each_file); # struggles with short-lived repos, Gcf2Client makes little sense with lei; # but we may use in-process libgit2 in the future. @@ -266,18 +267,6 @@ sub _mbox_write_cb ($$) { } } -sub maildir_each_file ($$;@) { - my ($dir, $cb, @arg) = @_; - $dir .= '/' unless substr($dir, -1) eq '/'; - for my $d (qw(new/ cur/)) { - my $pfx = $dir.$d; - opendir my $dh, $pfx or next; - while (defined(my $fn = readdir($dh))) { - $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; - } - } -} - sub _augment_file { # maildir_each_file cb my ($f, $lei) = @_; my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return; @@ -354,11 +343,18 @@ sub new { my $dst = $lei->{ovv}->{dst}; my $self = bless {}, $cls; if ($fmt eq 'maildir') { + $maildir_each_file //= do { + require PublicInbox::MdirReader; + PublicInbox::MdirReader->can('maildir_each_file'); + }; + $lei->{opt}->{augment} and + require PublicInbox::InboxWritable; # eml_from_path $self->{base_type} = 'maildir'; -e $dst && !-d _ and die "$dst exists and is not a directory\n"; $lei->{ovv}->{dst} = $dst .= '/' if substr($dst, -1) ne '/'; } elsif (substr($fmt, 0, 4) eq 'mbox') { + require PublicInbox::MboxReader if $lei->{opt}->{augment}; (-d $dst || (-e _ && !-w _)) and die "$dst exists and is not a writable file\n"; $self->can("eml2$fmt") or die "bad mbox --format=$fmt\n"; @@ -389,12 +385,11 @@ sub _do_augment_maildir { if ($lei->{opt}->{augment}) { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { - require PublicInbox::InboxWritable; # eml_from_path - maildir_each_file($dst, \&_augment_file, $lei); + $maildir_each_file->($dst, \&_augment_file, $lei); $dedupe->pause_dedupe; } } else { # clobber existing Maildir - maildir_each_file($dst, \&_unlink); + $maildir_each_file->($dst, \&_unlink); } } @@ -435,7 +430,6 @@ sub _do_augment_mbox { my $rd = $zsfx ? decompress_src($out, $zsfx, $lei) : dup_src($out); my $fmt = $lei->{ovv}->{fmt}; - require PublicInbox::MboxReader; PublicInbox::MboxReader->$fmt($rd, \&_augment, $lei); } # maybe some systems don't honor O_APPEND, Perl does this: diff --git a/lib/PublicInbox/MdirReader.pm b/lib/PublicInbox/MdirReader.pm new file mode 100644 index 00000000..c6a0e7a8 --- /dev/null +++ b/lib/PublicInbox/MdirReader.pm @@ -0,0 +1,21 @@ +# Copyright (C) 2020-2021 all contributors +# License: AGPL-3.0+ + +# Maildirs for now, MH eventually +package PublicInbox::MdirReader; +use strict; +use v5.10.1; + +sub maildir_each_file ($$;@) { + my ($dir, $cb, @arg) = @_; + $dir .= '/' unless substr($dir, -1) eq '/'; + for my $d (qw(new/ cur/)) { + my $pfx = $dir.$d; + opendir my $dh, $pfx or next; + while (defined(my $fn = readdir($dh))) { + $cb->($pfx.$fn, @arg) if $fn =~ /:2,[A-Za-z]*\z/; + } + } +} + +1; diff --git a/lib/PublicInbox/TestCommon.pm b/lib/PublicInbox/TestCommon.pm index ec9191b6..53f13437 100644 --- a/lib/PublicInbox/TestCommon.pm +++ b/lib/PublicInbox/TestCommon.pm @@ -14,7 +14,7 @@ BEGIN { @EXPORT = qw(tmpdir tcp_server tcp_connect require_git require_mods run_script start_script key2sub xsys xsys_e xqx eml_load tick have_xapian_compact json_utf8 setup_public_inboxes - tcp_host_port test_lei $lei $lei_out $lei_err $lei_opt); + tcp_host_port test_lei lei $lei $lei_out $lei_err $lei_opt); require Test::More; my @methods = grep(!/\W/, @Test::More::EXPORT); eval(join('', map { "*$_=\\&Test::More::$_;" } @methods)); @@ -457,6 +457,8 @@ our $lei = sub { $res; }; +sub lei (@) { $lei->(@_) } + sub json_utf8 () { state $x = ref(PublicInbox::Config->json)->new->utf8->canonical; } diff --git a/t/lei-import.t b/t/lei-import.t index 709d89fa..b691798a 100644 --- a/t/lei-import.t +++ b/t/lei-import.t @@ -3,12 +3,14 @@ # License: AGPL-3.0+ use strict; use v5.10.1; use PublicInbox::TestCommon; test_lei(sub { +ok(!$lei->(qw(import -f bogus), 't/plack-qp.eml'), 'fails with bogus format'); +like($lei_err, qr/\bbogus unrecognized/, 'gave error message'); ok($lei->(qw(q s:boolean)), 'search miss before import'); unlike($lei_out, qr/boolean/i, 'no results, yet'); open my $fh, '<', 't/data/0001.patch' or BAIL_OUT $!; ok($lei->([qw(import -f eml -)], undef, { %$lei_opt, 0 => $fh }), - 'import single file from stdin'); + 'import single file from stdin') or diag $lei_err; close $fh; ok($lei->(qw(q s:boolean)), 'search hit after import'); ok($lei->(qw(import -f eml), 't/data/message_embed.eml'), @@ -35,5 +37,6 @@ $res = json_utf8->decode($lei_out); is($res->[1], undef, 'only one result'); is_deeply($res->[0]->{kw}, [], 'no keywords set'); +# see t/lei_to_mail.t for "import -f mbox*" }); done_testing; diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t index a25795ca..77e9902e 100644 --- a/t/lei_to_mail.t +++ b/t/lei_to_mail.t @@ -10,6 +10,7 @@ use Fcntl qw(SEEK_SET); use PublicInbox::Spawn qw(popen_rd which); use List::Util qw(shuffle); require_mods(qw(DBD::SQLite)); +require PublicInbox::MdirReader; require PublicInbox::MboxReader; require PublicInbox::LeiOverview; require PublicInbox::LEI; @@ -127,6 +128,17 @@ my $orig = do { is(do { local $/; <$fh> }, $raw, 'jobs > 1'); $raw; }; + +test_lei(sub { + ok(lei(qw(import -f), $mbox, $fn), 'imported mbox'); + ok(lei(qw(q s:x)), 'lei q works') or diag $lei_err; + my $res = json_utf8->decode($lei_out); + my $x = $res->[0]; + is($x->{'s'}, 'x', 'subject imported') or diag $lei_out; + is_deeply($x->{'kw'}, ['seen'], 'kw imported') or diag $lei_out; + is($res->[1], undef, 'only one result'); +}); + for my $zsfx (qw(gz bz2 xz)) { # XXX should we support zst, zz, lzo, lzma? my $zsfx2cmd = PublicInbox::LeiToMail->can('zsfx2cmd'); SKIP: { @@ -230,6 +242,7 @@ SKIP: { # FIFO support } { # Maildir support + my $each_file = PublicInbox::MdirReader->can('maildir_each_file'); my $md = "$tmpdir/maildir/"; my $wcb = $wcb_get->('maildir', $md); is(ref($wcb), 'CODE', 'got Maildir callback'); @@ -237,7 +250,7 @@ SKIP: { # FIFO support $wcb->(\(my $x = $buf), $b4dc0ffee); my @f; - PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift }); + $each_file->($md, sub { push @f, shift }); open my $fh, $f[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf, 'wrote to Maildir'); @@ -246,7 +259,7 @@ SKIP: { # FIFO support $wcb->(\($x = $buf."\nx\n"), $deadcafe); my @x = (); - PublicInbox::LeiToMail::maildir_each_file($md, sub { push @x, shift }); + $each_file->($md, sub { push @x, shift }); is(scalar(@x), 1, 'wrote one new file'); ok(!-f $f[0], 'old file clobbered'); open $fh, $x[0] or BAIL_OUT $!; @@ -257,7 +270,7 @@ SKIP: { # FIFO support $wcb->(\($x = $buf."\ny\n"), $deadcafe); $wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe @f = (); - PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift }); + $each_file->($md, sub { push @f, shift }); is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there'); my @new = grep(!/\A\Q$x[0]\E\z/, @f); is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');