From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on dcvr.yhbt.net X-Spam-Level: X-Spam-Status: No, score=-3.9 required=3.0 tests=ALL_TRUSTED,AWL,BAYES_00 shortcircuit=no autolearn=ham autolearn_force=no version=3.4.2 Received: from localhost (dcvr.yhbt.net [127.0.0.1]) by dcvr.yhbt.net (Postfix) with ESMTP id 4C23B1FC9E for ; Sun, 7 Feb 2021 08:52:03 +0000 (UTC) From: Eric Wong To: meta@public-inbox.org Subject: [PATCH 17/19] lei import: support Maildirs Date: Sun, 7 Feb 2021 08:51:59 +0000 Message-Id: <20210207085201.13871-18-e@80x24.org> In-Reply-To: <20210207085201.13871-1-e@80x24.org> References: <20210207085201.13871-1-e@80x24.org> MIME-Version: 1.0 Content-Transfer-Encoding: 8bit List-Id: It seems to be working trivially, though I'm probably going to split out Maildir reading into a separate package rather than using LeiToMail. --- MANIFEST | 1 + lib/PublicInbox/LeiImport.pm | 20 +++++++++++++++++--- lib/PublicInbox/LeiStore.pm | 8 +++++++- lib/PublicInbox/LeiToMail.pm | 11 ++++++----- t/lei-import-maildir.t | 33 +++++++++++++++++++++++++++++++++ t/lei_to_mail.t | 6 +++--- 6 files changed, 67 insertions(+), 12 deletions(-) create mode 100644 t/lei-import-maildir.t diff --git a/MANIFEST b/MANIFEST index 521f1f68..7f417743 100644 --- a/MANIFEST +++ b/MANIFEST @@ -359,6 +359,7 @@ t/iso-2202-jp.eml t/kqnotify.t t/lei-daemon.t t/lei-externals.t +t/lei-import-maildir.t t/lei-import.t t/lei-mirror.t t/lei.t diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 2b2dc2f7..a63bfdfd 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -8,6 +8,8 @@ use v5.10.1; use parent qw(PublicInbox::IPC); use PublicInbox::MboxReader; use PublicInbox::Eml; +use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::PktOp; sub _import_eml { # MboxReader callback my ($eml, $sto, $set_kw) = @_; @@ -35,7 +37,9 @@ sub call { # the main "lei import" method $lei->{opt}->{kw} //= 1; my $fmt = $lei->{opt}->{'format'}; my $self = $lei->{imp} = bless {}, $cls; - return $lei->fail('--format unspecified') if !$fmt; + if (my @f = grep { -f } @argv && !$fmt) { + return $lei->fail("--format unset for regular files:\n@f"); + } $self->{0} = $lei->{0} if $lei->{opt}->{stdin}; my $ops = { '!' => [ $lei->can('fail_handler'), $lei ], @@ -75,14 +79,14 @@ sub _import_fh { if ($fmt eq 'eml') { my $buf = do { local $/; <$fh> } // return $lei->child_error(1 >> 8, <<""); - error reading $x: $! +error reading $x: $! my $eml = PublicInbox::Eml->new(\$buf); _import_eml($eml, $lei->{sto}, $set_kw); } else { # some mbox my $cb = PublicInbox::MboxReader->can($fmt); $cb or return $lei->child_error(1 >> 8, <<""); - --format $fmt unsupported for $x +--format $fmt unsupported for $x $cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw); } @@ -90,6 +94,11 @@ sub _import_fh { $lei->child_error(1 >> 8, ": $@") if $@; } +sub _import_maildir { # maildir_each_file cb + my ($f, $sto, $set_kw) = @_; + $sto->ipc_do('set_eml_from_maildir', $f, $set_kw); +} + sub import_path_url { my ($self, $x) = @_; my $lei = $self->{lei}; @@ -99,6 +108,11 @@ sub import_path_url { unable to open $x: $! _import_fh($lei, $fh, $x); + } elsif (-d _ && (-d "$x/cur" || -d "$x/new")) { + require PublicInbox::LeiToMail; + PublicInbox::LeiToMail::maildir_each_file($x, + \&_import_maildir, + $lei->{sto}, $lei->{opt}->{kw}); } else { $lei->fail("$x unsupported (TODO)"); } diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 3a215973..546d500b 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -12,7 +12,7 @@ use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::ExtSearchIdx; use PublicInbox::Import; -use PublicInbox::InboxWritable; +use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; use PublicInbox::ContentHash qw(content_hash content_digest); use PublicInbox::MID qw(mids mids_in); @@ -224,6 +224,12 @@ sub set_eml { add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw); } +sub set_eml_from_maildir { + my ($self, $f, $set_kw) = @_; + my $eml = eml_from_path($f) or return; + set_eml($self, $eml, $set_kw ? maildir_keywords($f) : ()); +} + sub done { my ($self) = @_; my $err = ''; diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 857aeb63..a5a196db 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -266,8 +266,9 @@ sub _mbox_write_cb ($$) { } } -sub _maildir_each_file ($$;@) { +sub maildir_each_file ($$;@) { my ($dir, $cb, @arg) = @_; + $dir .= '/' unless substr($dir, -1) eq '/'; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; @@ -277,13 +278,13 @@ sub _maildir_each_file ($$;@) { } } -sub _augment_file { # _maildir_each_file cb +sub _augment_file { # maildir_each_file cb my ($f, $lei) = @_; my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return; _augment($eml, $lei); } -# _maildir_each_file callback, \&CORE::unlink doesn't work with it +# maildir_each_file callback, \&CORE::unlink doesn't work with it sub _unlink { unlink($_[0]) } sub _rand () { @@ -389,11 +390,11 @@ sub _do_augment_maildir { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { require PublicInbox::InboxWritable; # eml_from_path - _maildir_each_file($dst, \&_augment_file, $lei); + maildir_each_file($dst, \&_augment_file, $lei); $dedupe->pause_dedupe; } } else { # clobber existing Maildir - _maildir_each_file($dst, \&_unlink); + maildir_each_file($dst, \&_unlink); } } diff --git a/t/lei-import-maildir.t b/t/lei-import-maildir.t new file mode 100644 index 00000000..5842e19e --- /dev/null +++ b/t/lei-import-maildir.t @@ -0,0 +1,33 @@ +#!perl -w +# Copyright (C) 2020-2021 all contributors +# License: AGPL-3.0+ +use strict; use v5.10.1; use PublicInbox::TestCommon; +use Cwd qw(abs_path); +test_lei(sub { + my $md = "$ENV{HOME}/md"; + for ($md, "$md/new", "$md/cur", "$md/tmp") { + mkdir($_) or BAIL_OUT("mkdir $_: $!"); + } + symlink(abs_path('t/data/0001.patch'), "$md/cur/x:2,S") or + BAIL_OUT "symlink $md $!"; + ok($lei->(qw(import), $md), 'import Maildir'); + ok($lei->(qw(q s:boolean)), 'lei q'); + my $res = json_utf8->decode($lei_out); + like($res->[0]->{'s'}, qr/use boolean/, 'got expected result'); + is_deeply($res->[0]->{kw}, ['seen'], 'keyword set'); + is($res->[1], undef, 'only got one result'); + + ok($lei->(qw(import), $md), 'import Maildir again'); + ok($lei->(qw(q -d none s:boolean)), 'lei q w/o dedupe'); + my $r2 = json_utf8->decode($lei_out); + is_deeply($r2, $res, 'idempotent import'); + + rename("$md/cur/x:2,S", "$md/cur/x:2,SR") or BAIL_OUT "rename: $!"; + ok($lei->(qw(import), $md), 'import Maildir after +answered'); + ok($lei->(qw(q -d none s:boolean)), 'lei q after +answered'); + $res = json_utf8->decode($lei_out); + like($res->[0]->{'s'}, qr/use boolean/, 'got expected result'); + is_deeply($res->[0]->{kw}, ['answered', 'seen'], 'keywords set'); + is($res->[1], undef, 'only got one result'); +}); +done_testing; diff --git a/t/lei_to_mail.t b/t/lei_to_mail.t index f7535687..a25795ca 100644 --- a/t/lei_to_mail.t +++ b/t/lei_to_mail.t @@ -237,7 +237,7 @@ SKIP: { # FIFO support $wcb->(\(my $x = $buf), $b4dc0ffee); my @f; - PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @f, shift }); + PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift }); open my $fh, $f[0] or BAIL_OUT $!; is(do { local $/; <$fh> }, $buf, 'wrote to Maildir'); @@ -246,7 +246,7 @@ SKIP: { # FIFO support $wcb->(\($x = $buf."\nx\n"), $deadcafe); my @x = (); - PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @x, shift }); + PublicInbox::LeiToMail::maildir_each_file($md, sub { push @x, shift }); is(scalar(@x), 1, 'wrote one new file'); ok(!-f $f[0], 'old file clobbered'); open $fh, $x[0] or BAIL_OUT $!; @@ -257,7 +257,7 @@ SKIP: { # FIFO support $wcb->(\($x = $buf."\ny\n"), $deadcafe); $wcb->(\($x = $buf."\ny\n"), $b4dc0ffee); # skipped by dedupe @f = (); - PublicInbox::LeiToMail::_maildir_each_file($md, sub { push @f, shift }); + PublicInbox::LeiToMail::maildir_each_file($md, sub { push @f, shift }); is(scalar grep(/\A\Q$x[0]\E\z/, @f), 1, 'old file still there'); my @new = grep(!/\A\Q$x[0]\E\z/, @f); is(scalar @new, 1, '1 new file written (b4dc0ffee skipped)');