From 8600c999da4786d2297ebbf44aa0618461c2f3cf Mon Sep 17 00:00:00 2001 From: Eric Wong Date: Mon, 8 Feb 2021 05:06:51 -0100 Subject: lei import: support Maildirs It seems to be working trivially, though I'm probably going to split out Maildir reading into a separate package rather than using LeiToMail. --- lib/PublicInbox/LeiImport.pm | 20 +++++++++++++++++--- lib/PublicInbox/LeiStore.pm | 8 +++++++- lib/PublicInbox/LeiToMail.pm | 11 ++++++----- 3 files changed, 30 insertions(+), 9 deletions(-) (limited to 'lib') diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm index 2b2dc2f7..a63bfdfd 100644 --- a/lib/PublicInbox/LeiImport.pm +++ b/lib/PublicInbox/LeiImport.pm @@ -8,6 +8,8 @@ use v5.10.1; use parent qw(PublicInbox::IPC); use PublicInbox::MboxReader; use PublicInbox::Eml; +use PublicInbox::InboxWritable qw(eml_from_path); +use PublicInbox::PktOp; sub _import_eml { # MboxReader callback my ($eml, $sto, $set_kw) = @_; @@ -35,7 +37,9 @@ sub call { # the main "lei import" method $lei->{opt}->{kw} //= 1; my $fmt = $lei->{opt}->{'format'}; my $self = $lei->{imp} = bless {}, $cls; - return $lei->fail('--format unspecified') if !$fmt; + if (my @f = grep { -f } @argv && !$fmt) { + return $lei->fail("--format unset for regular files:\n@f"); + } $self->{0} = $lei->{0} if $lei->{opt}->{stdin}; my $ops = { '!' => [ $lei->can('fail_handler'), $lei ], @@ -75,14 +79,14 @@ sub _import_fh { if ($fmt eq 'eml') { my $buf = do { local $/; <$fh> } // return $lei->child_error(1 >> 8, <<""); - error reading $x: $! +error reading $x: $! my $eml = PublicInbox::Eml->new(\$buf); _import_eml($eml, $lei->{sto}, $set_kw); } else { # some mbox my $cb = PublicInbox::MboxReader->can($fmt); $cb or return $lei->child_error(1 >> 8, <<""); - --format $fmt unsupported for $x +--format $fmt unsupported for $x $cb->(undef, $fh, \&_import_eml, $lei->{sto}, $set_kw); } @@ -90,6 +94,11 @@ sub _import_fh { $lei->child_error(1 >> 8, ": $@") if $@; } +sub _import_maildir { # maildir_each_file cb + my ($f, $sto, $set_kw) = @_; + $sto->ipc_do('set_eml_from_maildir', $f, $set_kw); +} + sub import_path_url { my ($self, $x) = @_; my $lei = $self->{lei}; @@ -99,6 +108,11 @@ sub import_path_url { unable to open $x: $! _import_fh($lei, $fh, $x); + } elsif (-d _ && (-d "$x/cur" || -d "$x/new")) { + require PublicInbox::LeiToMail; + PublicInbox::LeiToMail::maildir_each_file($x, + \&_import_maildir, + $lei->{sto}, $lei->{opt}->{kw}); } else { $lei->fail("$x unsupported (TODO)"); } diff --git a/lib/PublicInbox/LeiStore.pm b/lib/PublicInbox/LeiStore.pm index 3a215973..546d500b 100644 --- a/lib/PublicInbox/LeiStore.pm +++ b/lib/PublicInbox/LeiStore.pm @@ -12,7 +12,7 @@ use v5.10.1; use parent qw(PublicInbox::Lock PublicInbox::IPC); use PublicInbox::ExtSearchIdx; use PublicInbox::Import; -use PublicInbox::InboxWritable; +use PublicInbox::InboxWritable qw(eml_from_path); use PublicInbox::V2Writable; use PublicInbox::ContentHash qw(content_hash content_digest); use PublicInbox::MID qw(mids mids_in); @@ -224,6 +224,12 @@ sub set_eml { add_eml($self, $eml, @kw) // set_eml_keywords($self, $eml, @kw); } +sub set_eml_from_maildir { + my ($self, $f, $set_kw) = @_; + my $eml = eml_from_path($f) or return; + set_eml($self, $eml, $set_kw ? maildir_keywords($f) : ()); +} + sub done { my ($self) = @_; my $err = ''; diff --git a/lib/PublicInbox/LeiToMail.pm b/lib/PublicInbox/LeiToMail.pm index 3f65e9e9..4c5a5685 100644 --- a/lib/PublicInbox/LeiToMail.pm +++ b/lib/PublicInbox/LeiToMail.pm @@ -266,8 +266,9 @@ sub _mbox_write_cb ($$) { } } -sub _maildir_each_file ($$;@) { +sub maildir_each_file ($$;@) { my ($dir, $cb, @arg) = @_; + $dir .= '/' unless substr($dir, -1) eq '/'; for my $d (qw(new/ cur/)) { my $pfx = $dir.$d; opendir my $dh, $pfx or next; @@ -277,13 +278,13 @@ sub _maildir_each_file ($$;@) { } } -sub _augment_file { # _maildir_each_file cb +sub _augment_file { # maildir_each_file cb my ($f, $lei) = @_; my $eml = PublicInbox::InboxWritable::eml_from_path($f) or return; _augment($eml, $lei); } -# _maildir_each_file callback, \&CORE::unlink doesn't work with it +# maildir_each_file callback, \&CORE::unlink doesn't work with it sub _unlink { unlink($_[0]) } sub _rand () { @@ -379,11 +380,11 @@ sub _do_augment_maildir { my $dedupe = $lei->{dedupe}; if ($dedupe && $dedupe->prepare_dedupe) { require PublicInbox::InboxWritable; # eml_from_path - _maildir_each_file($dst, \&_augment_file, $lei); + maildir_each_file($dst, \&_augment_file, $lei); $dedupe->pause_dedupe; } } else { # clobber existing Maildir - _maildir_each_file($dst, \&_unlink); + maildir_each_file($dst, \&_unlink); } } -- cgit v1.2.3-24-ge0c7