From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/8] lei: share input code between convert and import
Date: Mon, 22 Mar 2021 07:53:57 +0000 [thread overview]
Message-ID: <20210322075402.27834-4-e@80x24.org> (raw)
In-Reply-To: <20210322075402.27834-1-e@80x24.org>
These commands accept mail the same way, and this forces
us to maintain consistent input format support between
commands.
We'll be using this for "lei mark", too.
---
MANIFEST | 1 +
lib/PublicInbox/LEI.pm | 17 -------
lib/PublicInbox/LeiConvert.pm | 60 +++----------------------
lib/PublicInbox/LeiImport.pm | 57 ++---------------------
lib/PublicInbox/LeiInput.pm | 85 +++++++++++++++++++++++++++++++++++
5 files changed, 94 insertions(+), 126 deletions(-)
create mode 100644 lib/PublicInbox/LeiInput.pm
diff --git a/MANIFEST b/MANIFEST
index b6b4a3ab..df8440ef 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -187,6 +187,7 @@ lib/PublicInbox/LeiDedupe.pm
lib/PublicInbox/LeiExternal.pm
lib/PublicInbox/LeiHelp.pm
lib/PublicInbox/LeiImport.pm
+lib/PublicInbox/LeiInput.pm
lib/PublicInbox/LeiMirror.pm
lib/PublicInbox/LeiOverview.pm
lib/PublicInbox/LeiP2q.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 9e3bb9b7..0bd52a46 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -419,23 +419,6 @@ sub fail ($$;$) {
undef;
}
-sub check_input_format ($;$) {
- my ($self, $files) = @_;
- my $opt_key = 'in-format';
- my $fmt = $self->{opt}->{$opt_key};
- if (!$fmt) {
- my $err = $files ? "regular file(s):\n@$files" : '--stdin';
- return fail($self, "--$opt_key unset for $err");
- }
- require PublicInbox::MboxLock if $files;
- require PublicInbox::MboxReader;
- return 1 if $fmt eq 'eml';
- # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
- PublicInbox::MboxReader->can($fmt) or
- return fail($self, "--$opt_key=$fmt unrecognized");
- 1;
-}
-
sub out ($;@) {
my $self = shift;
return if print { $self->{1} // return } @_; # likely
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8d3b221a..0aa13229 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -5,7 +5,7 @@
package PublicInbox::LeiConvert;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::IPC);
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::Eml;
use PublicInbox::LeiStore;
use PublicInbox::LeiOverview;
@@ -79,64 +79,14 @@ sub do_convert { # via wq_do
sub lei_convert { # the main "lei convert" method
my ($lei, @inputs) = @_;
- my $opt = $lei->{opt};
- $opt->{kw} //= 1;
+ $lei->{opt}->{kw} //= 1;
+ $lei->{opt}->{dedupe} //= 'none';
my $self = $lei->{cnv} = bless {}, __PACKAGE__;
- my $in_fmt = $opt->{'in-format'};
- my (@f, @d);
- $opt->{dedupe} //= 'none';
my $ovv = PublicInbox::LeiOverview->new($lei, 'out-format');
$lei->{l2m} or return
$lei->fail("output not specified or is not a mail destination");
- my $net = $lei->{net}; # NetWriter may be created by l2m
- $opt->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
- if ($opt->{stdin}) {
- @inputs and return $lei->fail("--stdin and @inputs do not mix");
- $lei->check_input_format(undef) or return;
- $self->{0} = $lei->{0};
- }
- # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
- for my $input (@inputs) {
- my $input_path = $input;
- if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
- require PublicInbox::NetReader;
- $net //= PublicInbox::NetReader->new;
- $net->add_url($input);
- } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
- my $ifmt = lc $1;
- if (($in_fmt // $ifmt) ne $ifmt) {
- return $lei->fail(<<"");
---in-format=$in_fmt and `$ifmt:' conflict
-
- }
- if (-f $input_path) {
- require PublicInbox::MboxLock;
- require PublicInbox::MboxReader;
- PublicInbox::MboxReader->can($ifmt) or return
- $lei->fail("$ifmt not supported");
- } elsif (-d _) {
- require PublicInbox::MdirReader;
- $ifmt eq 'maildir' or return
- $lei->fail("$ifmt not supported");
- } else {
- return $lei->fail("Unable to handle $input");
- }
- } elsif (-f $input) { push @f, $input }
- elsif (-d _) { push @d, $input }
- else { return $lei->fail("Unable to handle $input") }
- }
- if (@f) { $lei->check_input_format(\@f) or return }
- if (@d) { # TODO: check for MH vs Maildir, here
- require PublicInbox::MdirReader;
- }
- $self->{inputs} = \@inputs;
- if ($net) {
- if (my $err = $net->errors) {
- return $lei->fail($err);
- }
- $net->{quiet} = $opt->{quiet};
- $lei->{net} //= $net;
- }
+ $lei->{opt}->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
+ $self->prepare_inputs($lei, \@inputs) or return;
my $op = $lei->workers_start($self, 'lei_convert', 1, {
'' => [ $lei->can('dclose'), $lei ]
});
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 0e2a96e8..e769fba8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -5,7 +5,7 @@
package PublicInbox::LeiImport;
use strict;
use v5.10.1;
-use parent qw(PublicInbox::IPC);
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
use PublicInbox::Eml;
use PublicInbox::PktOp qw(pkt_do);
@@ -67,60 +67,9 @@ sub lei_import { # the main "lei import" method
my ($lei, @inputs) = @_;
my $sto = $lei->_lei_store(1);
$sto->write_prepare($lei);
- my ($net, @f, @d);
$lei->{opt}->{kw} //= 1;
- my $self = $lei->{imp} = bless { inputs => \@inputs }, __PACKAGE__;
- if ($lei->{opt}->{stdin}) {
- @inputs and return $lei->fail("--stdin and @inputs do not mix");
- $lei->check_input_format or return;
- $self->{0} = $lei->{0};
- }
-
- my $fmt = $lei->{opt}->{'in-format'};
- # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
- for my $input (@inputs) {
- my $input_path = $input;
- if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
- require PublicInbox::NetReader;
- $net //= PublicInbox::NetReader->new;
- $net->add_url($input);
- } elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
- my $ifmt = lc $1;
- if (($fmt // $ifmt) ne $ifmt) {
- return $lei->fail(<<"");
---in-format=$fmt and `$ifmt:' conflict
-
- }
- if (-f $input_path) {
- require PublicInbox::MboxLock;
- require PublicInbox::MboxReader;
- PublicInbox::MboxReader->can($ifmt) or return
- $lei->fail("$ifmt not supported");
- } elsif (-d _) {
- require PublicInbox::MdirReader;
- $ifmt eq 'maildir' or return
- $lei->fail("$ifmt not supported");
- } else {
- return $lei->fail("Unable to handle $input");
- }
- } elsif (-f $input) { push @f, $input
- } elsif (-d _) { push @d, $input
- } else { return $lei->fail("Unable to handle $input") }
- }
- if (@f) { $lei->check_input_format(\@f) or return }
- if (@d) { # TODO: check for MH vs Maildir, here
- require PublicInbox::MdirReader;
- }
- $self->{inputs} = \@inputs;
- if ($net) {
- if (my $err = $net->errors) {
- return $lei->fail($err);
- }
- $net->{quiet} = $lei->{opt}->{quiet};
- $lei->{net} = $net;
- require PublicInbox::LeiAuth;
- $lei->{auth} = PublicInbox::LeiAuth->new;
- }
+ my $self = $lei->{imp} = bless {}, __PACKAGE__;
+ $self->prepare_inputs($lei, \@inputs) or return;
import_start($lei);
}
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
new file mode 100644
index 00000000..89585a52
--- /dev/null
+++ b/lib/PublicInbox/LeiInput.pm
@@ -0,0 +1,85 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# parent class for LeiImport, LeiConvert
+package PublicInbox::LeiInput;
+use strict;
+use v5.10.1;
+
+sub check_input_format ($;$) { # ensure --in-format is set and recognized
+ my ($lei, $files) = @_; # $files: optional arrayref of paths; omitted for --stdin
+ my $opt_key = 'in-format';
+ my $fmt = $lei->{opt}->{$opt_key};
+ if (!$fmt) { # option missing: name what required it in the error
+ my $err = $files ? "regular file(s):\n@$files" : '--stdin';
+ return $lei->fail("--$opt_key unset for $err");
+ }
+ require PublicInbox::MboxLock if $files; # only loaded when paths were given
+ require PublicInbox::MboxReader;
+ return 1 if $fmt eq 'eml'; # 'eml' is accepted without a MboxReader lookup
+ # XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
+ PublicInbox::MboxReader->can($fmt) or # any other value must name a MboxReader method
+ return $lei->fail("--$opt_key=$fmt unrecognized");
+ 1; # true on success; failures return whatever $lei->fail returns
+}
+
+
+sub prepare_inputs { # validate @$inputs, load needed readers, set $self->{inputs}
+ my ($self, $lei, $inputs) = @_; # $inputs: arrayref of URLs, "FORMAT:path" or paths
+ my $in_fmt = $lei->{opt}->{'in-format'};
+ if ($lei->{opt}->{stdin}) { # --stdin may not be combined with other inputs
+ @$inputs and return
+ $lei->fail("--stdin and @$inputs do not mix");
+ check_input_format($lei) or return;
+ $self->{0} = $lei->{0}; # copy the {0} slot (presumably the stdin handle; confirm)
+ }
+ my $net = $lei->{net}; # NetWriter may be created by l2m
+ my $fmt = $lei->{opt}->{'in-format'}; # NOTE(review): same value as $in_fmt, not read below
+ my (@f, @d); # unprefixed regular files and directories seen in the loop
+ # e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
+ for my $input (@$inputs) {
+ my $input_path = $input; # copy so the prefix strip below leaves $input intact
+ if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) { # network URL
+ require PublicInbox::NetReader;
+ $net //= PublicInbox::NetReader->new;
+ $net->add_url($input);
+ } elsif ($input_path =~ s/\A([a-z0-9]+)://is) { # "FORMAT:" prefix, now stripped
+ my $ifmt = lc $1;
+ if (($in_fmt // $ifmt) ne $ifmt) { # prefix must match --in-format when that is set
+ return $lei->fail(<<"");
+--in-format=$in_fmt and `$ifmt:' conflict
+
+ }
+ if (-f $input_path) { # regular file: prefix must name a MboxReader method
+ require PublicInbox::MboxLock;
+ require PublicInbox::MboxReader;
+ PublicInbox::MboxReader->can($ifmt) or return
+ $lei->fail("$ifmt not supported");
+ } elsif (-d _) { # directory (reuses the stat from -f): only "maildir" allowed
+ require PublicInbox::MdirReader;
+ $ifmt eq 'maildir' or return
+ $lei->fail("$ifmt not supported");
+ } else {
+ return $lei->fail("Unable to handle $input");
+ }
+ } elsif (-f $input) { push @f, $input } # unprefixed file, checked after the loop
+ elsif (-d _) { push @d, $input } # unprefixed directory
+ else { return $lei->fail("Unable to handle $input") }
+ }
+ if (@f) { check_input_format($lei, \@f) or return } # unprefixed files need --in-format
+ if (@d) { # TODO: check for MH vs Maildir, here
+ require PublicInbox::MdirReader;
+ }
+ if ($net) {
+ if (my $err = $net->errors) {
+ return $lei->fail($err);
+ }
+ $net->{quiet} = $lei->{opt}->{quiet};
+ require PublicInbox::LeiAuth;
+ $lei->{auth} //= PublicInbox::LeiAuth->new; # //= keeps any auth already on $lei
+ $lei->{net} //= $net; # likewise keeps an existing {net}
+ }
+ $self->{inputs} = $inputs; # last statement: the arrayref is the (true) return value
+}
+
+1;
next prev parent reply other threads:[~2021-03-22 7:54 UTC|newest]
Thread overview: 9+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-03-22 7:53 [PATCH 0/8] lei input handling improvements Eric Wong
2021-03-22 7:53 ` [PATCH 1/8] lei: support -c <name>=<value> to overrides Eric Wong
2021-03-22 7:53 ` [PATCH 2/8] net_reader: escape nasty chars from Net::NNTP->message Eric Wong
2021-03-22 7:53 ` Eric Wong [this message]
2021-03-22 7:53 ` [PATCH 4/8] lei: simplify workers_start and callers Eric Wong
2021-03-22 7:53 ` [PATCH 5/8] mbox_reader: add ->reads method to avoid nonsensical formats Eric Wong
2021-03-22 7:54 ` [PATCH 6/8] lei_input: common filehandle reader for eml + mbox Eric Wong
2021-03-22 7:54 ` [PATCH 7/8] lei_input: drop "From " line on single "eml" (message/rfc822) Eric Wong
2021-03-22 7:54 ` [PATCH 8/8] lei import: ignore Status headers in "eml" messages Eric Wong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
List information: https://public-inbox.org/README
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20210322075402.27834-4-e@80x24.org \
--to=e@80x24.org \
--cc=meta@public-inbox.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://80x24.org/public-inbox.git
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).