user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
From: Eric Wong <e@80x24.org>
To: meta@public-inbox.org
Subject: [PATCH 3/8] lei: share input code between convert and import
Date: Mon, 22 Mar 2021 07:53:57 +0000	[thread overview]
Message-ID: <20210322075402.27834-4-e@80x24.org> (raw)
In-Reply-To: <20210322075402.27834-1-e@80x24.org>

These commands accept mail the same way, and this forces
us to maintain consistent input format support between
commands.

We'll be using this for "lei mark", too.
---
 MANIFEST                      |  1 +
 lib/PublicInbox/LEI.pm        | 17 -------
 lib/PublicInbox/LeiConvert.pm | 60 +++----------------------
 lib/PublicInbox/LeiImport.pm  | 57 ++---------------------
 lib/PublicInbox/LeiInput.pm   | 85 +++++++++++++++++++++++++++++++++++
 5 files changed, 94 insertions(+), 126 deletions(-)
 create mode 100644 lib/PublicInbox/LeiInput.pm

diff --git a/MANIFEST b/MANIFEST
index b6b4a3ab..df8440ef 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -187,6 +187,7 @@ lib/PublicInbox/LeiDedupe.pm
 lib/PublicInbox/LeiExternal.pm
 lib/PublicInbox/LeiHelp.pm
 lib/PublicInbox/LeiImport.pm
+lib/PublicInbox/LeiInput.pm
 lib/PublicInbox/LeiMirror.pm
 lib/PublicInbox/LeiOverview.pm
 lib/PublicInbox/LeiP2q.pm
diff --git a/lib/PublicInbox/LEI.pm b/lib/PublicInbox/LEI.pm
index 9e3bb9b7..0bd52a46 100644
--- a/lib/PublicInbox/LEI.pm
+++ b/lib/PublicInbox/LEI.pm
@@ -419,23 +419,6 @@ sub fail ($$;$) {
 	undef;
 }
 
-sub check_input_format ($;$) {
-	my ($self, $files) = @_;
-	my $opt_key = 'in-format';
-	my $fmt = $self->{opt}->{$opt_key};
-	if (!$fmt) {
-		my $err = $files ? "regular file(s):\n@$files" : '--stdin';
-		return fail($self, "--$opt_key unset for $err");
-	}
-	require PublicInbox::MboxLock if $files;
-	require PublicInbox::MboxReader;
-	return 1 if $fmt eq 'eml';
-	# XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
-	PublicInbox::MboxReader->can($fmt) or
-		return fail($self, "--$opt_key=$fmt unrecognized");
-	1;
-}
-
 sub out ($;@) {
 	my $self = shift;
 	return if print { $self->{1} // return } @_; # likely
diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 8d3b221a..0aa13229 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -5,7 +5,7 @@
 package PublicInbox::LeiConvert;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::IPC);
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 use PublicInbox::Eml;
 use PublicInbox::LeiStore;
 use PublicInbox::LeiOverview;
@@ -79,64 +79,14 @@ sub do_convert { # via wq_do
 
 sub lei_convert { # the main "lei convert" method
 	my ($lei, @inputs) = @_;
-	my $opt = $lei->{opt};
-	$opt->{kw} //= 1;
+	$lei->{opt}->{kw} //= 1;
+	$lei->{opt}->{dedupe} //= 'none';
 	my $self = $lei->{cnv} = bless {}, __PACKAGE__;
-	my $in_fmt = $opt->{'in-format'};
-	my (@f, @d);
-	$opt->{dedupe} //= 'none';
 	my $ovv = PublicInbox::LeiOverview->new($lei, 'out-format');
 	$lei->{l2m} or return
 		$lei->fail("output not specified or is not a mail destination");
-	my $net = $lei->{net}; # NetWriter may be created by l2m
-	$opt->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
-	if ($opt->{stdin}) {
-		@inputs and return $lei->fail("--stdin and @inputs do not mix");
-		$lei->check_input_format(undef) or return;
-		$self->{0} = $lei->{0};
-	}
-	# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
-	for my $input (@inputs) {
-		my $input_path = $input;
-		if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
-			require PublicInbox::NetReader;
-			$net //= PublicInbox::NetReader->new;
-			$net->add_url($input);
-		} elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
-			my $ifmt = lc $1;
-			if (($in_fmt // $ifmt) ne $ifmt) {
-				return $lei->fail(<<"");
---in-format=$in_fmt and `$ifmt:' conflict
-
-			}
-			if (-f $input_path) {
-				require PublicInbox::MboxLock;
-				require PublicInbox::MboxReader;
-				PublicInbox::MboxReader->can($ifmt) or return
-					$lei->fail("$ifmt not supported");
-			} elsif (-d _) {
-				require PublicInbox::MdirReader;
-				$ifmt eq 'maildir' or return
-					$lei->fail("$ifmt not supported");
-			} else {
-				return $lei->fail("Unable to handle $input");
-			}
-		} elsif (-f $input) { push @f, $input }
-		elsif (-d _) { push @d, $input }
-		else { return $lei->fail("Unable to handle $input") }
-	}
-	if (@f) { $lei->check_input_format(\@f) or return }
-	if (@d) { # TODO: check for MH vs Maildir, here
-		require PublicInbox::MdirReader;
-	}
-	$self->{inputs} = \@inputs;
-	if ($net) {
-		if (my $err = $net->errors) {
-			return $lei->fail($err);
-		}
-		$net->{quiet} = $opt->{quiet};
-		$lei->{net} //= $net;
-	}
+	$lei->{opt}->{augment} = 1 unless $ovv->{dst} eq '/dev/stdout';
+	$self->prepare_inputs($lei, \@inputs) or return;
 	my $op = $lei->workers_start($self, 'lei_convert', 1, {
 		'' => [ $lei->can('dclose'), $lei ]
 	});
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 0e2a96e8..e769fba8 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -5,7 +5,7 @@
 package PublicInbox::LeiImport;
 use strict;
 use v5.10.1;
-use parent qw(PublicInbox::IPC);
+use parent qw(PublicInbox::IPC PublicInbox::LeiInput);
 use PublicInbox::Eml;
 use PublicInbox::PktOp qw(pkt_do);
 
@@ -67,60 +67,9 @@ sub lei_import { # the main "lei import" method
 	my ($lei, @inputs) = @_;
 	my $sto = $lei->_lei_store(1);
 	$sto->write_prepare($lei);
-	my ($net, @f, @d);
 	$lei->{opt}->{kw} //= 1;
-	my $self = $lei->{imp} = bless { inputs => \@inputs }, __PACKAGE__;
-	if ($lei->{opt}->{stdin}) {
-		@inputs and return $lei->fail("--stdin and @inputs do not mix");
-		$lei->check_input_format or return;
-		$self->{0} = $lei->{0};
-	}
-
-	my $fmt = $lei->{opt}->{'in-format'};
-	# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
-	for my $input (@inputs) {
-		my $input_path = $input;
-		if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) {
-			require PublicInbox::NetReader;
-			$net //= PublicInbox::NetReader->new;
-			$net->add_url($input);
-		} elsif ($input_path =~ s/\A([a-z0-9]+)://is) {
-			my $ifmt = lc $1;
-			if (($fmt // $ifmt) ne $ifmt) {
-				return $lei->fail(<<"");
---in-format=$fmt and `$ifmt:' conflict
-
-			}
-			if (-f $input_path) {
-				require PublicInbox::MboxLock;
-				require PublicInbox::MboxReader;
-				PublicInbox::MboxReader->can($ifmt) or return
-					$lei->fail("$ifmt not supported");
-			} elsif (-d _) {
-				require PublicInbox::MdirReader;
-				$ifmt eq 'maildir' or return
-					$lei->fail("$ifmt not supported");
-			} else {
-				return $lei->fail("Unable to handle $input");
-			}
-		} elsif (-f $input) { push @f, $input
-		} elsif (-d _) { push @d, $input
-		} else { return $lei->fail("Unable to handle $input") }
-	}
-	if (@f) { $lei->check_input_format(\@f) or return }
-	if (@d) { # TODO: check for MH vs Maildir, here
-		require PublicInbox::MdirReader;
-	}
-	$self->{inputs} = \@inputs;
-	if ($net) {
-		if (my $err = $net->errors) {
-			return $lei->fail($err);
-		}
-		$net->{quiet} = $lei->{opt}->{quiet};
-		$lei->{net} = $net;
-		require PublicInbox::LeiAuth;
-		$lei->{auth} = PublicInbox::LeiAuth->new;
-	}
+	my $self = $lei->{imp} = bless {}, __PACKAGE__;
+	$self->prepare_inputs($lei, \@inputs) or return;
 	import_start($lei);
 }
 
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
new file mode 100644
index 00000000..89585a52
--- /dev/null
+++ b/lib/PublicInbox/LeiInput.pm
@@ -0,0 +1,85 @@
+# Copyright (C) 2021 all contributors <meta@public-inbox.org>
+# License: AGPL-3.0+ <https://www.gnu.org/licenses/agpl-3.0.txt>
+
+# parent class for LeiImport, LeiConvert
+package PublicInbox::LeiInput;
+use strict;
+use v5.10.1;
+
+sub check_input_format ($;$) { # validate --in-format; 1 if usable
+	my ($lei, $files) = @_; # $files: arrayref of paths, unset for --stdin
+	my $opt_key = 'in-format';
+	my $fmt = $lei->{opt}->{$opt_key};
+	if (!$fmt) { # --in-format is mandatory for these inputs
+		my $err = $files ? "regular file(s):\n@$files" : '--stdin';
+		return $lei->fail("--$opt_key unset for $err");
+	}
+	require PublicInbox::MboxLock if $files;
+	require PublicInbox::MboxReader;
+	return 1 if $fmt eq 'eml'; # accepted without asking MboxReader
+	# XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
+	PublicInbox::MboxReader->can($fmt) or # else must be a MboxReader method
+		return $lei->fail("--$opt_key=$fmt unrecognized");
+	1;
+}
+
+
+sub prepare_inputs { # validate inputs; callers treat false as failure
+	my ($self, $lei, $inputs) = @_; # $inputs: arrayref of input args
+	my $in_fmt = $lei->{opt}->{'in-format'};
+	if ($lei->{opt}->{stdin}) { # --stdin excludes explicit inputs
+		@$inputs and return
+			$lei->fail("--stdin and @$inputs do not mix");
+		check_input_format($lei) or return;
+		$self->{0} = $lei->{0}; # NOTE(review): presumably stdin; confirm
+	}
+	my $net = $lei->{net}; # NetWriter may be created by l2m
+	my $fmt = $lei->{opt}->{'in-format'}; # NOTE(review): unused; see $in_fmt
+	my (@f, @d); # unprefixed regular files and directories
+	# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
+	for my $input (@$inputs) {
+		my $input_path = $input;
+		if ($input =~ m!\A(?:imaps?|nntps?|s?news)://!i) { # network URL
+			require PublicInbox::NetReader;
+			$net //= PublicInbox::NetReader->new;
+			$net->add_url($input);
+		} elsif ($input_path =~ s/\A([a-z0-9]+)://is) { # strip "<fmt>:"
+			my $ifmt = lc $1;
+			if (($in_fmt // $ifmt) ne $ifmt) { # must agree if both set
+				return $lei->fail(<<"");
+--in-format=$in_fmt and `$ifmt:' conflict
+
+			}
+			if (-f $input_path) { # regular file: needs a MboxReader method
+				require PublicInbox::MboxLock;
+				require PublicInbox::MboxReader;
+				PublicInbox::MboxReader->can($ifmt) or return
+					$lei->fail("$ifmt not supported");
+			} elsif (-d _) { # directory (reuses the -f stat): maildir only
+				require PublicInbox::MdirReader;
+				$ifmt eq 'maildir' or return
+					$lei->fail("$ifmt not supported");
+			} else {
+				return $lei->fail("Unable to handle $input");
+			}
+		} elsif (-f $input) { push @f, $input }
+		elsif (-d _) { push @d, $input }
+		else { return $lei->fail("Unable to handle $input") }
+	}
+	if (@f) { check_input_format($lei, \@f) or return } # need --in-format
+	if (@d) { # TODO: check for MH vs Maildir, here
+		require PublicInbox::MdirReader;
+	}
+	if ($net) {
+		if (my $err = $net->errors) {
+			return $lei->fail($err);
+		}
+		$net->{quiet} = $lei->{opt}->{quiet};
+		require PublicInbox::LeiAuth;
+		$lei->{auth} //= PublicInbox::LeiAuth->new; # keep existing auth
+		$lei->{net} //= $net; # keep existing net
+	}
+	$self->{inputs} = $inputs; # also the (true) return value
+}
+
+1;

  parent reply	other threads:[~2021-03-22  7:54 UTC|newest]

Thread overview: 9+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-03-22  7:53 [PATCH 0/8] lei input handling improvements Eric Wong
2021-03-22  7:53 ` [PATCH 1/8] lei: support -c <name>=<value> to overrides Eric Wong
2021-03-22  7:53 ` [PATCH 2/8] net_reader: escape nasty chars from Net::NNTP->message Eric Wong
2021-03-22  7:53 ` Eric Wong [this message]
2021-03-22  7:53 ` [PATCH 4/8] lei: simplify workers_start and callers Eric Wong
2021-03-22  7:53 ` [PATCH 5/8] mbox_reader: add ->reads method to avoid nonsensical formats Eric Wong
2021-03-22  7:54 ` [PATCH 6/8] lei_input: common filehandle reader for eml + mbox Eric Wong
2021-03-22  7:54 ` [PATCH 7/8] lei_input: drop "From " line on single "eml" (message/rfc822) Eric Wong
2021-03-22  7:54 ` [PATCH 8/8] lei import: ignore Status headers in "eml" messages Eric Wong

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

  List information: https://public-inbox.org/README

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210322075402.27834-4-e@80x24.org \
    --to=e@80x24.org \
    --cc=meta@public-inbox.org \
    --subject='Re: [PATCH 3/8] lei: share input code between convert and import' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Code repositories for project(s) associated with this inbox:

	https://80x24.org/public-inbox.git

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).