user/dev discussion of public-inbox itself
 help / color / mirror / code / Atom feed
* [PATCH 0/3] lei input improvements
@ 2021-03-29  7:08 Eric Wong
  2021-03-29  7:08 ` [PATCH 1/3] lei_input: avoid special case sub for --stdin Eric Wong
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29  7:08 UTC (permalink / raw)
  To: meta

These affect the convert, import, mark sub-commands.

Eric Wong (3):
  lei_input: avoid special case sub for --stdin
  lei: use IO::Uncompress::Gunzip MultiStream
  lei_input: treat ".eml" and ".patch" suffix as "eml"

 lib/PublicInbox/LeiConvert.pm |  1 -
 lib/PublicInbox/LeiImport.pm  |  1 -
 lib/PublicInbox/LeiInput.pm   | 33 +++++++++++++++++++--------------
 lib/PublicInbox/LeiMark.pm    |  1 -
 lib/PublicInbox/LeiRemote.pm  |  2 +-
 lib/PublicInbox/LeiXSearch.pm |  2 +-
 t/lei-import.t                |  2 +-
 t/lei-mark.t                  |  4 ++--
 8 files changed, 24 insertions(+), 22 deletions(-)

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH 1/3] lei_input: avoid special case sub for --stdin
  2021-03-29  7:08 [PATCH 0/3] lei input improvements Eric Wong
@ 2021-03-29  7:08 ` Eric Wong
  2021-03-29  7:08 ` [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream Eric Wong
  2021-03-29  7:08 ` [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml" Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29  7:08 UTC (permalink / raw)
  To: meta

We can consistently open /dev/stdin correctly nowadays, so
drop the input_stdin sub and just use the normal ->path_to_fd
code path.
---
 lib/PublicInbox/LeiConvert.pm | 1 -
 lib/PublicInbox/LeiImport.pm  | 1 -
 lib/PublicInbox/LeiInput.pm   | 8 +-------
 lib/PublicInbox/LeiMark.pm    | 1 -
 4 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/lib/PublicInbox/LeiConvert.pm b/lib/PublicInbox/LeiConvert.pm
index 5d0adb14..da3b50cc 100644
--- a/lib/PublicInbox/LeiConvert.pm
+++ b/lib/PublicInbox/LeiConvert.pm
@@ -34,7 +34,6 @@ sub input_maildir_cb {
 
 sub do_convert { # via wq_do
 	my ($self) = @_;
-	$self->input_stdin;
 	for my $input (@{$self->{inputs}}) {
 		$self->input_path_url($input);
 	}
diff --git a/lib/PublicInbox/LeiImport.pm b/lib/PublicInbox/LeiImport.pm
index 803b5cda..227a2a21 100644
--- a/lib/PublicInbox/LeiImport.pm
+++ b/lib/PublicInbox/LeiImport.pm
@@ -78,7 +78,6 @@ sub lei_import { # the main "lei import" method
 	$self->{-wq_nr_workers} = $j // 1; # locked
 	my ($op_c, undef) = $lei->workers_start($self, 'lei_import', $j, $ops);
 	$lei->{imp} = $self;
-	$self->wq_io_do('input_stdin', []) if $self->{0};
 	net_merge_complete($self) unless $lei->{auth};
 	$op_c->op_wait_event($ops);
 }
diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index d916249a..93284e8b 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -46,12 +46,6 @@ error reading $name: $!
 	}
 }
 
-sub input_stdin {
-	my ($self) = @_;
-	my $in = delete $self->{0} or return;
-	$self->input_fh($self->{lei}->{opt}->{'in-format'}, $in, '<stdin>');
-}
-
 sub input_path_url {
 	my ($self, $input, @args) = @_;
 	my $lei = $self->{lei};
@@ -94,7 +88,7 @@ sub prepare_inputs { # returns undef on error
 		@$inputs and return
 			$lei->fail("--stdin and @$inputs do not mix");
 		check_input_format($lei) or return;
-		$self->{0} = $lei->{0};
+		push @$inputs, '/dev/stdin';
 	}
 	my $net = $lei->{net}; # NetWriter may be created by l2m
 	my $fmt = $lei->{opt}->{'in-format'};
diff --git a/lib/PublicInbox/LeiMark.pm b/lib/PublicInbox/LeiMark.pm
index 6e611318..b187d6e7 100644
--- a/lib/PublicInbox/LeiMark.pm
+++ b/lib/PublicInbox/LeiMark.pm
@@ -118,7 +118,6 @@ sub lei_mark { # the "lei mark" method
 	$self->{vmd_mod} = $vmd_mod;
 	my ($op_c, undef) = $lei->workers_start($self, 'lei_mark', 1, $ops);
 	$lei->{mark} = $self;
-	$self->wq_io_do('input_stdin', []) if $self->{0};
 	net_merge_complete($self) unless $lei->{auth};
 	$op_c->op_wait_event($ops);
 }

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream
  2021-03-29  7:08 [PATCH 0/3] lei input improvements Eric Wong
  2021-03-29  7:08 ` [PATCH 1/3] lei_input: avoid special case sub for --stdin Eric Wong
@ 2021-03-29  7:08 ` Eric Wong
  2021-03-29  7:08 ` [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml" Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29  7:08 UTC (permalink / raw)
  To: meta

This is compatible with default gunzip(1) behavior and
future-proofs us against potential changes in PublicInbox::WWW
to save memory on public-inbox-httpd instances.
---
 lib/PublicInbox/LeiRemote.pm  | 2 +-
 lib/PublicInbox/LeiXSearch.pm | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/PublicInbox/LeiRemote.pm b/lib/PublicInbox/LeiRemote.pm
index 399fc936..945d9990 100644
--- a/lib/PublicInbox/LeiRemote.pm
+++ b/lib/PublicInbox/LeiRemote.pm
@@ -50,7 +50,7 @@ sub mset {
 	my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
 	my $reap = PublicInbox::OnDestroy->new($lei->can('sigint_reap'), $pid);
 	$self->{smsg} = [];
-	$fh = IO::Uncompress::Gunzip->new($fh);
+	$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
 	PublicInbox::MboxReader->mboxrd($fh, \&_each_mboxrd_eml, $self);
 	my $err = waitpid($pid, 0) == $pid ? undef
 					: "BUG: waitpid($cmd): $!";
diff --git a/lib/PublicInbox/LeiXSearch.pm b/lib/PublicInbox/LeiXSearch.pm
index 1a194f1c..f3b8cc25 100644
--- a/lib/PublicInbox/LeiXSearch.pm
+++ b/lib/PublicInbox/LeiXSearch.pm
@@ -272,7 +272,7 @@ sub query_remote_mboxrd {
 		$lei->qerr("# $cmd");
 		my ($fh, $pid) = popen_rd($cmd, undef, $rdr);
 		$reap_curl = PublicInbox::OnDestroy->new($sigint_reap, $pid);
-		$fh = IO::Uncompress::Gunzip->new($fh);
+		$fh = IO::Uncompress::Gunzip->new($fh, MultiStream => 1);
 		PublicInbox::MboxReader->mboxrd($fh, \&each_remote_eml, $self,
 						$lei, $each_smsg);
 		my $err = waitpid($pid, 0) == $pid ? undef

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml"
  2021-03-29  7:08 [PATCH 0/3] lei input improvements Eric Wong
  2021-03-29  7:08 ` [PATCH 1/3] lei_input: avoid special case sub for --stdin Eric Wong
  2021-03-29  7:08 ` [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream Eric Wong
@ 2021-03-29  7:08 ` Eric Wong
  2 siblings, 0 replies; 4+ messages in thread
From: Eric Wong @ 2021-03-29  7:08 UTC (permalink / raw)
  To: meta

".eml" is a suffix supported by (/usr/local)/etc/mime.types
on Debian and FreeBSD systems using the "mime-support" package.
".patch" is what "git format-patch" has generated by default since
git v1.5.0 in 2007.
---
 lib/PublicInbox/LeiInput.pm | 25 ++++++++++++++++++-------
 t/lei-import.t              |  2 +-
 t/lei-mark.t                |  4 ++--
 3 files changed, 21 insertions(+), 10 deletions(-)

diff --git a/lib/PublicInbox/LeiInput.pm b/lib/PublicInbox/LeiInput.pm
index 93284e8b..c04fc2f8 100644
--- a/lib/PublicInbox/LeiInput.pm
+++ b/lib/PublicInbox/LeiInput.pm
@@ -14,9 +14,9 @@ sub check_input_format ($;$) {
 		my $err = $files ? "regular file(s):\n@$files" : '--stdin';
 		return $lei->fail("--$opt_key unset for $err");
 	}
+	return 1 if $fmt eq 'eml';
 	require PublicInbox::MboxLock if $files;
 	require PublicInbox::MboxReader;
-	return 1 if $fmt eq 'eml';
 	# XXX: should this handle {gz,bz2,xz}? that's currently in LeiToMail
 	PublicInbox::MboxReader->reads($fmt) or
 		return $lei->fail("--$opt_key=$fmt unrecognized");
@@ -28,7 +28,6 @@ sub check_input_format ($;$) {
 sub input_fh {
 	my ($self, $ifmt, $fh, $name, @args) = @_;
 	if ($ifmt eq 'eml') {
-		require PublicInbox::Eml;
 		my $buf = do { local $/; <$fh> } //
 			return $self->{lei}->child_error(1 << 8, <<"");
 error reading $name: $!
@@ -60,13 +59,21 @@ sub input_path_url {
 					$self, @args);
 		return;
 	}
-	$input =~ s!\A([a-z0-9]+):!!i and $ifmt = lc($1);
+	if ($input =~ s!\A([a-z0-9]+):!!i) {
+		$ifmt = lc($1);
+	} elsif ($input =~ /\.(?:patch|eml)\z/i) {
+		$ifmt = 'eml';
+	}
 	my $devfd = $lei->path_to_fd($input) // return;
 	if ($devfd >= 0) {
 		$self->input_fh($ifmt, $lei->{$devfd}, $input, @args);
-	} elsif (-f $input) {
-		my $m = $lei->{opt}->{'lock'} // ($ifmt eq 'eml' ? ['none'] :
-				PublicInbox::MboxLock->defaults);
+	} elsif (-f $input && $ifmt eq 'eml') {
+		open my $fh, '<', $input or
+					return $lei->fail("open($input): $!");
+		$self->input_fh($ifmt, $fh, $input, @args);
+	} elsif (-f _) {
+		my $m = $lei->{opt}->{'lock'} //
+			PublicInbox::MboxLock->defaults;
 		my $mbl = PublicInbox::MboxLock->acq($input, 0, $m);
 		$self->input_fh($ifmt, $mbl->{fh}, $input, @args);
 	} elsif (-d _ && (-d "$input/cur" || -d "$input/new")) {
@@ -91,7 +98,6 @@ sub prepare_inputs { # returns undef on error
 		push @$inputs, '/dev/stdin';
 	}
 	my $net = $lei->{net}; # NetWriter may be created by l2m
-	my $fmt = $lei->{opt}->{'in-format'};
 	my (@f, @d);
 	# e.g. Maildir:/home/user/Mail/ or imaps://example.com/INBOX
 	for my $input (@$inputs) {
@@ -120,6 +126,11 @@ sub prepare_inputs { # returns undef on error
 			} else {
 				return $lei->fail("Unable to handle $input");
 			}
+		} elsif ($input =~ /\.(eml|patch)\z/i && -f $input) {
+			lc($in_fmt//'eml') eq 'eml' or return $lei->fail(<<"");
+$input is `eml', not --in-format=$in_fmt
+
+			require PublicInbox::Eml;
 		} else {
 			my $devfd = $lei->path_to_fd($input) // return;
 			if ($devfd >= 0 || -f $input || -p _) {
diff --git a/t/lei-import.t b/t/lei-import.t
index 33ce490d..99289748 100644
--- a/t/lei-import.t
+++ b/t/lei-import.t
@@ -4,7 +4,7 @@
 use strict; use v5.10.1; use PublicInbox::TestCommon;
 test_lei(sub {
 ok(!lei(qw(import -F bogus), 't/plack-qp.eml'), 'fails with bogus format');
-like($lei_err, qr/\bbogus unrecognized/, 'gave error message');
+like($lei_err, qr/\bis `eml', not --in-format/, 'gave error message');
 
 lei_ok(qw(q s:boolean), \'search miss before import');
 unlike($lei_out, qr/boolean/i, 'no results, yet');
diff --git a/t/lei-mark.t b/t/lei-mark.t
index 7855839e..98652c85 100644
--- a/t/lei-mark.t
+++ b/t/lei-mark.t
@@ -26,8 +26,8 @@ my $check_kw = sub {
 
 test_lei(sub {
 	lei_ok(qw(ls-label)); is($lei_out, '', 'no labels, yet');
-	lei_ok(qw(import -F eml t/utf8.eml));
-	lei_ok(qw(mark -F eml t/utf8.eml +kw:flagged +L:urgent));
+	lei_ok(qw(import t/utf8.eml));
+	lei_ok(qw(mark t/utf8.eml +kw:flagged +L:urgent));
 	$check_kw->(['flagged'], L => ['urgent']);
 	lei_ok(qw(ls-label)); is($lei_out, "urgent\n", 'label found');
 	ok(!lei(qw(mark -F eml t/utf8.eml +kw:seeen)), 'bad kw rejected');

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-03-29  7:08 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-29  7:08 [PATCH 0/3] lei input improvements Eric Wong
2021-03-29  7:08 ` [PATCH 1/3] lei_input: avoid special case sub for --stdin Eric Wong
2021-03-29  7:08 ` [PATCH 2/3] lei: use IO::Uncompress::Gunzip MultiStream Eric Wong
2021-03-29  7:08 ` [PATCH 3/3] lei_input: treat ".eml" and ".patch" suffix as "eml" Eric Wong

Code repositories for project(s) associated with this public inbox

	https://80x24.org/public-inbox.git

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).